@thotischner/observability-mcp 1.7.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/products.yaml.example +48 -0
- package/dist/analysis/history.d.ts +70 -0
- package/dist/analysis/history.js +170 -0
- package/dist/analysis/history.test.d.ts +1 -0
- package/dist/analysis/history.test.js +141 -0
- package/dist/audit/log.d.ts +108 -0
- package/dist/audit/log.js +200 -0
- package/dist/audit/log.test.d.ts +1 -0
- package/dist/audit/log.test.js +147 -0
- package/dist/audit/middleware.d.ts +20 -0
- package/dist/audit/middleware.js +50 -0
- package/dist/audit/redaction-bypass.d.ts +67 -0
- package/dist/audit/redaction-bypass.js +64 -0
- package/dist/audit/redaction-bypass.test.d.ts +1 -0
- package/dist/audit/redaction-bypass.test.js +72 -0
- package/dist/audit/sinks/types.d.ts +18 -0
- package/dist/audit/sinks/types.js +1 -0
- package/dist/audit/sinks/webhook.d.ts +45 -0
- package/dist/audit/sinks/webhook.js +111 -0
- package/dist/audit/sinks/webhook.test.d.ts +1 -0
- package/dist/audit/sinks/webhook.test.js +162 -0
- package/dist/auth/credentials.d.ts +29 -0
- package/dist/auth/credentials.js +53 -1
- package/dist/auth/credentials.test.js +46 -1
- package/dist/auth/csrf.d.ts +26 -0
- package/dist/auth/csrf.js +128 -0
- package/dist/auth/csrf.test.d.ts +1 -0
- package/dist/auth/csrf.test.js +143 -0
- package/dist/auth/local-users.d.ts +68 -0
- package/dist/auth/local-users.js +154 -0
- package/dist/auth/local-users.test.d.ts +1 -0
- package/dist/auth/local-users.test.js +121 -0
- package/dist/auth/middleware.d.ts +49 -0
- package/dist/auth/middleware.js +65 -0
- package/dist/auth/middleware.test.d.ts +1 -0
- package/dist/auth/middleware.test.js +90 -0
- package/dist/auth/oidc/client.d.ts +73 -0
- package/dist/auth/oidc/client.js +104 -0
- package/dist/auth/oidc/client.test.d.ts +1 -0
- package/dist/auth/oidc/client.test.js +121 -0
- package/dist/auth/oidc/dcr.d.ts +70 -0
- package/dist/auth/oidc/dcr.js +160 -0
- package/dist/auth/oidc/dcr.test.d.ts +1 -0
- package/dist/auth/oidc/dcr.test.js +109 -0
- package/dist/auth/oidc/discovery.d.ts +38 -0
- package/dist/auth/oidc/discovery.js +48 -0
- package/dist/auth/oidc/discovery.test.d.ts +1 -0
- package/dist/auth/oidc/discovery.test.js +68 -0
- package/dist/auth/oidc/endpoints.d.ts +20 -0
- package/dist/auth/oidc/endpoints.js +168 -0
- package/dist/auth/oidc/endpoints.test.d.ts +7 -0
- package/dist/auth/oidc/endpoints.test.js +304 -0
- package/dist/auth/oidc/flow-cookie.d.ts +57 -0
- package/dist/auth/oidc/flow-cookie.js +142 -0
- package/dist/auth/oidc/flow-cookie.test.d.ts +1 -0
- package/dist/auth/oidc/flow-cookie.test.js +0 -0
- package/dist/auth/oidc/index.d.ts +7 -0
- package/dist/auth/oidc/index.js +6 -0
- package/dist/auth/oidc/jwks.d.ts +36 -0
- package/dist/auth/oidc/jwks.js +69 -0
- package/dist/auth/oidc/jwks.test.d.ts +1 -0
- package/dist/auth/oidc/jwks.test.js +65 -0
- package/dist/auth/oidc/jwt.d.ts +62 -0
- package/dist/auth/oidc/jwt.js +113 -0
- package/dist/auth/oidc/jwt.test.d.ts +1 -0
- package/dist/auth/oidc/jwt.test.js +141 -0
- package/dist/auth/oidc/pkce.d.ts +19 -0
- package/dist/auth/oidc/pkce.js +43 -0
- package/dist/auth/oidc/pkce.test.d.ts +1 -0
- package/dist/auth/oidc/pkce.test.js +55 -0
- package/dist/auth/oidc/profiles.d.ts +22 -0
- package/dist/auth/oidc/profiles.js +95 -0
- package/dist/auth/oidc/profiles.test.d.ts +1 -0
- package/dist/auth/oidc/profiles.test.js +51 -0
- package/dist/auth/oidc/runtime.d.ts +66 -0
- package/dist/auth/oidc/runtime.js +142 -0
- package/dist/auth/oidc/runtime.test.d.ts +1 -0
- package/dist/auth/oidc/runtime.test.js +181 -0
- package/dist/auth/policy/batch-dry-run.d.ts +56 -0
- package/dist/auth/policy/batch-dry-run.js +129 -0
- package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
- package/dist/auth/policy/batch-dry-run.test.js +140 -0
- package/dist/auth/policy/engine.d.ts +64 -0
- package/dist/auth/policy/engine.js +87 -0
- package/dist/auth/policy/engine.test.d.ts +1 -0
- package/dist/auth/policy/engine.test.js +98 -0
- package/dist/auth/policy/loader.d.ts +45 -0
- package/dist/auth/policy/loader.js +137 -0
- package/dist/auth/policy/loader.test.d.ts +1 -0
- package/dist/auth/policy/loader.test.js +86 -0
- package/dist/auth/policy/opa.d.ts +69 -0
- package/dist/auth/policy/opa.js +173 -0
- package/dist/auth/policy/opa.test.d.ts +1 -0
- package/dist/auth/policy/opa.test.js +206 -0
- package/dist/auth/rbac.d.ts +62 -0
- package/dist/auth/rbac.js +162 -0
- package/dist/auth/rbac.test.d.ts +1 -0
- package/dist/auth/rbac.test.js +183 -0
- package/dist/auth/session.d.ts +66 -0
- package/dist/auth/session.js +146 -0
- package/dist/auth/session.test.d.ts +1 -0
- package/dist/auth/session.test.js +90 -0
- package/dist/catalog/loader.d.ts +67 -0
- package/dist/catalog/loader.js +122 -0
- package/dist/catalog/loader.test.d.ts +1 -0
- package/dist/catalog/loader.test.js +108 -0
- package/dist/cli/index.js +3 -0
- package/dist/cli/inspector-config.d.ts +9 -0
- package/dist/cli/inspector-config.js +28 -0
- package/dist/cli/inspector-config.test.d.ts +1 -0
- package/dist/cli/inspector-config.test.js +33 -0
- package/dist/cli/lib.d.ts +1 -1
- package/dist/cli/lib.js +1 -0
- package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
- package/dist/conformance/mcp-2025-11-25.test.js +206 -0
- package/dist/connectors/interface.d.ts +5 -1
- package/dist/connectors/loader.js +6 -4
- package/dist/connectors/loader.test.d.ts +1 -0
- package/dist/connectors/loader.test.js +78 -0
- package/dist/connectors/prometheus.test.js +31 -13
- package/dist/connectors/registry.d.ts +13 -0
- package/dist/connectors/registry.js +30 -0
- package/dist/connectors/registry.test.js +56 -2
- package/dist/context.d.ts +45 -1
- package/dist/context.js +40 -1
- package/dist/context.test.d.ts +1 -0
- package/dist/context.test.js +58 -0
- package/dist/federation/registry.d.ts +32 -0
- package/dist/federation/registry.js +77 -0
- package/dist/federation/registry.test.d.ts +1 -0
- package/dist/federation/registry.test.js +130 -0
- package/dist/federation/upstream.d.ts +60 -0
- package/dist/federation/upstream.js +114 -0
- package/dist/index.js +2124 -73
- package/dist/middleware/ssrfGuard.d.ts +15 -0
- package/dist/middleware/ssrfGuard.js +103 -0
- package/dist/middleware/ssrfGuard.test.d.ts +1 -0
- package/dist/middleware/ssrfGuard.test.js +81 -0
- package/dist/net/egress-policy.js +2 -0
- package/dist/observability/otel.d.ts +20 -0
- package/dist/observability/otel.js +118 -0
- package/dist/observability/otel.test.d.ts +1 -0
- package/dist/observability/otel.test.js +56 -0
- package/dist/openapi.js +654 -6
- package/dist/openapi.test.d.ts +1 -0
- package/dist/openapi.test.js +98 -0
- package/dist/policy/redact.d.ts +44 -0
- package/dist/policy/redact.js +144 -0
- package/dist/policy/redact.test.d.ts +1 -0
- package/dist/policy/redact.test.js +172 -0
- package/dist/postmortem/synthesizer.d.ts +83 -0
- package/dist/postmortem/synthesizer.js +205 -0
- package/dist/postmortem/synthesizer.test.d.ts +1 -0
- package/dist/postmortem/synthesizer.test.js +141 -0
- package/dist/products/loader.d.ts +112 -0
- package/dist/products/loader.js +289 -0
- package/dist/products/loader.test.d.ts +1 -0
- package/dist/products/loader.test.js +257 -0
- package/dist/quota/charge.d.ts +28 -0
- package/dist/quota/charge.js +30 -0
- package/dist/quota/charge.test.d.ts +1 -0
- package/dist/quota/charge.test.js +83 -0
- package/dist/quota/limiter.d.ts +97 -0
- package/dist/quota/limiter.js +161 -0
- package/dist/quota/limiter.test.d.ts +1 -0
- package/dist/quota/limiter.test.js +205 -0
- package/dist/quota/token-budget.d.ts +119 -0
- package/dist/quota/token-budget.js +297 -0
- package/dist/quota/token-budget.test.d.ts +1 -0
- package/dist/quota/token-budget.test.js +215 -0
- package/dist/scim/group-role-map.d.ts +4 -0
- package/dist/scim/group-role-map.js +33 -0
- package/dist/scim/group-role-map.test.d.ts +1 -0
- package/dist/scim/group-role-map.test.js +33 -0
- package/dist/scim/routes.d.ts +15 -0
- package/dist/scim/routes.js +249 -0
- package/dist/scim/store.d.ts +37 -0
- package/dist/scim/store.js +178 -0
- package/dist/scim/store.test.d.ts +1 -0
- package/dist/scim/store.test.js +121 -0
- package/dist/scim/types.d.ts +73 -0
- package/dist/scim/types.js +29 -0
- package/dist/sdk/hooks.d.ts +77 -0
- package/dist/sdk/hooks.js +72 -0
- package/dist/sdk/hooks.test.d.ts +1 -0
- package/dist/sdk/hooks.test.js +159 -0
- package/dist/sdk/index.d.ts +2 -0
- package/dist/sdk/index.js +1 -0
- package/dist/sdk/manifest-schema.d.ts +17 -0
- package/dist/sdk/manifest-schema.js +21 -0
- package/dist/tenancy/context.d.ts +45 -0
- package/dist/tenancy/context.js +97 -0
- package/dist/tenancy/context.test.d.ts +1 -0
- package/dist/tenancy/context.test.js +72 -0
- package/dist/tenancy/migration.test.d.ts +7 -0
- package/dist/tenancy/migration.test.js +75 -0
- package/dist/tools/context-seam.test.js +6 -1
- package/dist/tools/detect-anomalies.d.ts +1 -1
- package/dist/tools/detect-anomalies.js +5 -4
- package/dist/tools/generate-postmortem.d.ts +35 -0
- package/dist/tools/generate-postmortem.js +191 -0
- package/dist/tools/get-anomaly-history.d.ts +35 -0
- package/dist/tools/get-anomaly-history.js +126 -0
- package/dist/tools/get-service-health.d.ts +1 -1
- package/dist/tools/get-service-health.js +4 -3
- package/dist/tools/list-services.d.ts +1 -1
- package/dist/tools/list-services.js +3 -2
- package/dist/tools/list-sources.d.ts +1 -1
- package/dist/tools/list-sources.js +6 -2
- package/dist/tools/query-logs.d.ts +1 -1
- package/dist/tools/query-logs.js +2 -2
- package/dist/tools/query-metrics.d.ts +1 -1
- package/dist/tools/query-metrics.js +19 -6
- package/dist/tools/query-traces.d.ts +47 -0
- package/dist/tools/query-traces.js +145 -0
- package/dist/tools/query-traces.test.d.ts +1 -0
- package/dist/tools/query-traces.test.js +110 -0
- package/dist/tools/registry-names.d.ts +35 -0
- package/dist/tools/registry-names.js +54 -0
- package/dist/tools/registry-names.test.d.ts +1 -0
- package/dist/tools/registry-names.test.js +61 -0
- package/dist/tools/topology.d.ts +3 -3
- package/dist/tools/topology.js +10 -6
- package/dist/topology/merge.d.ts +22 -0
- package/dist/topology/merge.js +178 -0
- package/dist/topology/merge.test.d.ts +1 -0
- package/dist/topology/merge.test.js +110 -0
- package/dist/transport/sessionStore.d.ts +66 -0
- package/dist/transport/sessionStore.js +138 -0
- package/dist/transport/sessionStore.test.d.ts +1 -0
- package/dist/transport/sessionStore.test.js +118 -0
- package/dist/transport/websocket.d.ts +35 -0
- package/dist/transport/websocket.js +133 -0
- package/dist/transport/websocket.test.d.ts +1 -0
- package/dist/transport/websocket.test.js +124 -0
- package/dist/types.d.ts +51 -0
- package/dist/ui/index.html +3083 -88
- package/package.json +32 -5
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { test } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { applyBudgetDecision } from "./charge.js";
|
|
4
|
+
/**
 * Build a budget-decision fixture for the tests below.
 *
 * Starts from a denied decision (nothing used, cap of 1000, one hour
 * until retry, 100 tokens freed at retry) and lets `over` replace any
 * of those fields.
 */
function decision(over) {
    const denied = {
        allowed: false,
        used: 0,
        limit: 1000,
        retryAfterSeconds: 3600,
        freedAtRetry: 100,
    };
    // Later sources win, so every key present in `over` replaces the default.
    return Object.assign({}, denied, over);
}
|
|
14
|
+
/** Tool-result fixture with a single text entry; every call builds a fresh object. */
const sampleResult = () => {
    const entry = { text: "original tool output" };
    return { content: [entry] };
};
|
|
15
|
+
test("applyBudgetDecision — passes the result through when allowed", () => {
    const input = sampleResult();
    const verdict = decision({ allowed: true });
    const output = applyBudgetDecision(input, verdict, 50, "query_logs");
    // Same object identity, not merely an equal copy.
    assert.equal(output, input, "exact passthrough when allowed");
});
test("applyBudgetDecision — passes through when uncapped (limit === 0)", () => {
    const input = sampleResult();
    const verdict = decision({ allowed: false, limit: 0 });
    const output = applyBudgetDecision(input, verdict, 50_000, "query_logs");
    assert.equal(output.content[0].text, "original tool output");
});
test("applyBudgetDecision — cumulative exceed emits OMCP_TOKEN_BUDGET_EXCEEDED", () => {
    // The 100-token request is below the 1000 cap on its own; it is the
    // 950 tokens already used that push the total over.
    const input = sampleResult();
    const verdict = decision({ used: 950, limit: 1000, retryAfterSeconds: 7200, freedAtRetry: 200 });
    const output = applyBudgetDecision(input, verdict, 100, "query_logs");
    const payload = JSON.parse(output.content[0].text);
    assert.equal(payload.error, "OMCP_TOKEN_BUDGET_EXCEEDED");
    assert.equal(payload.tool, "query_logs");
    assert.equal(payload.used, 950);
    assert.equal(payload.limit, 1000);
    assert.equal(payload.requested, 100);
    assert.equal(payload.retryAfterSeconds, 7200);
    assert.equal(payload.freedAtRetry, 200);
    assert.match(payload.message, /Daily token budget exceeded/);
    assert.match(payload.message, /Try again in ~2h/);
});
test("applyBudgetDecision — single request > limit emits the DISTINCT OMCP_TOKEN_REQUEST_EXCEEDS_BUDGET", () => {
    // A request bigger than the whole daily cap can never fit, however
    // long the agent waits, so it gets its own error code and must not
    // be retried. retryAfterSeconds is expected to be 0 (not the 3600
    // carried by the decision) so that naive backoff loops stop.
    const input = sampleResult();
    const verdict = decision({ used: 0, limit: 1000, retryAfterSeconds: 3600, freedAtRetry: 0 });
    const requested = 5000; // request > limit
    const output = applyBudgetDecision(input, verdict, requested, "query_metrics");
    const payload = JSON.parse(output.content[0].text);
    assert.equal(payload.error, "OMCP_TOKEN_REQUEST_EXCEEDS_BUDGET");
    assert.equal(payload.tool, "query_metrics");
    assert.equal(payload.requested, 5000);
    assert.equal(payload.limit, 1000);
    assert.equal(payload.retryAfterSeconds, 0, "retry-loop killer: 0 instead of inherited 3600");
    assert.match(payload.message, /larger than the entire daily budget/);
    assert.match(payload.message, /Retrying won't help/);
});
test("applyBudgetDecision — boundary: request == limit is NOT the request-exceeds-cap code", () => {
    // A request exactly as large as the cap could still succeed against
    // an empty bucket, so it belongs to the cumulative-exceeded path
    // rather than the unconditional-deny path.
    const input = sampleResult();
    const verdict = decision({ used: 100, limit: 1000 });
    const output = applyBudgetDecision(input, verdict, 1000, "query_logs");
    const payload = JSON.parse(output.content[0].text);
    assert.equal(payload.error, "OMCP_TOKEN_BUDGET_EXCEEDED");
});
test("applyBudgetDecision — preserves additional content entries past the first", () => {
    const input = {
        content: [
            { text: "first", extraField: 42 },
            { text: "second" },
            { text: "third" },
        ],
    };
    const output = applyBudgetDecision(input, decision({}), 10, "t");
    assert.equal(output.content.length, 3);
    // Only the first entry's text is swapped for the error payload; its
    // other fields (extraField) survive.
    const payload = JSON.parse(output.content[0].text);
    assert.equal(payload.error, "OMCP_TOKEN_BUDGET_EXCEEDED");
    assert.equal(output.content[0].extraField, 42);
    // Entries after the first are left as they were.
    assert.equal(output.content[1].text, "second");
    assert.equal(output.content[2].text, "third");
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-identity sliding-window rate limiter for the MCP tool surface.
|
|
3
|
+
*
|
|
4
|
+
* The bearer-token credential resolved by `auth/credentials.ts`
|
|
5
|
+
* (`OMCP_API_KEYS`) names every distinct caller; this limiter caps how
|
|
6
|
+
* many MCP tool calls each named caller can make per minute. Anonymous
|
|
7
|
+
* MCP traffic (when no `OMCP_API_KEYS` is set) bypasses the per-identity
|
|
8
|
+
* cap — the existing express-rate-limit IP gate at the /mcp transport
|
|
9
|
+
* still applies.
|
|
10
|
+
*
|
|
11
|
+
* The window is sliding: each call records its timestamp under the
|
|
12
|
+
* identity's key, and `check()` first prunes entries older than the
|
|
13
|
+
* configured window before counting. Memory bound is O(callers × N)
|
|
14
|
+
* where N is the per-window cap — a few KB even for a busy deployment.
|
|
15
|
+
*
|
|
16
|
+
* Persistence is out of scope here. A future revision can plug a
|
|
17
|
+
* Redis-backed store via the same interface.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Resolve `OMCP_TOOL_RATE_PER_MIN` (or any equivalent caller-supplied
 * string) into the per-identity cap used by the limiter and reported
 * by `/api/info` + `/api/usage`. Single source of truth, so the three
 * call sites don't drift.
 *
 * Behaviour:
 *   - unset / empty / non-numeric → the default cap (60)
 *   - `"0"`, negative values and anything below 1 → the default cap.
 *     A limit of 0 or less would deny every request, which is almost
 *     never what an operator wants; mapping it to the default keeps
 *     them from locking themselves out. Disabling the cap is done
 *     with one of the explicit tokens below instead.
 *   - `"off"` / `"none"` / `"unlimited"` / `"disabled"` / `"false"`
 *     (case-insensitive, surrounding whitespace ignored) →
 *     Number.POSITIVE_INFINITY, which the `count >= limit` comparison
 *     in check() never reaches. JSON serialisation renders Infinity as
 *     `null`; consumers can treat a null limit as "uncapped".
 *   - any finite number ≥ 1 → that value, rounded down to an integer
 *     (`"60.7"` → 60)
 *
 * @param raw The raw setting, typically the environment variable value.
 * @returns The resolved cap: a positive integer or Infinity.
 */
export declare function resolveToolRatePerMin(raw: string | undefined): number;
|
|
41
|
+
/** Construction options for {@link IdentityRateLimiter}. Every field is optional. */
export interface LimiterConfig {
    /** Default cap per identity per window. Defaults to 60. */
    limit?: number;
    /** Window length in milliseconds. Defaults to 60_000. */
    windowMs?: number;
    /** Optional per-identity override. Returns the cap for the named
     *  identity, or undefined to fall back to the default `limit`.
     *  Useful for the OMCP_KEY_RATE_PER_MIN credential-level override
     *  (`agent=600;ci=240`) — an admin gives a noisy automation a higher
     *  quota without affecting every other caller. Returning Infinity
     *  disables the cap for that identity (matches the global
     *  unlimited-token contract). A returned value that is neither a
     *  finite number ≥ 1 nor +Infinity is ignored and the default
     *  `limit` applies. */
    limitFor?: (identity: string) => number | undefined;
}
|
|
55
|
+
/**
 * Parse OMCP_KEY_RATE_PER_MIN — `name=count;name2=count2`. Same
 * shape as parseKeyTenants / parseKeyProducts so operators have one
 * syntactic model across all per-credential overrides.
 *
 * Entries without a name, without a value, or whose count is
 * non-numeric or below 1 are skipped silently. Accepted counts are
 * rounded down to an integer. The disable tokens
 * (off/none/unlimited/disabled/false, case-insensitive) map to
 * Infinity, same as the global OMCP_TOOL_RATE_PER_MIN.
 *
 * @param raw The raw setting; undefined or empty yields an empty map.
 * @returns Map from credential name to its per-window cap.
 */
export declare function parseKeyRateLimits(raw: string | undefined): Map<string, number>;
|
|
62
|
+
/** Outcome of one `IdentityRateLimiter.check()` call. */
export interface CheckResult {
    /** True when the call is allowed (and the timestamp recorded). */
    allowed: boolean;
    /** Number of calls recorded in the current window. Includes this
     *  call when it was allowed; a denied call is not counted. */
    count: number;
    /** Per-window cap that applied to this identity. */
    limit: number;
    /** Window length in ms. */
    windowMs: number;
    /** Seconds until the oldest in-window record falls off and a new
     *  slot opens, rounded up and never less than 1 on a denial.
     *  0 when allowed. */
    retryAfterSeconds: number;
}
|
|
75
|
+
/**
 * Sliding-window rate limiter that keeps one bucket of call
 * timestamps per identity. All state is held in the instance.
 */
export declare class IdentityRateLimiter {
    private readonly defaultLimit;
    private readonly windowMs;
    private readonly limitFor?;
    private readonly buckets;
    /** @param cfg Optional overrides for the default cap, the window
     *  length and the per-identity cap lookup. */
    constructor(cfg?: LimiterConfig);
    /** Resolved cap for one identity: the per-identity override wins
     *  when it returns a usable value; otherwise the process-wide
     *  default applies. */
    private resolveLimit;
    /** Record-and-test a call for the given identity. Returns the
     *  decision plus enough context to render a 429 with Retry-After.
     *  `now` is the call time in epoch milliseconds and defaults to
     *  the current time; a denied call is not recorded. */
    check(identity: string, now?: number): CheckResult;
    /** Read-only snapshot — useful for /api/usage and tests. Counts the
     *  calls still inside the window at `now` without recording one. */
    inspect(identity: string, now?: number): {
        count: number;
        limit: number;
        windowMs: number;
    };
    /** Every identity that has been passed to `check()` since
     *  construction or the last `reset()` — for /api/usage aggregation. */
    knownIdentities(): string[];
    /** For testing — reset every identity's bucket. */
    reset(): void;
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-identity sliding-window rate limiter for the MCP tool surface.
|
|
3
|
+
*
|
|
4
|
+
* The bearer-token credential resolved by `auth/credentials.ts`
|
|
5
|
+
* (`OMCP_API_KEYS`) names every distinct caller; this limiter caps how
|
|
6
|
+
* many MCP tool calls each named caller can make per minute. Anonymous
|
|
7
|
+
* MCP traffic (when no `OMCP_API_KEYS` is set) bypasses the per-identity
|
|
8
|
+
* cap — the existing express-rate-limit IP gate at the /mcp transport
|
|
9
|
+
* still applies.
|
|
10
|
+
*
|
|
11
|
+
* The window is sliding: each call records its timestamp under the
|
|
12
|
+
* identity's key, and `check()` first prunes entries older than the
|
|
13
|
+
* configured window before counting. Memory bound is O(callers × N)
|
|
14
|
+
* where N is the per-window cap — a few KB even for a busy deployment.
|
|
15
|
+
*
|
|
16
|
+
* Persistence is out of scope here. A future revision can plug a
|
|
17
|
+
* Redis-backed store via the same interface.
|
|
18
|
+
*/
|
|
19
|
+
/** Per-identity cap used when no limit (or an unusable one) is configured. */
const DEFAULT_LIMIT_PER_MIN = 60;
/** Default window length in milliseconds (one minute). */
const DEFAULT_WINDOW_MS = 60_000;
/** Magic strings that explicitly disable the per-identity cap.
 *  Matched case-insensitively. An operator can set any of these to
 *  mean "no rate limit at all" — useful when the caps are enforced
 *  upstream (envoy / API-gateway) and OMCP shouldn't double-count. */
const UNLIMITED_TOKENS = new Set(["off", "none", "unlimited", "disabled", "false"]);
|
|
26
|
+
/**
 * Turn the raw `OMCP_TOOL_RATE_PER_MIN` setting (or any string a caller
 * supplies in its place) into the per-identity cap. The limiter and the
 * `/api/info` + `/api/usage` reports all resolve the value here so they
 * cannot disagree.
 *
 * Resolution rules:
 *   - undefined or empty string → DEFAULT_LIMIT_PER_MIN
 *   - one of UNLIMITED_TOKENS (`off` / `none` / `unlimited` /
 *     `disabled` / `false`), compared after trimming and lower-casing
 *     → Number.POSITIVE_INFINITY. check() never reaches an infinite
 *     cap; JSON renders it as `null`, which consumers can read as
 *     "uncapped".
 *   - not a finite number, or below 1 (this covers `"0"` and negative
 *     values) → DEFAULT_LIMIT_PER_MIN. A cap of 0 or less would deny
 *     every call, so it is treated as "use the default" rather than
 *     locking the operator out; disabling goes through the tokens above.
 *   - any other number → that number rounded down to an integer.
 */
export function resolveToolRatePerMin(raw) {
    const unset = raw === undefined || raw === "";
    if (unset) {
        return DEFAULT_LIMIT_PER_MIN;
    }
    const token = raw.trim().toLowerCase();
    if (UNLIMITED_TOKENS.has(token)) {
        return Number.POSITIVE_INFINITY;
    }
    const parsed = Number(raw);
    const usable = Number.isFinite(parsed) && parsed >= 1;
    return usable ? Math.floor(parsed) : DEFAULT_LIMIT_PER_MIN;
}
|
|
57
|
+
/**
 * Parse OMCP_KEY_RATE_PER_MIN, written as `name=count;name2=count2` —
 * the same shape as parseKeyTenants / parseKeyProducts, so every
 * per-credential override shares one syntax.
 *
 * An entry is dropped silently when it has no `=`, an empty name, an
 * empty value, or a count that is not a finite number of at least 1.
 * Accepted counts are rounded down. The disable tokens
 * (off/none/unlimited/disabled/false, any case) become Infinity, as
 * they do for the global OMCP_TOOL_RATE_PER_MIN. When a name appears
 * more than once, the last usable entry wins.
 */
export function parseKeyRateLimits(raw) {
    const limits = new Map();
    if (!raw) {
        return limits;
    }
    for (const piece of raw.split(";")) {
        const pair = piece.trim();
        const sep = pair.indexOf("=");
        // sep === -1: no "=" (this includes empty pieces); sep === 0: no name.
        if (sep < 1) {
            continue;
        }
        const name = pair.slice(0, sep).trim();
        const value = pair.slice(sep + 1).trim();
        if (name === "" || value === "") {
            continue;
        }
        if (UNLIMITED_TOKENS.has(value.toLowerCase())) {
            limits.set(name, Number.POSITIVE_INFINITY);
        }
        else {
            const count = Number(value);
            if (Number.isFinite(count) && count >= 1) {
                limits.set(name, Math.floor(count));
            }
        }
    }
    return limits;
}
|
|
86
|
+
export class IdentityRateLimiter {
|
|
87
|
+
defaultLimit;
|
|
88
|
+
windowMs;
|
|
89
|
+
limitFor;
|
|
90
|
+
// identity → ring of millisecond timestamps, newest at the end.
|
|
91
|
+
buckets = new Map();
|
|
92
|
+
constructor(cfg = {}) {
|
|
93
|
+
this.defaultLimit = cfg.limit ?? DEFAULT_LIMIT_PER_MIN;
|
|
94
|
+
this.windowMs = cfg.windowMs ?? DEFAULT_WINDOW_MS;
|
|
95
|
+
this.limitFor = cfg.limitFor;
|
|
96
|
+
}
|
|
97
|
+
/** Resolved cap for one identity: the per-identity override wins
|
|
98
|
+
* when defined; otherwise the process-wide default applies. */
|
|
99
|
+
resolveLimit(identity) {
|
|
100
|
+
if (this.limitFor) {
|
|
101
|
+
const v = this.limitFor(identity);
|
|
102
|
+
if (typeof v === "number" && (Number.isFinite(v) ? v >= 1 : v === Number.POSITIVE_INFINITY)) {
|
|
103
|
+
return v;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
return this.defaultLimit;
|
|
107
|
+
}
|
|
108
|
+
/** Record-and-test a call for the given identity. Returns the
|
|
109
|
+
* decision plus enough context to render a 429 with Retry-After. */
|
|
110
|
+
check(identity, now = Date.now()) {
|
|
111
|
+
const cutoff = now - this.windowMs;
|
|
112
|
+
const bucket = this.buckets.get(identity) ?? [];
|
|
113
|
+
// Drop expired entries from the front of the bucket.
|
|
114
|
+
let i = 0;
|
|
115
|
+
while (i < bucket.length && bucket[i] <= cutoff)
|
|
116
|
+
i++;
|
|
117
|
+
const fresh = i === 0 ? bucket : bucket.slice(i);
|
|
118
|
+
const limit = this.resolveLimit(identity);
|
|
119
|
+
if (fresh.length >= limit) {
|
|
120
|
+
// Compute when the oldest in-window record drops off.
|
|
121
|
+
const retryAfterMs = fresh[0] + this.windowMs - now;
|
|
122
|
+
// Don't store the call we just denied — that would push the
|
|
123
|
+
// window forward and starve the next legitimate request.
|
|
124
|
+
this.buckets.set(identity, fresh);
|
|
125
|
+
return {
|
|
126
|
+
allowed: false,
|
|
127
|
+
count: fresh.length,
|
|
128
|
+
limit,
|
|
129
|
+
windowMs: this.windowMs,
|
|
130
|
+
retryAfterSeconds: Math.max(1, Math.ceil(retryAfterMs / 1000)),
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
fresh.push(now);
|
|
134
|
+
this.buckets.set(identity, fresh);
|
|
135
|
+
return {
|
|
136
|
+
allowed: true,
|
|
137
|
+
count: fresh.length,
|
|
138
|
+
limit,
|
|
139
|
+
windowMs: this.windowMs,
|
|
140
|
+
retryAfterSeconds: 0,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/** Read-only snapshot — useful for /api/usage and tests. */
|
|
144
|
+
inspect(identity, now = Date.now()) {
|
|
145
|
+
const cutoff = now - this.windowMs;
|
|
146
|
+
const bucket = this.buckets.get(identity) ?? [];
|
|
147
|
+
let count = 0;
|
|
148
|
+
for (const t of bucket)
|
|
149
|
+
if (t > cutoff)
|
|
150
|
+
count++;
|
|
151
|
+
return { count, limit: this.resolveLimit(identity), windowMs: this.windowMs };
|
|
152
|
+
}
|
|
153
|
+
/** All identities we've ever seen — for /api/usage aggregation. */
|
|
154
|
+
knownIdentities() {
|
|
155
|
+
return Array.from(this.buckets.keys());
|
|
156
|
+
}
|
|
157
|
+
/** For testing — reset every identity's bucket. */
|
|
158
|
+
reset() {
|
|
159
|
+
this.buckets.clear();
|
|
160
|
+
}
|
|
161
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { test } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { IdentityRateLimiter, resolveToolRatePerMin } from "./limiter.js";
|
|
4
|
+
test("resolveToolRatePerMin — unset / empty / non-numeric returns default 60", () => {
    for (const raw of [undefined, "", "not-a-number", "NaN"]) {
        assert.equal(resolveToolRatePerMin(raw), 60);
    }
});
test("resolveToolRatePerMin — zero / negative falls back to default (limit=0 would deny every call)", () => {
    // Footgun pin: "0" reads like "disable", yet a limiter given 0 is
    // over its cap on the very first call. Resolving it to the default
    // keeps operators from locking themselves out by accident.
    for (const raw of ["0", "-1", "-1000"]) {
        assert.equal(resolveToolRatePerMin(raw), 60);
    }
});
test("resolveToolRatePerMin — positive integer passes through; decimals floored", () => {
    const cases = [
        ["1", 1],
        ["120", 120],
        ["240", 240],
        ["60.7", 60],
    ];
    for (const [raw, expected] of cases) {
        assert.equal(resolveToolRatePerMin(raw), expected);
    }
});
test("allows up to the configured limit, then denies", () => {
    const limiter = new IdentityRateLimiter({ limit: 3, windowMs: 60_000 });
    const t0 = 1_700_000_000_000;
    for (const offset of [0, 100, 200]) {
        assert.equal(limiter.check("alice", t0 + offset).allowed, true);
    }
    const fourth = limiter.check("alice", t0 + 300);
    assert.equal(fourth.allowed, false);
    assert.equal(fourth.count, 3);
    assert.equal(fourth.limit, 3);
    assert.ok(fourth.retryAfterSeconds >= 1);
});
test("sliding window: expired entries free up slots", () => {
    const limiter = new IdentityRateLimiter({ limit: 2, windowMs: 10_000 });
    const t0 = 1_700_000_000_000;
    limiter.check("alice", t0 + 0);
    limiter.check("alice", t0 + 5_000);
    // Nine seconds in, both records are still inside the window.
    const atNineSeconds = limiter.check("alice", t0 + 9_000);
    assert.equal(atNineSeconds.allowed, false);
    // Eleven seconds in, the first record has aged out and a slot is free.
    const atElevenSeconds = limiter.check("alice", t0 + 11_000);
    assert.equal(atElevenSeconds.allowed, true);
    assert.equal(atElevenSeconds.count, 2);
});
test("identities are isolated from each other", () => {
    const limiter = new IdentityRateLimiter({ limit: 1, windowMs: 60_000 });
    const t0 = 1_700_000_000_000;
    const aliceFirst = limiter.check("alice", t0);
    assert.equal(aliceFirst.allowed, true);
    const aliceSecond = limiter.check("alice", t0);
    assert.equal(aliceSecond.allowed, false);
    // bob's bucket is separate from alice's exhausted one.
    const bobFirst = limiter.check("bob", t0);
    assert.equal(bobFirst.allowed, true);
});
test("retryAfterSeconds points at the oldest in-window record's expiry", () => {
    const limiter = new IdentityRateLimiter({ limit: 1, windowMs: 30_000 });
    const t0 = 1_700_000_000_000;
    limiter.check("alice", t0);
    const { allowed, retryAfterSeconds } = limiter.check("alice", t0 + 5_000);
    assert.equal(allowed, false);
    // The record made at t0 expires at t0+30s, which is 25s after t0+5s.
    assert.equal(retryAfterSeconds, 25);
});
test("denied calls do NOT push the window forward", () => {
    const limiter = new IdentityRateLimiter({ limit: 1, windowMs: 10_000 });
    const t0 = 1_700_000_000_000;
    limiter.check("alice", t0);
    // Nine denied attempts, 100 ms apart; none may replace the oldest timestamp.
    for (let offset = 100; offset < 1_000; offset += 100) {
        limiter.check("alice", t0 + offset);
    }
    // Expiry must still be t0+10s rather than a later time set by the denials.
    const { allowed } = limiter.check("alice", t0 + 10_001);
    assert.equal(allowed, true);
});
test("inspect: returns counts without consuming a slot", () => {
    const limiter = new IdentityRateLimiter({ limit: 5, windowMs: 60_000 });
    const t0 = 1_700_000_000_000;
    limiter.check("alice", t0);
    limiter.check("alice", t0);
    const snapshot = limiter.inspect("alice", t0);
    assert.equal(snapshot.count, 2);
    assert.equal(snapshot.limit, 5);
    // Two of five slots are used, so the next check is still allowed.
    const next = limiter.check("alice", t0);
    assert.equal(next.allowed, true);
});
|
|
87
|
+
test("knownIdentities — returns every identity that has been checked", () => {
|
|
88
|
+
const lim = new IdentityRateLimiter({ limit: 5, windowMs: 60_000 });
|
|
89
|
+
const t = 1_700_000_000_000;
|
|
90
|
+
lim.check("alice", t);
|
|
91
|
+
lim.check("bob", t);
|
|
92
|
+
lim.check("alice", t);
|
|
93
|
+
const ids = lim.knownIdentities().sort();
|
|
94
|
+
assert.deepEqual(ids, ["alice", "bob"]);
|
|
95
|
+
});
|
|
96
|
+
test("inspect on an unknown identity returns count=0", () => {
|
|
97
|
+
const lim = new IdentityRateLimiter({ limit: 5, windowMs: 60_000 });
|
|
98
|
+
const ins = lim.inspect("never-seen");
|
|
99
|
+
assert.equal(ins.count, 0);
|
|
100
|
+
assert.equal(ins.limit, 5);
|
|
101
|
+
});
|
|
102
|
+
test("reset clears all buckets", () => {
|
|
103
|
+
const lim = new IdentityRateLimiter({ limit: 1, windowMs: 60_000 });
|
|
104
|
+
const t = 1_700_000_000_000;
|
|
105
|
+
lim.check("alice", t);
|
|
106
|
+
lim.check("bob", t);
|
|
107
|
+
lim.reset();
|
|
108
|
+
assert.equal(lim.check("alice", t).allowed, true);
|
|
109
|
+
assert.equal(lim.check("bob", t).allowed, true);
|
|
110
|
+
});
|
|
111
|
+
test("default limit applies when constructed with no args", () => {
|
|
112
|
+
const lim = new IdentityRateLimiter();
|
|
113
|
+
// Exhaust the default 60/min cap.
|
|
114
|
+
const t = 1_700_000_000_000;
|
|
115
|
+
for (let i = 0; i < 60; i++) {
|
|
116
|
+
assert.equal(lim.check("alice", t + i).allowed, true);
|
|
117
|
+
}
|
|
118
|
+
assert.equal(lim.check("alice", t + 60).allowed, false);
|
|
119
|
+
});
|
|
120
|
+
test("resolveToolRatePerMin — explicit-disable tokens map to Infinity (any case, with whitespace)", () => {
|
|
121
|
+
for (const tok of ["off", "OFF", "Off", "none", "NONE", "unlimited", "UNLIMITED", "disabled", "false", " off "]) {
|
|
122
|
+
assert.equal(resolveToolRatePerMin(tok), Number.POSITIVE_INFINITY, `'${tok}' should disable the limiter`);
|
|
123
|
+
}
|
|
124
|
+
});
|
|
125
|
+
test("IdentityRateLimiter — limit=Infinity always allows (the explicit-disable contract)", () => {
|
|
126
|
+
const lim = new IdentityRateLimiter({ limit: Number.POSITIVE_INFINITY });
|
|
127
|
+
const t = 1_700_000_000_000;
|
|
128
|
+
// Burst far past the default cap; every call must allow.
|
|
129
|
+
for (let i = 0; i < 1000; i++) {
|
|
130
|
+
const r = lim.check("alice", t + i);
|
|
131
|
+
assert.equal(r.allowed, true);
|
|
132
|
+
assert.equal(r.retryAfterSeconds, 0);
|
|
133
|
+
// Limit reflects the configured Infinity — JSON serialisation
|
|
134
|
+
// would render this as null; callers can branch on Number.isFinite.
|
|
135
|
+
assert.equal(r.limit, Number.POSITIVE_INFINITY);
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
test("resolveToolRatePerMin — disable tokens are NOT a number trap (\"infinity\" alone is not a token)", () => {
|
|
139
|
+
// We deliberately do NOT accept the literal string "infinity"
|
|
140
|
+
// because Number("Infinity") === Infinity — operators expect
|
|
141
|
+
// OMCP_TOOL_RATE_PER_MIN=Infinity to fall back to the default, not silently
|
|
142
|
+
// mean "unlimited". The explicit tokens are off/none/unlimited/
|
|
143
|
+
// disabled/false. (Number.isFinite is what the resolver checks.)
|
|
144
|
+
assert.equal(resolveToolRatePerMin("Infinity"), 60, "literal 'Infinity' must NOT secretly enable unlimited mode");
|
|
145
|
+
});
|
|
146
|
+
import { parseKeyRateLimits } from "./limiter.js";
|
|
147
|
+
test("parseKeyRateLimits — parses name=count pairs, skips malformed entries", () => {
|
|
148
|
+
const m = parseKeyRateLimits("agent=600;ci=240;bad;empty=;negative=-1;zero=0;notnum=abc");
|
|
149
|
+
assert.equal(m.get("agent"), 600);
|
|
150
|
+
assert.equal(m.get("ci"), 240);
|
|
151
|
+
assert.equal(m.get("bad"), undefined);
|
|
152
|
+
assert.equal(m.get("empty"), undefined);
|
|
153
|
+
assert.equal(m.get("negative"), undefined);
|
|
154
|
+
assert.equal(m.get("zero"), undefined);
|
|
155
|
+
assert.equal(m.get("notnum"), undefined);
|
|
156
|
+
});
|
|
157
|
+
test("parseKeyRateLimits — disable tokens map to Infinity (same vocabulary as the global override)", () => {
|
|
158
|
+
const m = parseKeyRateLimits("agent=off;ci=unlimited;loud=DISABLED");
|
|
159
|
+
assert.equal(m.get("agent"), Number.POSITIVE_INFINITY);
|
|
160
|
+
assert.equal(m.get("ci"), Number.POSITIVE_INFINITY);
|
|
161
|
+
assert.equal(m.get("loud"), Number.POSITIVE_INFINITY);
|
|
162
|
+
});
|
|
163
|
+
test("IdentityRateLimiter — limitFor override wins over the default cap", () => {
|
|
164
|
+
// Default is 60; override gives agent=2.
|
|
165
|
+
const lim = new IdentityRateLimiter({
|
|
166
|
+
limit: 60,
|
|
167
|
+
limitFor: (id) => (id === "default agent" ? 2 : undefined),
|
|
168
|
+
});
|
|
169
|
+
const t = 1_700_000_000_000;
|
|
170
|
+
// Two calls allowed for agent, third denies.
|
|
171
|
+
assert.equal(lim.check("default agent", t).allowed, true);
|
|
172
|
+
assert.equal(lim.check("default agent", t + 1).allowed, true);
|
|
173
|
+
const denied = lim.check("default agent", t + 2);
|
|
174
|
+
assert.equal(denied.allowed, false);
|
|
175
|
+
assert.equal(denied.limit, 2, "reported limit must be the per-identity override, not the default");
|
|
176
|
+
// Default identity still gets the global 60.
|
|
177
|
+
for (let i = 0; i < 60; i++) {
|
|
178
|
+
assert.equal(lim.check("default other", t + i).allowed, true);
|
|
179
|
+
}
|
|
180
|
+
assert.equal(lim.check("default other", t + 60).allowed, false);
|
|
181
|
+
});
|
|
182
|
+
test("IdentityRateLimiter — limitFor returning undefined falls back to default; Infinity disables for that identity", () => {
|
|
183
|
+
const lim = new IdentityRateLimiter({
|
|
184
|
+
limit: 5,
|
|
185
|
+
limitFor: (id) => (id === "default vip" ? Number.POSITIVE_INFINITY : undefined),
|
|
186
|
+
});
|
|
187
|
+
const t = 1_700_000_000_000;
|
|
188
|
+
// VIP can burst far past 5.
|
|
189
|
+
for (let i = 0; i < 1000; i++) {
|
|
190
|
+
assert.equal(lim.check("default vip", t + i).allowed, true);
|
|
191
|
+
}
|
|
192
|
+
// Default user still capped at 5.
|
|
193
|
+
for (let i = 0; i < 5; i++) {
|
|
194
|
+
assert.equal(lim.check("default user", t + i).allowed, true);
|
|
195
|
+
}
|
|
196
|
+
assert.equal(lim.check("default user", t + 5).allowed, false);
|
|
197
|
+
});
|
|
198
|
+
test("IdentityRateLimiter — inspect() reports the per-identity limit too (not just the default)", () => {
|
|
199
|
+
const lim = new IdentityRateLimiter({
|
|
200
|
+
limit: 60,
|
|
201
|
+
limitFor: (id) => (id === "default agent" ? 600 : undefined),
|
|
202
|
+
});
|
|
203
|
+
assert.equal(lim.inspect("default agent").limit, 600);
|
|
204
|
+
assert.equal(lim.inspect("default other").limit, 60);
|
|
205
|
+
});
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-identity token-budget tracker.
|
|
3
|
+
*
|
|
4
|
+
* The MCP transport gets a per-call sliding-window cap from
|
|
5
|
+
* `IdentityRateLimiter`. Operators with paid-tier LLM agents want a
|
|
6
|
+
* second axis: a daily token quota that limits the number of tokens
|
|
7
|
+
* a credential can pull through the tool layer in a 24-hour rolling
|
|
8
|
+
* window. This module is the data-plane half of that knob.
|
|
9
|
+
*
|
|
10
|
+
* Token estimation:
|
|
11
|
+
* The MCP tool response (and the agent's request args) cross the
|
|
12
|
+
* boundary as JSON text. We don't tokenize with a real tokenizer
|
|
13
|
+
* here — pulling in tiktoken / gpt-tokenizer would add a non-trivial
|
|
14
|
+
* wasm/dep that the airgapped-friendly posture wants to avoid. The
|
|
15
|
+
* estimate uses a deliberate over-approximation:
|
|
16
|
+
* tokens ≈ ceil(chars / 4) * 1.05
|
|
17
|
+
* which tends to over-count by ~5% vs. cl100k_base on prose payloads
|
|
18
|
+
* and ~15% on dense code/JSON. Under-counting is the worse error
|
|
19
|
+
* mode for budget control, so the rounding direction is intentional.
|
|
20
|
+
*
|
|
21
|
+
* Window:
|
|
22
|
+
* 24h rolling, bucketed at 1-hour resolution to keep memory bounded.
|
|
23
|
+
* Each bucket records (hour-aligned timestamp, tokens). On every
|
|
24
|
+
* `check()` we drop buckets older than 24h and sum the rest.
|
|
25
|
+
*
|
|
26
|
+
* Persistence is optional: when `filePath` is set, `bootstrap()` loads
|
|
27
|
+
* the prior snapshot and charges are flushed to disk on a debounced
|
|
28
|
+
* timer; when unset the tracker is in-memory only and starts empty.
|
|
29
|
+
*/
|
|
30
|
+
/** Estimate tokens from a string. Intentionally over-counts. */
|
|
31
|
+
export declare function estimateTokens(text: string): number;
|
|
32
|
+
/** Estimate tokens for an arbitrary JSON-serialisable value. */
|
|
33
|
+
export declare function estimateTokensFor(v: unknown): number;
|
|
34
|
+
export interface TokenBudgetConfig {
|
|
35
|
+
/** Daily cap in tokens per identity. 0 / undefined / negative
|
|
36
|
+
* disables the cap (the limiter never denies). */
|
|
37
|
+
dailyLimit?: number;
|
|
38
|
+
/** Override Date.now for tests. */
|
|
39
|
+
now?: () => number;
|
|
40
|
+
/** Optional path to a JSON snapshot file. When set, the tracker
|
|
41
|
+
* loads buckets on bootstrap() and atomically rewrites the
|
|
42
|
+
* snapshot on a debounced timer after each charge — so a server
|
|
43
|
+
* restart picks up the rolling 24h window where it left off.
|
|
44
|
+
* Unset → in-memory only (fine for demo / single-instance). */
|
|
45
|
+
filePath?: string;
|
|
46
|
+
/** Debounce window in ms for snapshot writes; default 1000. Tests
|
|
47
|
+
* pass 0 to flush synchronously between assertions. */
|
|
48
|
+
flushDebounceMs?: number;
|
|
49
|
+
}
|
|
50
|
+
export interface CheckResult {
|
|
51
|
+
allowed: boolean;
|
|
52
|
+
/** Tokens used in the trailing 24h window AFTER this call was
|
|
53
|
+
* counted (when allowed) — or as of now (when denied). */
|
|
54
|
+
used: number;
|
|
55
|
+
/** Configured daily cap. 0 means uncapped. */
|
|
56
|
+
limit: number;
|
|
57
|
+
/** Seconds until ENOUGH buckets drop off to fit the denied request.
|
|
58
|
+
* Walks the bucket list oldest-first and stops at the first
|
|
59
|
+
* timestamp where dropping every bucket older would free
|
|
60
|
+
* >= (used + tokens - limit) tokens. Rounded up. 0 when allowed
|
|
61
|
+
* (or when uncapped). */
|
|
62
|
+
retryAfterSeconds: number;
|
|
63
|
+
/** How many tokens will be available again at retryAfterSeconds.
|
|
64
|
+
* Useful for HTTP 429 bodies + Retry-After hints. 0 when allowed. */
|
|
65
|
+
freedAtRetry: number;
|
|
66
|
+
}
|
|
67
|
+
/** Per-identity 24h-rolling token budget with 1h buckets. */
|
|
68
|
+
export declare class TokenBudget {
|
|
69
|
+
private readonly limit;
|
|
70
|
+
private readonly now;
|
|
71
|
+
private readonly buckets;
|
|
72
|
+
private readonly filePath;
|
|
73
|
+
private readonly debounceMs;
|
|
74
|
+
private flushTimer;
|
|
75
|
+
private writeQueue;
|
|
76
|
+
private bootstrapped;
|
|
77
|
+
constructor(cfg?: TokenBudgetConfig);
|
|
78
|
+
/** Load a prior snapshot from disk (when filePath is set).
|
|
79
|
+
* Safe to call multiple times — bootstraps once and caches. */
|
|
80
|
+
bootstrap(): Promise<void>;
|
|
81
|
+
/** Record-and-test: does adding `tokens` keep `identity` under the
|
|
82
|
+
* daily cap? When `allowed`, the tokens are persisted into the
|
|
83
|
+
* bucket; when denied, they are NOT recorded (so a single huge
|
|
84
|
+
* request can't push the bucket arbitrarily over the cap and
|
|
85
|
+
* starve the rest of the window). */
|
|
86
|
+
check(identity: string, tokens: number, now?: number): CheckResult;
|
|
87
|
+
/** Read-only snapshot for /api/usage. */
|
|
88
|
+
inspect(identity: string, now?: number): {
|
|
89
|
+
used: number;
|
|
90
|
+
limit: number;
|
|
91
|
+
windowMs: number;
|
|
92
|
+
};
|
|
93
|
+
/** All identities the tracker has ever seen — for /api/usage aggregation. */
|
|
94
|
+
knownIdentities(): string[];
|
|
95
|
+
/** For tests — clear everything. */
|
|
96
|
+
reset(): void;
|
|
97
|
+
/** Internal: append `tokens` to the current hour's bucket for
|
|
98
|
+
* `identity`. Creates a new bucket when the hour boundary rolls. */
|
|
99
|
+
private record;
|
|
100
|
+
/** Debounce a snapshot write. No-op when filePath isn't configured. */
|
|
101
|
+
private scheduleFlush;
|
|
102
|
+
/** Write the current bucket state to disk atomically (tmp + rename).
|
|
103
|
+
* Public so a graceful shutdown can `await tokenBudget.flushNow()`. */
|
|
104
|
+
flushNow(): Promise<void>;
|
|
105
|
+
/** Internal: drop buckets older than 24h and return the remainder. */
|
|
106
|
+
private pruneOld;
|
|
107
|
+
private usedInWindow;
|
|
108
|
+
/** Walk the bucket list oldest-first until enough tokens would have
|
|
109
|
+
* dropped off to fit a request needing `needed` extra headroom.
|
|
110
|
+
* Returns the wait in ms + the cumulative freed tokens at that
|
|
111
|
+
* point. When `needed` exceeds the entire window's content (the
|
|
112
|
+
* caller wants more than the cap), returns the time until the
|
|
113
|
+
* newest bucket drops + everything freed. */
|
|
114
|
+
private nextEnoughHeadroom;
|
|
115
|
+
}
|
|
116
|
+
/** Parse OMCP_TOOL_DAILY_TOKENS into a daily limit. Mirrors the
|
|
117
|
+
* resolveToolRatePerMin pattern: unset / empty / non-numeric /
|
|
118
|
+
* zero / negative → uncapped (0). Positive integers pass through. */
|
|
119
|
+
export declare function resolveDailyTokenLimit(raw: string | undefined): number;
|