@ooky/sdk 0.1.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +109 -0
- package/README.md +59 -12
- package/package.json +26 -8
- package/src/bots.js +43 -1
- package/src/core.js +490 -51
- package/src/edge.d.ts +1 -0
- package/src/express.d.ts +6 -0
- package/src/express.js +159 -19
- package/src/index.d.ts +125 -0
- package/src/mcp.js +127 -0
- package/src/next.d.ts +12 -0
- package/src/next.js +135 -16
- package/src/referrals.js +73 -0
package/src/express.js
CHANGED
|
@@ -1,20 +1,58 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Express adapter — `app.use(ookyMiddleware({ apiKey, domain }))`.
|
|
3
3
|
*
|
|
4
|
-
* Intercepts the well-known AI paths and serves the manifest
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Intercepts the well-known AI paths and serves the manifest, answers MCP
|
|
5
|
+
* tool invocations on POST /mcp, and for every request checks the
|
|
6
|
+
* User-Agent against the bot registry — firing a fire-and-forget event when
|
|
7
|
+
* a bot is detected, or an ai_referral event when a human arrives from an
|
|
8
|
+
* AI platform (ChatGPT, Perplexity, Claude, …).
|
|
7
9
|
*/
|
|
8
10
|
|
|
9
|
-
import {
|
|
11
|
+
import {
|
|
12
|
+
createOokyHandler,
|
|
13
|
+
logMiddlewareDisabled,
|
|
14
|
+
MAX_MCP_BODY_BYTES,
|
|
15
|
+
MAX_UA_LENGTH,
|
|
16
|
+
MAX_PATH_LENGTH,
|
|
17
|
+
clampString,
|
|
18
|
+
} from "./core.js";
|
|
10
19
|
|
|
11
20
|
export function ookyMiddleware(options) {
|
|
12
|
-
|
|
21
|
+
let handler;
|
|
22
|
+
try {
|
|
23
|
+
handler = createOokyHandler(options);
|
|
24
|
+
} catch (err) {
|
|
25
|
+
// Construction failed (almost always missing OOKY_API_KEY / OOKY_DOMAIN).
|
|
26
|
+
// This middleware is mounted on every request, so throwing here would take
|
|
27
|
+
// down the customer's whole app. Fail safe: log loudly, return a no-op that
|
|
28
|
+
// simply calls next().
|
|
29
|
+
logMiddlewareDisabled(err);
|
|
30
|
+
return function ookyHandlerDisabled(req, res, next) {
|
|
31
|
+
return next();
|
|
32
|
+
};
|
|
33
|
+
}
|
|
13
34
|
|
|
14
35
|
return async function ookyHandler(req, res, next) {
|
|
15
|
-
const
|
|
36
|
+
const rawUa = req.headers["user-agent"] || "";
|
|
16
37
|
const path = req.path || req.url || "/";
|
|
17
|
-
|
|
38
|
+
// Defensive caps on untrusted strings before they enter the event payload.
|
|
39
|
+
const ua = clampString(rawUa, MAX_UA_LENGTH);
|
|
40
|
+
const pagePath = clampString(path.split("?")[0] || "/", MAX_PATH_LENGTH);
|
|
41
|
+
const method = req.method || "GET";
|
|
42
|
+
const country = countryFromExpress(req);
|
|
43
|
+
|
|
44
|
+
// detectBot / matchPath run on EVERY request. By contract they never
|
|
45
|
+
// throw (a malformed bot registry is sanitised in core), but a middleware
|
|
46
|
+
// must not be able to crash the customer's process under ANY circumstance
|
|
47
|
+
// — so we degrade to pass-through if anything here throws.
|
|
48
|
+
let bot = null;
|
|
49
|
+
let kind = null;
|
|
50
|
+
try {
|
|
51
|
+
bot = handler.detectBot(ua);
|
|
52
|
+
kind = handler.matchPath(path);
|
|
53
|
+
} catch {
|
|
54
|
+
return next();
|
|
55
|
+
}
|
|
18
56
|
|
|
19
57
|
// Fire bot event regardless of whether we serve the manifest. The Ooky
|
|
20
58
|
// dashboard tracks bot visits across all routes, not just /llms.txt.
|
|
@@ -24,24 +62,126 @@ export function ookyMiddleware(options) {
|
|
|
24
62
|
handler.recordEvent({
|
|
25
63
|
bot: { name: bot.name, verified: false, ua_string: ua },
|
|
26
64
|
request: {
|
|
27
|
-
page_path:
|
|
28
|
-
method
|
|
65
|
+
page_path: pagePath,
|
|
66
|
+
method,
|
|
67
|
+
manifest_file: kind || null,
|
|
29
68
|
},
|
|
69
|
+
geo: country ? { country } : null,
|
|
30
70
|
});
|
|
71
|
+
} else {
|
|
72
|
+
// Human traffic: attribute visits referred by AI platforms.
|
|
73
|
+
const referral = handler.detectReferral(
|
|
74
|
+
req.headers["referer"] || req.headers["referrer"],
|
|
75
|
+
req.query?.utm_source ?? parseUtmSource(req.url)
|
|
76
|
+
);
|
|
77
|
+
if (referral) {
|
|
78
|
+
handler.recordEvent({
|
|
79
|
+
event_type: "ai_referral",
|
|
80
|
+
referral: {
|
|
81
|
+
source: referral.source,
|
|
82
|
+
referrer_url: referral.referrerUrl,
|
|
83
|
+
detection_method: referral.method,
|
|
84
|
+
},
|
|
85
|
+
request: { page_path: pagePath, method },
|
|
86
|
+
geo: country ? { country } : null,
|
|
87
|
+
});
|
|
88
|
+
}
|
|
31
89
|
}
|
|
32
90
|
|
|
33
|
-
const kind = handler.matchPath(path);
|
|
34
91
|
if (!kind) return next();
|
|
35
92
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
93
|
+
try {
|
|
94
|
+
let result;
|
|
95
|
+
if (kind === "mcp" && req.method === "POST") {
|
|
96
|
+
result = await handler.handleMcpInvocation(await readJsonBody(req));
|
|
97
|
+
} else if (kind === "mcp" && req.method === "OPTIONS") {
|
|
98
|
+
result = { status: 204, headers: corsPreflightHeaders(), body: "" };
|
|
99
|
+
} else {
|
|
100
|
+
result = await handler.serveManifest(kind);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const { status, headers, body } = result;
|
|
104
|
+
if (res.headersSent) return;
|
|
105
|
+
res.status(status);
|
|
106
|
+
for (const [k, v] of Object.entries(headers)) {
|
|
107
|
+
res.setHeader(k, v);
|
|
108
|
+
}
|
|
109
|
+
if (body === null || body === undefined) {
|
|
110
|
+
res.end(); // e.g. 202 for MCP notifications
|
|
111
|
+
} else if (typeof body === "string") {
|
|
112
|
+
res.send(body);
|
|
113
|
+
} else {
|
|
114
|
+
res.json(body);
|
|
115
|
+
}
|
|
116
|
+
} catch (err) {
|
|
117
|
+
// serveManifest never throws by contract, but a middleware must not be
|
|
118
|
+
// able to crash the customer's app under any circumstances.
|
|
119
|
+
if (!res.headersSent) next(err);
|
|
45
120
|
}
|
|
46
121
|
};
|
|
47
122
|
}
|
|
123
|
+
|
|
124
|
+
/**
 * Build the headers sent in reply to a CORS preflight (OPTIONS) request.
 *
 * @returns {Record<string, string>} A freshly allocated header map: any
 *   origin, the GET/POST/OPTIONS methods, and the Content-Type request header.
 */
function corsPreflightHeaders() {
  const preflight = {};
  preflight["Access-Control-Allow-Origin"] = "*";
  preflight["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS";
  preflight["Access-Control-Allow-Headers"] = "Content-Type";
  return preflight;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Best-effort visitor country taken from edge/CDN request headers, so the
 * dashboard's geo panel isn't empty for SDK customers.
 *
 * Headers are consulted in priority order — `cf-ipcountry` (Cloudflare),
 * `x-vercel-ip-country` (Vercel), then `x-appengine-country` — and the first
 * non-empty value is handed to normalizeCountry, which decides whether it is
 * usable.
 *
 * @param {object} req Express-style request exposing a `headers` map.
 * @returns {string|null} Whatever normalizeCountry yields for the chosen value.
 */
function countryFromExpress(req) {
  const headerPriority = ["cf-ipcountry", "x-vercel-ip-country", "x-appengine-country"];
  for (const headerName of headerPriority) {
    const value = req.headers[headerName];
    if (value) {
      return normalizeCountry(value);
    }
  }
  // None of the headers carried a value.
  return normalizeCountry("");
}
|
|
146
|
+
|
|
147
|
+
/**
 * Validate a raw country value taken from an untrusted request header.
 *
 * @param {unknown} raw Candidate value; anything that is not a non-empty
 *   string is rejected.
 * @returns {string|null} The trimmed, upper-cased two-letter code, or null
 *   when the value is missing, is not exactly two ASCII letters, or is one of
 *   the "XX"/"T1" placeholders.
 */
function normalizeCountry(raw) {
  if (typeof raw !== "string" || raw === "") return null;
  const trimmed = raw.trim();
  // Letters only: a length check alone would let values such as "12" or "U1"
  // through. This also rejects the "T1" placeholder.
  if (!/^[A-Za-z]{2}$/.test(trimmed)) return null;
  const code = trimmed.toUpperCase();
  return code === "XX" ? null : code;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Fallback `utm_source` extraction for when req.query isn't populated.
 *
 * @param {string} url Raw request URL (path plus query string).
 * @returns {string|null} The `utm_source` query value, or null when the URL
 *   is empty, does not contain the parameter, or cannot be parsed.
 */
function parseUtmSource(url) {
  // Cheap substring test first so most requests never construct a URL object.
  const mentionsParam = Boolean(url) && url.includes("utm_source=");
  if (!mentionsParam) return null;

  let parsed;
  try {
    // The base only exists so that relative request URLs parse.
    parsed = new URL(url, "http://localhost");
  } catch {
    return null;
  }
  return parsed.searchParams.get("utm_source");
}
|
|
163
|
+
|
|
164
|
+
/**
 * Read the request body as JSON.
 *
 * Resolution order:
 *  1. `req.body` is a non-empty parsed object (e.g. express.json() mounted
 *     before us): returned as-is.
 *  2. `req.body` is a non-empty string or Buffer (e.g. express.text() /
 *     express.raw() consumed the stream): size-checked, then parsed here.
 *  3. Otherwise the raw stream is read with a MAX_MCP_BODY_BYTES cap.
 *
 * @param {object} req Request that is async-iterable over its body chunks and
 *   may carry a pre-read `body`.
 * @returns {Promise<unknown|null>} The parsed JSON value, or null when the
 *   body is empty, over the cap, or not valid JSON. The caller passes null
 *   through to handleMcpInvocation as "no usable JSON".
 */
async function readJsonBody(req) {
  const buffered = req.body;
  const isRawPayload = typeof buffered === "string" || Buffer.isBuffer(buffered);

  if (
    !isRawPayload &&
    buffered &&
    typeof buffered === "object" &&
    Object.keys(buffered).length > 0
  ) {
    return buffered;
  }

  try {
    if (isRawPayload && buffered.length > 0) {
      if (Buffer.byteLength(buffered) > MAX_MCP_BODY_BYTES) return null;
      return JSON.parse(typeof buffered === "string" ? buffered : buffered.toString("utf8"));
    }

    let size = 0;
    const chunks = [];
    for await (const chunk of req) {
      // After req.setEncoding() the stream yields strings; Buffer.concat only
      // accepts binary chunks, and the cap must count bytes, not characters.
      const bytes = typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk;
      size += bytes.length;
      if (size > MAX_MCP_BODY_BYTES) return null;
      chunks.push(bytes);
    }
    if (chunks.length === 0) return null;
    return JSON.parse(Buffer.concat(chunks).toString("utf8"));
  } catch {
    // Deliberate best-effort: any read or parse failure means "no usable body".
    return null;
  }
}
|
package/src/index.d.ts
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type declarations for @ooky/sdk. Hand-written — the package ships plain
|
|
3
|
+
* ESM JavaScript with no build step; keep these in sync with src/core.js.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** Kinds returned by `matchPath` and accepted by `serveManifest`. */
export type ManifestKind =
  | "llms"
  | "llms-full"
  | "manifest"
  | "agents"
  | "mcp";
|
|
7
|
+
|
|
8
|
+
/** One entry of the bot registry used for User-Agent matching. */
export interface BotEntry {
  name: string;
  /** Case-insensitive substring matched against the User-Agent. */
  pattern: string;
  /**
   * One of the known categories, or any other string. `string & {}` keeps the
   * union open without collapsing the literals into plain `string`, so
   * editors can still suggest the known values.
   */
  category?: "ai" | "search" | "social" | "other" | (string & {});
  verified?: boolean;
}
|
|
15
|
+
|
|
16
|
+
/** Second argument handed to the `onError` callback. */
export interface OokyErrorContext {
  op:
    | "recordEvent"
    | "serveManifest"
    | "refreshBotRegistry";
  /** Upstream HTTP status, present when the failure was a non-2xx response. */
  status?: number;
  kind?: ManifestKind;
  /** True when the maxEventsPerMinute token bucket dropped events. */
  throttled?: boolean;
}
|
|
24
|
+
|
|
25
|
+
/** Value returned by `detectReferral` when a referral is detected. */
export interface AIReferral {
  source: string;
  referrerUrl: string | null;
  method:
    | "referer_header"
    | "utm_param";
}
|
|
30
|
+
|
|
31
|
+
/** Options accepted by `createOokyHandler`. */
export interface OokyHandlerOptions {
  /** Dashboard-issued bearer token (`ooky_sk_*`). */
  apiKey: string;
  /** The domain as registered in Ooky, for example "acme.com". */
  domain: string;
  /** Base URL of the Ooky API. Default: https://api.ooky.ai/api */
  apiBase?: string;
  /** Base URL of the manifest CDN. Default: `${apiBase}/public/manifest` */
  cdnBase?: string;
  /** Replacement bot registry. Default: the built-in list. */
  bots?: Array<BotEntry>;
  /** Refresh the bot registry periodically from /api/public/bots. Default: true */
  autoRefreshBots?: boolean;
  /** Timeout in ms for upstream fetches (manifest + registry + events). Default: 10000 */
  fetchTimeoutMs?: number;
  /** TTL in ms of the in-memory manifest cache; 0 turns caching off. Default: 300000 */
  manifestCacheTtlMs?: number;
  /**
   * Invoked for every failure the SDK swallows: event POST rejections and
   * non-2xx responses (e.g. a 401 after a key rotation), manifest fetch
   * failures, and registry refresh failures. Default: silent.
   */
  onError?: (error: Error, context: OokyErrorContext) => void;
  /**
   * Per-minute token-bucket cap on event POSTs (bot-storm insurance).
   * Dropped events are reported through onError. Pass Infinity to disable.
   * Default: 300
   */
  maxEventsPerMinute?: number;
}
|
|
60
|
+
|
|
61
|
+
/** Framework-neutral response produced by `serveManifest` / `handleMcpInvocation`. */
export interface ManifestResponse {
  status: number;
  headers: Record<string, string>;
  /**
   * String for text kinds (llms, llms-full, agents); object for JSON kinds;
   * `null` when there is nothing to send — MCP notifications are answered
   * with a 202 and no body, and the adapters turn a null body into an empty
   * response.
   */
  body: string | Record<string, unknown> | null;
}
|
|
67
|
+
|
|
68
|
+
/** Payload accepted by `recordEvent`; every field is optional. */
export interface OokyEventPayload {
  event_id?: string;
  timestamp?: string;
  /** Use "ai_referral" together with `referral` for AI-platform referral visits. */
  event_type?: string;
  referral?: null | {
    source: string;
    referrer_url?: string | null;
    detection_method?: string;
  };
  bot?: null | {
    name: string;
    verified?: boolean;
    ua_string?: string;
  };
  request?: null | {
    page_path?: string;
    method?: string;
    manifest_version?: string | null;
    manifest_file?: string | null;
  };
  session?: Record<string, unknown> | null;
  geo?: null | { country?: string | null };
  serve?: Record<string, unknown> | null;
}
|
|
89
|
+
|
|
90
|
+
/** Object returned by `createOokyHandler`. */
export interface OokyHandler {
  matchPath(path: string): ManifestKind | null;
  detectBot(userAgent: string): BotEntry | null;
  /** Detects a human visit that was referred by an AI platform (Referer / utm_source). */
  detectReferral(
    referer?: string | null,
    utmSource?: string | null,
  ): AIReferral | null;
  serveManifest(kind: ManifestKind): Promise<ManifestResponse>;
  /**
   * Answers an MCP request (POST /mcp or /.well-known/mcp). Two protocols
   * are understood:
   *  - Standard MCP: JSON-RPC 2.0 over streamable HTTP (`initialize`,
   *    `tools/list`, `tools/call`, `ping`). This is what real MCP clients
   *    (Claude, MCP Inspector) send; it is recognised by `jsonrpc: "2.0"` on
   *    the body. A `null` body (unparseable JSON) yields a JSON-RPC parse
   *    error (-32700).
   *  - Legacy Ooky: `{ tool, arguments }` → `{ result }`, retained for
   *    Worker-tier compatibility.
   */
  handleMcpInvocation(body: unknown): Promise<ManifestResponse>;
  /** Fire-and-forget and never rejects. On edge runtimes pass it to `event.waitUntil()`. */
  recordEvent(payload: OokyEventPayload): Promise<unknown>;
  refreshBotRegistry(force?: boolean): Promise<Array<BotEntry>>;
  /** The in-flight registry refresh, if there is one. On edge runtimes pass it to `event.waitUntil()`. */
  pendingBotRefresh(): Promise<Array<BotEntry>> | null;
}
|
|
113
|
+
|
|
114
|
+
export const SDK_VERSION: string;

/** Runtime tag, detected best-effort, that is stamped into the `X-Ooky-Sdk` header ("node" | "edge" | "web"). */
export const SDK_RUNTIME: string;

/** Largest MCP request body the adapters accept (64KB). */
export const MAX_MCP_BODY_BYTES: number;

/** Defensive limit on the bot User-Agent copied into events (1024). */
export const MAX_UA_LENGTH: number;

/** Defensive limit on the request path copied into events (2048). */
export const MAX_PATH_LENGTH: number;

/** Truncates an untrusted string to `max` chars; non-string values pass through unchanged. */
export function clampString<T>(value: T, max: number): T;

export function createOokyHandler(options: OokyHandlerOptions): OokyHandler;
|
package/src/mcp.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stateless MCP (Model Context Protocol) JSON-RPC 2.0 handler.
|
|
3
|
+
*
|
|
4
|
+
* Real MCP clients (Claude, MCP Inspector, ChatGPT connectors) speak
|
|
5
|
+
* JSON-RPC over streamable HTTP: initialize → notifications/initialized →
|
|
6
|
+
* tools/list → tools/call. This module implements the stateless subset —
|
|
7
|
+
* each POST is handled independently, single JSON response per request (the
|
|
8
|
+
* spec allows servers to answer with plain application/json instead of an
|
|
9
|
+
* SSE stream).
|
|
10
|
+
*
|
|
11
|
+
* Mirrors worker/src/mcp.js — when you change the protocol surface here,
|
|
12
|
+
* change it there too. Runtime-agnostic: no Node-only APIs.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** Protocol revision this handler offers to clients during `initialize`. */
export const MCP_PROTOCOL_VERSION = "2025-03-26";

// Error codes from the JSON-RPC 2.0 specification.
const PARSE_ERROR = -32700; // body was not valid JSON
const INVALID_REQUEST = -32600; // not a well-formed request object
const METHOD_NOT_FOUND = -32601; // method is not implemented
const INVALID_PARAMS = -32602; // params missing or unusable
const INTERNAL_ERROR = -32603; // unexpected failure while handling
|
|
23
|
+
|
|
24
|
+
/**
 * Process a single MCP JSON-RPC message and produce the HTTP-level outcome.
 *
 * @param {unknown} message Parsed request body (null/undefined when the body
 *   could not be parsed as JSON).
 * @param {object} server
 * @param {string} server.name      Reported as serverInfo.name.
 * @param {string} server.version   Reported as serverInfo.version.
 * @param {Array} server.tools      Tool descriptors ({ name, description, inputSchema }).
 * @param {Function} server.callTool async (name, args) → result object. A
 *   thrown McpToolError is reported in-band (isError: true); any other throw
 *   becomes a JSON-RPC internal error.
 * @returns {Promise<{ status: number, body: object|null }>} `body` is null
 *   for messages that get no reply (notifications and id-less requests).
 */
export async function handleMcpJsonRpc(message, server) {
  if (message == null) {
    return rpcError(null, PARSE_ERROR, "Parse error: body must be valid JSON");
  }

  // JSON-RPC batches were removed from MCP in the 2025-06-18 revision and were
  // never supported here, so they are rejected explicitly.
  if (Array.isArray(message)) {
    return rpcError(null, INVALID_REQUEST, "Batch requests are not supported");
  }

  const wellFormed =
    typeof message === "object" &&
    message.jsonrpc === "2.0" &&
    typeof message.method === "string";
  if (!wellFormed) {
    return rpcError(message?.id ?? null, INVALID_REQUEST, "Invalid JSON-RPC 2.0 request");
  }

  const requestId = message.id;
  const rpcMethod = message.method;
  const rpcParams = message.params;

  // No reply is sent for notifications (streamable HTTP: 202 Accepted, empty
  // body), nor for requests without an id — those cannot be addressed, so
  // they are accepted and dropped.
  const expectsNoReply = rpcMethod.startsWith("notifications/") || requestId == null;
  if (expectsNoReply) {
    return { status: 202, body: null };
  }

  try {
    if (rpcMethod === "initialize") {
      return rpcResult(requestId, {
        protocolVersion: negotiateVersion(rpcParams?.protocolVersion),
        capabilities: { tools: {} },
        serverInfo: { name: server.name, version: server.version },
      });
    }

    if (rpcMethod === "ping") {
      return rpcResult(requestId, {});
    }

    if (rpcMethod === "tools/list") {
      return rpcResult(requestId, { tools: server.tools });
    }

    if (rpcMethod !== "tools/call") {
      return rpcError(requestId, METHOD_NOT_FOUND, `Method not found: ${rpcMethod}`);
    }

    const toolName = rpcParams?.name;
    if (!toolName || typeof toolName !== "string") {
      return rpcError(requestId, INVALID_PARAMS, "tools/call requires params.name");
    }

    const isKnownTool = server.tools.some((tool) => tool.name === toolName);
    if (!isKnownTool) {
      return rpcError(requestId, INVALID_PARAMS, `Unknown tool: ${toolName}`);
    }

    const output = await server.callTool(toolName, rpcParams?.arguments || {});
    return rpcResult(requestId, {
      content: [{ type: "text", text: JSON.stringify(output) }],
      isError: false,
    });
  } catch (failure) {
    if (failure instanceof McpToolError) {
      // Per the MCP spec, tool execution failures travel in-band so the LLM
      // can see them, rather than as protocol errors.
      return rpcResult(requestId, {
        content: [{ type: "text", text: failure.message }],
        isError: true,
      });
    }
    return rpcError(requestId, INTERNAL_ERROR, "Internal error");
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Throw from callTool to report a tool-level failure in-band (isError: true).
 *
 * Sets `name` so logs and stack traces identify the error type; the default
 * subclass would report itself as a plain "Error".
 */
export class McpToolError extends Error {
  /**
   * @param {string} [message] Text surfaced to the MCP client as the tool result.
   * @param {{ cause?: unknown }} [options] Standard Error options.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "McpToolError";
  }
}
|
|
109
|
+
|
|
110
|
+
/**
 * Choose the protocol version to report in the `initialize` result.
 *
 * Echoes the client's requested version only when it is a revision this
 * handler can speak; otherwise offers ours (the client disconnects if that's
 * unacceptable). Membership is checked against an explicit list: a
 * lexicographic `<=` comparison would also echo strings that are not protocol
 * revisions at all, such as "" or "1.0".
 *
 * @param {unknown} requested `params.protocolVersion` from the client.
 * @returns {string} The negotiated protocol version.
 */
function negotiateVersion(requested) {
  // Published MCP revisions up to and including MCP_PROTOCOL_VERSION.
  const supportedVersions = ["2024-11-05", MCP_PROTOCOL_VERSION];
  return supportedVersions.includes(requested) ? requested : MCP_PROTOCOL_VERSION;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Wrap a successful result in a JSON-RPC 2.0 response, delivered with HTTP 200.
 *
 * @param {*} id      Request id being answered.
 * @param {*} result  Value placed in the `result` member.
 * @returns {{ status: number, body: object }}
 */
function rpcResult(id, result) {
  const envelope = { jsonrpc: "2.0", id, result };
  return { status: 200, body: envelope };
}
|
|
124
|
+
|
|
125
|
+
/**
 * Wrap an error in a JSON-RPC 2.0 response. The HTTP status stays 200; the
 * failure is carried by the `error` member.
 *
 * @param {*} id            Request id being answered (null when unknown).
 * @param {number} code     JSON-RPC error code.
 * @param {string} message  Human-readable description.
 * @returns {{ status: number, body: object }}
 */
function rpcError(id, code, message) {
  const error = { code, message };
  const envelope = { jsonrpc: "2.0", id, error };
  return { status: 200, body: envelope };
}
|
package/src/next.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { OokyHandlerOptions } from "./index";
|
|
2
|
+
|
|
3
|
+
/**
 * Creates a Next.js middleware function (Node or Edge runtime).
 * The returned function resolves to a Response for the well-known AI paths
 * and to undefined when the request should fall through.
 */
export function ookyMiddleware(
  options: OokyHandlerOptions,
): (
  request: Request,
  event?: { waitUntil(promise: Promise<unknown>): void },
) => Promise<Response | undefined>;
|
package/src/next.js
CHANGED
|
@@ -8,36 +8,155 @@
|
|
|
8
8
|
*
|
|
9
9
|
* Returns a function with the Next.js middleware signature: it receives a
|
|
10
10
|
* NextRequest-like object (compatible Web Request) and returns a Response,
|
|
11
|
-
* or undefined to fall through to the next middleware/route.
|
|
11
|
+
* or undefined to fall through to the next middleware/route. Serves the
|
|
12
|
+
* well-known AI paths, answers MCP tool invocations on POST /mcp, and fires
|
|
13
|
+
* bot / ai_referral analytics events.
|
|
12
14
|
*/
|
|
13
15
|
|
|
14
|
-
import { createOokyHandler } from "./core.js";
|
|
16
|
+
import { createOokyHandler, logMiddlewareDisabled, MAX_MCP_BODY_BYTES, MAX_UA_LENGTH, MAX_PATH_LENGTH, clampString } from "./core.js";
|
|
15
17
|
|
|
16
18
|
/**
 * Create the Next.js middleware.
 *
 * The returned function always yields a Promise. It resolves to a Response
 * for the well-known AI paths (manifest files, MCP POST/OPTIONS) and to
 * undefined to fall through to the next middleware/route. Bot and
 * ai_referral events are fired along the way and handed to
 * `event.waitUntil()` when the runtime provides it.
 *
 * @param {object} options Passed straight to createOokyHandler.
 * @returns {(request: Request, event?: { waitUntil(p: Promise<unknown>): void }) => Promise<Response|undefined>}
 */
export function ookyMiddleware(options) {
  let handler;
  try {
    handler = createOokyHandler(options);
  } catch (err) {
    // Construction failed (almost always missing OOKY_API_KEY / OOKY_DOMAIN).
    // The middleware runs on every request, so throwing here would 500 the
    // customer's entire site. Fail safe: log loudly, return a pass-through.
    logMiddlewareDisabled(err);
    // async so the fallback matches the Promise return type declared in
    // next.d.ts.
    return async function ookyNextMiddlewareDisabled() {
      return undefined; // Next continues to the route — app unaffected.
    };
  }

  return async function ookyNextMiddleware(request, event) {
    const url = new URL(request.url);
    const rawUa = request.headers.get("user-agent") || "";
    // Defensive caps on untrusted strings before they enter the event payload.
    const ua = clampString(rawUa, MAX_UA_LENGTH);
    const pagePath = clampString(url.pathname || "/", MAX_PATH_LENGTH);
    const method = request.method || "GET";
    const country = countryFromRequest(request);

    const waitUntil =
      event && typeof event.waitUntil === "function"
        ? (p) => event.waitUntil(p)
        : () => {};

    // detectBot / matchPath run on EVERY request. They never throw by
    // contract (the registry is sanitised in core), but a middleware must
    // never be able to crash the customer's app — degrade to pass-through.
    let bot = null;
    let kind = null;
    try {
      bot = handler.detectBot(ua);
      kind = handler.matchPath(url.pathname);
    } catch {
      return undefined;
    }

    // detectBot may have kicked off the hourly bot-registry refresh; keep it
    // alive past the response on edge runtimes.
    const refresh = handler.pendingBotRefresh();
    if (refresh) waitUntil(refresh);

    // `verified: false` — UA-only matching can't prove bot identity. The
    // Worker tier does IP + reverse-DNS verification; the SDK can't.
    if (bot) {
      waitUntil(
        handler.recordEvent({
          bot: { name: bot.name, verified: false, ua_string: ua },
          request: {
            page_path: pagePath,
            method,
            manifest_file: kind || null,
          },
          geo: country ? { country } : null,
        })
      );
    } else {
      // Human traffic: attribute visits referred by AI platforms.
      const referral = handler.detectReferral(
        request.headers.get("referer"),
        url.searchParams.get("utm_source")
      );
      if (referral) {
        waitUntil(
          handler.recordEvent({
            event_type: "ai_referral",
            referral: {
              source: referral.source,
              referrer_url: referral.referrerUrl,
              detection_method: referral.method,
            },
            request: { page_path: pagePath, method },
            geo: country ? { country } : null,
          })
        );
      }
    }

    if (!kind) return undefined;

    try {
      let result;
      if (kind === "mcp" && request.method === "POST") {
        // Fast path: reject on the declared length before reading anything.
        const declaredLength = Number(request.headers.get("content-length"));
        if (Number.isFinite(declaredLength) && declaredLength > MAX_MCP_BODY_BYTES) {
          return payloadTooLargeResponse();
        }
        // Content-Length can be absent (chunked upload) or wrong, so the cap
        // is also enforced while streaming — parity with the Express adapter.
        const { tooLarge, json } = await readCappedJsonBody(request, MAX_MCP_BODY_BYTES);
        if (tooLarge) return payloadTooLargeResponse();
        result = await handler.handleMcpInvocation(json);
      } else if (kind === "mcp" && request.method === "OPTIONS") {
        return new Response(null, {
          status: 204,
          headers: {
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
          },
        });
      } else {
        result = await handler.serveManifest(kind);
      }

      const { status, headers, body } = result;
      const responseBody =
        body === null || body === undefined
          ? null // e.g. 202 for MCP notifications
          : typeof body === "string"
            ? body
            : JSON.stringify(body);
      return new Response(responseBody, { status, headers });
    } catch {
      // The handler methods never throw by contract, but an unexpected
      // failure must stay contained to this request. Answer with a generic
      // 500, mirroring what the Express adapter gets from next(err).
      return new Response(JSON.stringify({ error: "Internal error" }), {
        status: 500,
        headers: { "Content-Type": "application/json; charset=utf-8" },
      });
    }
  };
}

/** 413 reply used when an MCP request body exceeds the size cap. */
function payloadTooLargeResponse() {
  return new Response(JSON.stringify({ error: "Request body too large" }), {
    status: 413,
    headers: { "Content-Type": "application/json; charset=utf-8" },
  });
}

/**
 * Stream a Web Request body, stopping once it exceeds `maxBytes`, then parse
 * it as JSON.
 *
 * @param {Request} request
 * @param {number} maxBytes
 * @returns {Promise<{ tooLarge: boolean, json: unknown }>} `json` is null
 *   when the body is missing, unreadable, or not valid JSON.
 */
async function readCappedJsonBody(request, maxBytes) {
  if (!request.body) return { tooLarge: false, json: null };
  try {
    const reader = request.body.getReader();
    const chunks = [];
    let size = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      size += value.byteLength;
      if (size > maxBytes) {
        // Stop pulling data; a failed cancel is irrelevant to the caller.
        await reader.cancel().catch(() => {});
        return { tooLarge: true, json: null };
      }
      chunks.push(value);
    }
    const bytes = new Uint8Array(size);
    let offset = 0;
    for (const chunk of chunks) {
      bytes.set(chunk, offset);
      offset += chunk.byteLength;
    }
    return { tooLarge: false, json: JSON.parse(new TextDecoder().decode(bytes)) };
  } catch {
    return { tooLarge: false, json: null };
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Best-effort visitor country for a Next.js request, so the dashboard's geo
 * panel isn't empty for SDK customers.
 *
 * `request.geo.country` (Vercel populates it on NextRequest) is preferred
 * when it normalises to a usable code. Otherwise the first non-empty value
 * among the Cloudflare `cf-ipcountry` and Vercel `x-vercel-ip-country`
 * headers is handed to normalizeCountry.
 *
 * @param {Request} request Request exposing `headers.get()` and optionally `geo`.
 * @returns {string|null} Whatever normalizeCountry yields for the chosen value.
 */
function countryFromRequest(request) {
  const geoCountry = request?.geo?.country;
  const fromGeo = geoCountry ? normalizeCountry(geoCountry) : null;
  if (fromGeo) return fromGeo;

  for (const headerName of ["cf-ipcountry", "x-vercel-ip-country"]) {
    const value = request.headers.get(headerName);
    if (value) return normalizeCountry(value);
  }
  // Neither header carried a value.
  return normalizeCountry("");
}
|
|
156
|
+
|
|
157
|
+
/**
 * Validate a raw country value taken from `request.geo` or an untrusted
 * request header.
 *
 * @param {unknown} raw Candidate value; anything that is not a non-empty
 *   string is rejected.
 * @returns {string|null} The trimmed, upper-cased two-letter code, or null
 *   when the value is missing, is not exactly two ASCII letters, or is one of
 *   the "XX"/"T1" placeholders.
 */
function normalizeCountry(raw) {
  if (typeof raw !== "string" || raw === "") return null;
  const trimmed = raw.trim();
  // Letters only: a length check alone would let values such as "12" or "U1"
  // through. This also rejects the "T1" placeholder.
  if (!/^[A-Za-z]{2}$/.test(trimmed)) return null;
  const code = trimmed.toUpperCase();
  return code === "XX" ? null : code;
}
|