@ooky/sdk 0.1.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +59 -12
- package/package.json +26 -8
- package/src/bots.js +43 -1
- package/src/core.js +471 -51
- package/src/edge.d.ts +1 -0
- package/src/express.d.ts +6 -0
- package/src/express.js +145 -18
- package/src/index.d.ts +125 -0
- package/src/mcp.js +127 -0
- package/src/next.d.ts +12 -0
- package/src/next.js +123 -15
- package/src/referrals.js +73 -0
package/src/core.js
CHANGED
|
@@ -4,15 +4,50 @@
|
|
|
4
4
|
* The handler exposes:
|
|
5
5
|
* - matchPath(path) → null | "llms" | "llms-full" | "manifest" | "agents" | "mcp"
|
|
6
6
|
* - detectBot(ua) → null | { name, pattern, category }
|
|
7
|
-
* - serveManifest(kind
|
|
7
|
+
* - serveManifest(kind) → { status, body, headers }
|
|
8
8
|
* - recordEvent(payload) → fire-and-forget POST to /api/ingest/events
|
|
9
9
|
* - refreshBotRegistry() → optional manual refresh from /api/public/bots
|
|
10
10
|
*
|
|
11
11
|
* Adapters (express/next/edge) wrap this with the framework's request/response
|
|
12
12
|
* conventions but never duplicate logic.
|
|
13
|
+
*
|
|
14
|
+
* IMPORTANT: never import Node-only APIs here (no `node:*`, no `Buffer`, no
|
|
15
|
+
* `setImmediate`) — this file runs verbatim on the Vercel Edge runtime.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { detectBot as detectFromList, DEFAULT_BOTS, sanitizeBotRegistry } from "./bots.js";
|
|
19
|
+
import { detectAIReferral } from "./referrals.js";
|
|
20
|
+
import { handleMcpJsonRpc, McpToolError } from "./mcp.js";
|
|
21
|
+
|
|
22
|
+
export const SDK_VERSION = "0.5.0";
|
|
23
|
+
|
|
24
|
+
// Defensive length caps on untrusted strings copied into the event payload.
|
|
25
|
+
// The load-bearing clamp is server-side (owned by the backend), but capping
|
|
26
|
+
// here keeps a pathological UA / very long path from being POSTed at all.
|
|
27
|
+
export const MAX_UA_LENGTH = 1024;
|
|
28
|
+
export const MAX_PATH_LENGTH = 2048;
|
|
29
|
+
|
|
30
|
+
/**
 * Truncate an untrusted string to at most `max` UTF-16 code units.
 *
 * Non-string values are returned untouched so callers can pass optional
 * fields straight through. When the cut would land between the two halves
 * of a surrogate pair, the dangling high surrogate is dropped as well, so
 * truncation itself never produces a lone surrogate in the event payload.
 *
 * @param {*} value Candidate value (only strings are clamped).
 * @param {number} max Maximum length in UTF-16 code units.
 * @returns {*} The clamped string, or `value` unchanged.
 */
export function clampString(value, max) {
  if (typeof value !== "string") return value;
  // Written as a negated `>` so a NaN `max` still passes the value through.
  if (!(value.length > max)) return value;
  let end = max;
  const last = value.charCodeAt(end - 1);
  const next = value.charCodeAt(end);
  const splitsPair =
    last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
  // Back off one unit rather than emit half of an astral character.
  if (splitsPair) end -= 1;
  return value.slice(0, end);
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Hard cap on the MCP request body the adapters will read before answering.
|
|
38
|
+
* The Express adapter enforces this while streaming; the Next/Edge adapter
|
|
39
|
+
* uses it for a Content-Length pre-check. Exported so both adapters (and the
|
|
40
|
+
* Worker tier, for parity) share one number.
|
|
13
41
|
*/
|
|
42
|
+
export const MAX_MCP_BODY_BYTES = 64 * 1024;
|
|
14
43
|
|
|
15
|
-
|
|
44
|
+
/**
|
|
45
|
+
* Runtime tag stamped into the `X-Ooky-Sdk` response header so we don't claim
|
|
46
|
+
* "node" on the Edge/Web runtime. Best-effort detection — falls back to
|
|
47
|
+
* "node" only when we positively look like Node (has `process.versions.node`
|
|
48
|
+
* and no Web-runtime markers).
|
|
49
|
+
*/
|
|
50
|
+
export const SDK_RUNTIME = detectRuntime();
|
|
16
51
|
|
|
17
52
|
const DEFAULT_API_BASE = "https://api.ooky.ai/api";
|
|
18
53
|
// Manifest content is served from the public Ooky API by default. Customers
|
|
@@ -20,7 +55,9 @@ const DEFAULT_API_BASE = "https://api.ooky.ai/api";
|
|
|
20
55
|
// CloudFront) sitting in front of `${apiBase}/public/manifest`.
|
|
21
56
|
const DEFAULT_CDN_PATH = "/public/manifest";
|
|
22
57
|
|
|
23
|
-
//
|
|
58
|
+
// Well-known paths the SDK answers on behalf of the customer's app. Must stay
|
|
59
|
+
// feature-equivalent with worker/src/index.js (the Worker also serves bare
|
|
60
|
+
// /mcp because some platforms intercept /.well-known/*).
|
|
24
61
|
const PATH_MAP = {
|
|
25
62
|
"/llms.txt": "llms",
|
|
26
63
|
"/llms-full.txt": "llms-full",
|
|
@@ -28,6 +65,7 @@ const PATH_MAP = {
|
|
|
28
65
|
"/ai-manifest.json": "manifest",
|
|
29
66
|
"/agents.md": "agents",
|
|
30
67
|
"/.well-known/mcp": "mcp",
|
|
68
|
+
"/mcp": "mcp",
|
|
31
69
|
};
|
|
32
70
|
|
|
33
71
|
const CONTENT_TYPE = {
|
|
@@ -38,6 +76,13 @@ const CONTENT_TYPE = {
|
|
|
38
76
|
mcp: "application/json; charset=utf-8",
|
|
39
77
|
};
|
|
40
78
|
|
|
79
|
+
const DEFAULT_FETCH_TIMEOUT_MS = 10_000;
|
|
80
|
+
const DEFAULT_MAX_EVENTS_PER_MINUTE = 300;
|
|
81
|
+
const THROTTLE_REPORT_INTERVAL_MS = 10_000;
|
|
82
|
+
const DEFAULT_MANIFEST_CACHE_TTL_MS = 5 * 60 * 1000; // mirrors Cache-Control max-age=300
|
|
83
|
+
const MANIFEST_STALE_MAX_MS = 24 * 60 * 60 * 1000; // serve stale up to 24h on upstream failure
|
|
84
|
+
const BOT_REFRESH_MS = 60 * 60 * 1000; // 1 hour
|
|
85
|
+
|
|
41
86
|
/**
|
|
42
87
|
* Create an Ooky handler instance.
|
|
43
88
|
*
|
|
@@ -48,6 +93,18 @@ const CONTENT_TYPE = {
|
|
|
48
93
|
* @param {string} [options.cdnBase] Override manifest CDN base URL.
|
|
49
94
|
* @param {Array} [options.bots] Override the bot registry; default uses DEFAULT_BOTS.
|
|
50
95
|
* @param {boolean}[options.autoRefreshBots=true] Periodically refresh from /api/public/bots.
|
|
96
|
+
* @param {number} [options.fetchTimeoutMs=10000] Upstream fetch timeout (manifest + registry).
|
|
97
|
+
* @param {number} [options.manifestCacheTtlMs=300000] In-memory manifest cache TTL. 0 disables.
|
|
98
|
+
* @param {Function}[options.onError] Called with (error, context) for every failure the
|
|
99
|
+
* SDK swallows (event POST rejections/non-2xx, manifest
|
|
100
|
+
* fetch failures, registry refresh failures). Use it to
|
|
101
|
+
* surface rotated keys / outages in your own logs.
|
|
102
|
+
* context = { op, status?, kind?, throttled? }. Never
|
|
103
|
+
* awaited; its own throws are swallowed.
|
|
104
|
+
* @param {number} [options.maxEventsPerMinute=300] Token-bucket cap on event POSTs so a
|
|
105
|
+
* bot storm can't turn your server into an unbounded
|
|
106
|
+
* POST cannon. Drops are reported via onError (at most
|
|
107
|
+
* once per 10s). Pass Infinity to disable.
|
|
51
108
|
*/
|
|
52
109
|
export function createOokyHandler(options) {
|
|
53
110
|
if (!options || typeof options !== "object") {
|
|
@@ -67,29 +124,123 @@ export function createOokyHandler(options) {
|
|
|
67
124
|
"cdnBase"
|
|
68
125
|
).replace(/\/+$/, "");
|
|
69
126
|
const autoRefreshBots = options.autoRefreshBots !== false;
|
|
127
|
+
const onError = typeof options.onError === "function" ? options.onError : null;
|
|
128
|
+
const maxEventsPerMinute =
|
|
129
|
+
options.maxEventsPerMinute === Infinity
|
|
130
|
+
? Infinity
|
|
131
|
+
: positiveNumber(options.maxEventsPerMinute, DEFAULT_MAX_EVENTS_PER_MINUTE);
|
|
132
|
+
const fetchTimeoutMs = positiveNumber(options.fetchTimeoutMs, DEFAULT_FETCH_TIMEOUT_MS);
|
|
133
|
+
const manifestCacheTtlMs =
|
|
134
|
+
options.manifestCacheTtlMs === 0
|
|
135
|
+
? 0
|
|
136
|
+
: positiveNumber(options.manifestCacheTtlMs, DEFAULT_MANIFEST_CACHE_TTL_MS);
|
|
70
137
|
|
|
71
|
-
|
|
138
|
+
// A customer-supplied `options.bots` typo (non-array, entries with no
|
|
139
|
+
// pattern, an empty-string pattern, a multi-thousand-entry blob) must not be
|
|
140
|
+
// able to crash the request path or match every UA. Sanitise on the way in;
|
|
141
|
+
// fall back to the built-in list when the override is unusable.
|
|
142
|
+
let botRegistry = DEFAULT_BOTS;
|
|
143
|
+
if (options.bots !== undefined) {
|
|
144
|
+
const cleaned = sanitizeBotRegistry(options.bots);
|
|
145
|
+
botRegistry = cleaned && cleaned.length > 0 ? cleaned : DEFAULT_BOTS;
|
|
146
|
+
}
|
|
72
147
|
let lastBotRefresh = 0;
|
|
73
|
-
|
|
148
|
+
let botEtag = null;
|
|
149
|
+
let botRefreshInFlight = null;
|
|
150
|
+
|
|
151
|
+
// kind → { status, headers, body, freshUntil, staleUntil }
|
|
152
|
+
const manifestCache = new Map();
|
|
153
|
+
// kind → Promise — dedupes concurrent cold-cache fetches for the same kind.
|
|
154
|
+
const manifestInFlight = new Map();
|
|
155
|
+
|
|
156
|
+
/** Report a swallowed failure to the customer's onError hook, if set. */
|
|
157
|
+
function reportError(error, context) {
|
|
158
|
+
if (!onError) return;
|
|
159
|
+
try {
|
|
160
|
+
onError(error, context);
|
|
161
|
+
} catch {
|
|
162
|
+
// The hook itself must never break the request path.
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// Token bucket for event POSTs. Refills continuously at the per-minute
|
|
167
|
+
// rate; bursts up to one minute's allowance.
|
|
168
|
+
let eventTokens = maxEventsPerMinute;
|
|
169
|
+
let lastRefillAt = Date.now();
|
|
170
|
+
let droppedSinceReport = 0;
|
|
171
|
+
let lastDropReportAt = 0;
|
|
172
|
+
|
|
173
|
+
function takeEventToken() {
|
|
174
|
+
if (!Number.isFinite(maxEventsPerMinute)) return true;
|
|
175
|
+
const now = Date.now();
|
|
176
|
+
eventTokens = Math.min(
|
|
177
|
+
maxEventsPerMinute,
|
|
178
|
+
eventTokens + ((now - lastRefillAt) / 60_000) * maxEventsPerMinute
|
|
179
|
+
);
|
|
180
|
+
lastRefillAt = now;
|
|
181
|
+
if (eventTokens >= 1) {
|
|
182
|
+
eventTokens -= 1;
|
|
183
|
+
return true;
|
|
184
|
+
}
|
|
185
|
+
droppedSinceReport += 1;
|
|
186
|
+
if (now - lastDropReportAt >= THROTTLE_REPORT_INTERVAL_MS) {
|
|
187
|
+
lastDropReportAt = now;
|
|
188
|
+
const dropped = droppedSinceReport;
|
|
189
|
+
droppedSinceReport = 0;
|
|
190
|
+
reportError(
|
|
191
|
+
new Error(
|
|
192
|
+
`event throttle: dropped ${dropped} event(s) (maxEventsPerMinute=${maxEventsPerMinute})`
|
|
193
|
+
),
|
|
194
|
+
{ op: "recordEvent", throttled: true }
|
|
195
|
+
);
|
|
196
|
+
}
|
|
197
|
+
return false;
|
|
198
|
+
}
|
|
74
199
|
|
|
75
200
|
/**
 * Refresh the bot registry from `${apiBase}/public/bots`.
 *
 * Skips the fetch when auto-refresh is disabled or the last attempt is
 * younger than `BOT_REFRESH_MS`, unless `force` is set. Concurrent callers
 * share one in-flight request. A 304 keeps the current list; a 2xx payload
 * is adopted only after `sanitizeBotRegistry` accepts it. Every failure
 * (network error, malformed payload, non-2xx status) leaves the current
 * list in place and is reported through `reportError`.
 *
 * @param {boolean} [force=false] Bypass the auto-refresh flag and the refresh window.
 * @returns {Promise<Array>} The registry in effect once the refresh settles.
 */
async function refreshBotRegistry(force = false) {
  if (!autoRefreshBots && !force) return botRegistry;
  if (!force && Date.now() - lastBotRefresh < BOT_REFRESH_MS) return botRegistry;
  if (botRefreshInFlight) return botRefreshInFlight;

  botRefreshInFlight = (async () => {
    try {
      const headers = { Accept: "application/json" };
      if (botEtag) headers["If-None-Match"] = botEtag;
      const res = await fetch(`${apiBase}/public/bots`, {
        headers,
        signal: timeoutSignal(fetchTimeoutMs),
      });
      if (res.status === 304) {
        // Registry unchanged — just push the refresh window forward.
      } else if (res.ok) {
        const data = await res.json();
        // Validate + cap before adopting: a malformed payload must never
        // reach detectBot on the hot path.
        const cleaned = sanitizeBotRegistry(data?.bots);
        if (cleaned && cleaned.length > 0) {
          botRegistry = cleaned;
          botEtag = res.headers?.get?.("etag") || null;
        } else {
          reportError(new Error("ignored malformed /public/bots registry"), {
            op: "refreshBotRegistry",
          });
        }
      } else {
        // An upstream error status is a refresh failure too — surface it
        // instead of silently keeping the stale list.
        reportError(new Error(`bot registry responded ${res.status}`), {
          op: "refreshBotRegistry",
          status: res.status,
        });
      }
      lastBotRefresh = Date.now();
    } catch (err) {
      // Network failure — keep stale list, but still bump the timestamp so
      // a dead endpoint can't trigger a refresh attempt on every request.
      lastBotRefresh = Date.now();
      reportError(err, { op: "refreshBotRegistry" });
    } finally {
      botRefreshInFlight = null;
    }
    return botRegistry;
  })();
  return botRefreshInFlight;
}
|
|
94
245
|
|
|
95
246
|
function matchPath(path) {
|
|
@@ -100,6 +251,11 @@ export function createOokyHandler(options) {
|
|
|
100
251
|
}
|
|
101
252
|
|
|
102
253
|
function detectBot(userAgent) {
|
|
254
|
+
// Opportunistic, non-blocking registry refresh — this is the only hook
|
|
255
|
+
// that runs on every request, so it's where autoRefreshBots is honoured.
|
|
256
|
+
if (autoRefreshBots && Date.now() - lastBotRefresh >= BOT_REFRESH_MS) {
|
|
257
|
+
refreshBotRegistry().catch(() => {});
|
|
258
|
+
}
|
|
103
259
|
return detectFromList(userAgent, botRegistry);
|
|
104
260
|
}
|
|
105
261
|
|
|
@@ -107,56 +263,117 @@ export function createOokyHandler(options) {
|
|
|
107
263
|
// Edge CDN URL convention. The actual route map is owned by the backend
|
|
108
264
|
// and exposed via the per-domain /api/public/manifest/:domain endpoint.
|
|
109
265
|
const url = `${cdnBase}/${encodeURIComponent(domain)}/${kind}`;
|
|
110
|
-
const res = await fetch(url, {
|
|
266
|
+
const res = await fetch(url, {
|
|
267
|
+
headers: { Accept: CONTENT_TYPE[kind] || "*/*" },
|
|
268
|
+
signal: timeoutSignal(fetchTimeoutMs),
|
|
269
|
+
});
|
|
111
270
|
return res;
|
|
112
271
|
}
|
|
113
272
|
|
|
273
|
+
function cacheGet(kind) {
|
|
274
|
+
const entry = manifestCache.get(kind);
|
|
275
|
+
if (!entry) return { fresh: null, stale: null };
|
|
276
|
+
const now = Date.now();
|
|
277
|
+
if (now < entry.freshUntil) return { fresh: entry, stale: entry };
|
|
278
|
+
if (now < entry.staleUntil) return { fresh: null, stale: entry };
|
|
279
|
+
manifestCache.delete(kind);
|
|
280
|
+
return { fresh: null, stale: null };
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
function cachePut(kind, result) {
|
|
284
|
+
if (manifestCacheTtlMs <= 0) return;
|
|
285
|
+
const now = Date.now();
|
|
286
|
+
manifestCache.set(kind, {
|
|
287
|
+
...result,
|
|
288
|
+
freshUntil: now + manifestCacheTtlMs,
|
|
289
|
+
staleUntil: now + MANIFEST_STALE_MAX_MS,
|
|
290
|
+
});
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/**
 * Project a cache entry down to the public { status, headers, body } shape,
 * leaving the cache bookkeeping fields behind.
 *
 * @param {object} entry Cache entry (or any object carrying those fields).
 * @returns {{status: *, headers: *, body: *}}
 */
function toResponseShape(entry) {
  const { status, headers, body } = entry;
  return { status, headers, body };
}
|
|
296
|
+
|
|
114
297
|
/**
 * Produce the response for one of the well-known manifest kinds.
 *
 * Resolves to { status, headers, body }: body is parsed JSON for the
 * "manifest" and "mcp" kinds and text for the others; adapters serialize.
 *
 * A fresh cache entry is returned without touching the network, and
 * concurrent cold-cache calls for the same kind share one upstream fetch.
 * When the upstream answers 5xx or the fetch throws, a stale cached copy is
 * served if one exists; otherwise an error body matching the kind's content
 * type is returned. Failures are passed to `reportError`.
 *
 * @param {string} kind Manifest kind, as produced by matchPath.
 */
async function serveManifest(kind) {
  const contentType = CONTENT_TYPE[kind];
  if (!contentType) {
    return { status: 404, headers: {}, body: "Unknown manifest kind" };
  }

  const cached = cacheGet(kind).fresh;
  if (cached) return toResponseShape(cached);

  const pending = manifestInFlight.get(kind);
  if (pending) return pending;

  const wantsJson = kind === "manifest" || kind === "mcp";

  // Error response whose body format agrees with the declared content type.
  const failure = (status, message) => ({
    status,
    headers: { "Content-Type": contentType },
    body: wantsJson ? { error: message } : message,
  });

  const load = async () => {
    try {
      const upstream = await fetchFromCdn(kind);
      if (upstream.ok) {
        const headers = {
          "Content-Type": contentType,
          "Cache-Control": "public, max-age=300, s-maxage=600",
          "X-Ooky-Sdk": `${SDK_RUNTIME}/${SDK_VERSION}`,
        };
        const payload = wantsJson ? await upstream.json() : await upstream.text();
        const success = { status: 200, headers, body: payload };
        cachePut(kind, success);
        return success;
      }

      reportError(new Error(`manifest upstream responded ${upstream.status}`), {
        op: "serveManifest",
        kind,
        status: upstream.status,
      });
      const fallback = cacheGet(kind).stale;
      // Only server-side failures fall back to the stale copy.
      if (fallback && upstream.status >= 500) return toResponseShape(fallback);
      return failure(upstream.status, `Manifest unavailable (${upstream.status})`);
    } catch (err) {
      reportError(err, { op: "serveManifest", kind });
      const fallback = cacheGet(kind).stale;
      if (fallback) return toResponseShape(fallback);
      return failure(502, `Ooky manifest fetch failed: ${err.message}`);
    } finally {
      manifestInFlight.delete(kind);
    }
  };

  const task = load();
  manifestInFlight.set(kind, task);
  return task;
}
|
|
153
368
|
|
|
154
369
|
/**
|
|
155
|
-
* Fire-and-forget event POST. Returns
|
|
370
|
+
* Fire-and-forget event POST. Returns the underlying promise so edge
|
|
371
|
+
* runtimes can hand it to `event.waitUntil()`, but the caller must never
|
|
156
372
|
* `await` it on the request hot path. Errors are swallowed.
|
|
157
373
|
*/
|
|
158
374
|
function recordEvent(payload) {
|
|
159
|
-
|
|
375
|
+
if (!takeEventToken()) return Promise.resolve(undefined);
|
|
376
|
+
const event = {
|
|
160
377
|
event_id: payload.event_id || cryptoRandomId(),
|
|
161
378
|
timestamp: payload.timestamp || new Date().toISOString(),
|
|
162
379
|
domain, // server overrides anyway, but include for backward compat
|
|
@@ -165,7 +382,12 @@ export function createOokyHandler(options) {
|
|
|
165
382
|
session: payload.session || null,
|
|
166
383
|
geo: payload.geo || null,
|
|
167
384
|
serve: payload.serve || null,
|
|
168
|
-
}
|
|
385
|
+
};
|
|
386
|
+
// ai_referral events carry a referral block instead of a bot block —
|
|
387
|
+
// see backend ingest.js for how the two row types diverge.
|
|
388
|
+
if (payload.event_type) event.event_type = payload.event_type;
|
|
389
|
+
if (payload.referral) event.referral = payload.referral;
|
|
390
|
+
const body = JSON.stringify(event);
|
|
169
391
|
|
|
170
392
|
// Use fetch with keepalive=true so it survives the response cycle.
|
|
171
393
|
return fetch(`${apiBase}/ingest/events`, {
|
|
@@ -176,19 +398,140 @@ export function createOokyHandler(options) {
|
|
|
176
398
|
},
|
|
177
399
|
body,
|
|
178
400
|
keepalive: true,
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
401
|
+
signal: timeoutSignal(fetchTimeoutMs),
|
|
402
|
+
})
|
|
403
|
+
.then((res) => {
|
|
404
|
+
// A non-2xx here is the one signal a customer has that their key was
|
|
405
|
+
// rotated/revoked (401) or the payload drifted (400) — surface it.
|
|
406
|
+
if (res && !res.ok) {
|
|
407
|
+
reportError(new Error(`ingest responded ${res.status}`), {
|
|
408
|
+
op: "recordEvent",
|
|
409
|
+
status: res.status,
|
|
410
|
+
});
|
|
411
|
+
}
|
|
412
|
+
return res;
|
|
413
|
+
})
|
|
414
|
+
.catch((err) => {
|
|
415
|
+
// Fire-and-forget — never throw on the request path.
|
|
416
|
+
reportError(err, { op: "recordEvent" });
|
|
417
|
+
});
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* Detect an AI-platform referral (human arriving from ChatGPT, Perplexity,
|
|
422
|
+
* Claude, …) from a Referer header and utm_source value. Pure check — the
|
|
423
|
+
* adapters call this for non-bot traffic and fire an ai_referral event.
|
|
424
|
+
*/
|
|
425
|
+
function detectReferral(referer, utmSource) {
|
|
426
|
+
return detectAIReferral(referer, utmSource);
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
// Tool surface for the MCP server. The SDK tier exposes get_brand_info
|
|
430
|
+
// only (product tools need feed data the SDK doesn't have) — keep in sync
|
|
431
|
+
// with the descriptor served by backend public_manifest.js.
|
|
432
|
+
const MCP_TOOLS = [
|
|
433
|
+
{
|
|
434
|
+
name: "get_brand_info",
|
|
435
|
+
description:
|
|
436
|
+
"Get brand information including company overview, products, contact, and policies.",
|
|
437
|
+
inputSchema: {
|
|
438
|
+
type: "object",
|
|
439
|
+
properties: {
|
|
440
|
+
section: {
|
|
441
|
+
type: "string",
|
|
442
|
+
description:
|
|
443
|
+
'Optional section filter: "about", "products", "contact", "policies", or omit for all.',
|
|
444
|
+
},
|
|
445
|
+
},
|
|
446
|
+
},
|
|
447
|
+
},
|
|
448
|
+
];
|
|
449
|
+
|
|
450
|
+
async function getBrandInfo(args) {
|
|
451
|
+
const manifest = await serveManifest("manifest");
|
|
452
|
+
if (manifest.status !== 200 || typeof manifest.body !== "object") {
|
|
453
|
+
throw new McpToolError("Brand information not available");
|
|
454
|
+
}
|
|
455
|
+
return filterBrandSection(manifest.body, args?.section);
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
/**
 * Answer an MCP request body.
 *
 * Two wire formats are accepted:
 *  - JSON-RPC 2.0: chosen when the body is null/undefined (unparseable),
 *    an array, or carries `jsonrpc: "2.0"`. Delegated to `handleMcpJsonRpc`.
 *  - Legacy `{ tool, arguments }` → `{ result }`.
 *
 * Resolves to { status, headers, body }, the same shape serveManifest uses.
 *
 * @param {*} body Parsed request body, or null when it could not be parsed.
 */
async function handleMcpInvocation(body) {
  const headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Access-Control-Allow-Origin": "*",
    "X-Ooky-Sdk": `${SDK_RUNTIME}/${SDK_VERSION}`,
  };
  const reply = (status, payload) => ({ status, headers, body: payload });

  // Standard JSON-RPC 2.0 path (a missing body is routed here as well).
  const isJsonRpc = body == null || body.jsonrpc === "2.0" || Array.isArray(body);
  if (isJsonRpc) {
    const server = {
      name: `ooky-${domain.replace(/[^a-z0-9-]/gi, "-")}`,
      version: SDK_VERSION,
      tools: MCP_TOOLS,
      callTool: (name, args) => {
        if (name !== "get_brand_info") {
          throw new McpToolError(`Unknown tool: ${name}`);
        }
        return getBrandInfo(args);
      },
    };
    const rpc = await handleMcpJsonRpc(body ?? null, server);
    return reply(rpc.status, rpc.body);
  }

  // Legacy { tool, arguments } path.
  if (typeof body !== "object") {
    return reply(400, { error: "JSON body required" });
  }
  if (!body.tool) {
    return reply(400, { error: 'Missing "tool" field' });
  }
  if (body.tool !== "get_brand_info") {
    return reply(404, { error: `Unknown tool: ${body.tool}` });
  }

  try {
    const result = await getBrandInfo(body.arguments);
    return reply(200, { result });
  } catch (err) {
    // Only tool errors carry a message that is safe to echo back.
    const message = err instanceof McpToolError ? err.message : "Tool invocation failed";
    return reply(502, { error: message });
  }
}
|
|
513
|
+
|
|
514
|
+
/**
|
|
515
|
+
* The in-flight bot-registry refresh, if any. Edge adapters must register
|
|
516
|
+
* this with `event.waitUntil()` — edge runtimes can cancel outstanding
|
|
517
|
+
* fetches once the response returns, which would otherwise abort the
|
|
518
|
+
* refresh kicked off by detectBot on every request.
|
|
519
|
+
*/
|
|
520
|
+
function pendingBotRefresh() {
|
|
521
|
+
return botRefreshInFlight;
|
|
182
522
|
}
|
|
183
523
|
|
|
184
524
|
return {
|
|
185
525
|
matchPath,
|
|
186
526
|
detectBot,
|
|
527
|
+
detectReferral,
|
|
187
528
|
serveManifest,
|
|
529
|
+
handleMcpInvocation,
|
|
188
530
|
recordEvent,
|
|
189
531
|
refreshBotRegistry,
|
|
532
|
+
pendingBotRefresh,
|
|
190
533
|
// exposed for tests / introspection
|
|
191
|
-
_options: { apiBase, cdnBase, domain },
|
|
534
|
+
_options: { apiBase, cdnBase, domain, fetchTimeoutMs, manifestCacheTtlMs },
|
|
192
535
|
};
|
|
193
536
|
}
|
|
194
537
|
|
|
@@ -215,6 +558,83 @@ function assertHttpUrl(value, optionName) {
|
|
|
215
558
|
return value;
|
|
216
559
|
}
|
|
217
560
|
|
|
561
|
+
/**
 * Accept `value` only when it is a finite number greater than zero;
 * anything else (wrong type, NaN, ±Infinity, zero, negative) yields `fallback`.
 *
 * @param {*} value Candidate option value.
 * @param {*} fallback Value returned when the candidate is rejected.
 */
function positiveNumber(value, fallback) {
  if (typeof value !== "number") return fallback;
  if (!Number.isFinite(value)) return fallback;
  if (value > 0) return value;
  return fallback;
}
|
|
564
|
+
|
|
565
|
+
/**
|
|
566
|
+
* Best-effort runtime tag for the `X-Ooky-Sdk` header. We must not import
|
|
567
|
+
* `node:*` or touch `process` unconditionally (this file runs on Vercel Edge
|
|
568
|
+
* verbatim), so everything is feature-detected off globalThis.
|
|
569
|
+
*
|
|
570
|
+
* - "edge" — a Web-runtime marker is present (EdgeRuntime global, or
|
|
571
|
+
* navigator.userAgent reports Cloudflare-Workers/Vercel-Edge).
|
|
572
|
+
* - "node" — process.versions.node exists and no edge marker.
|
|
573
|
+
* - "web" — anything else with a Web Fetch surface but no Node.
|
|
574
|
+
*/
|
|
575
|
+
function detectRuntime() {
|
|
576
|
+
try {
|
|
577
|
+
const g = globalThis;
|
|
578
|
+
// Vercel Edge sets a global `EdgeRuntime` ("edge-runtime").
|
|
579
|
+
if (typeof g.EdgeRuntime !== "undefined") return "edge";
|
|
580
|
+
const navUA =
|
|
581
|
+
g.navigator && typeof g.navigator.userAgent === "string" ? g.navigator.userAgent : "";
|
|
582
|
+
if (/Cloudflare-Workers|Vercel-Edge|Deno/i.test(navUA)) return "edge";
|
|
583
|
+
if (g.process && g.process.versions && typeof g.process.versions.node === "string") {
|
|
584
|
+
return "node";
|
|
585
|
+
}
|
|
586
|
+
// Has a Web Fetch surface but isn't Node — generic web/edge worker.
|
|
587
|
+
if (typeof g.fetch === "function") return "web";
|
|
588
|
+
} catch {
|
|
589
|
+
// Fall through to the safe default.
|
|
590
|
+
}
|
|
591
|
+
return "node";
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
/**
|
|
595
|
+
* Section filter for get_brand_info — mirrors the Worker's getBrandInfo
|
|
596
|
+
* switch (worker/src/mcp.js). The public manifest JSON doesn't always carry
|
|
597
|
+
* the Worker's R2 key layout, so when a section's keys are absent we fall
|
|
598
|
+
* back to the full manifest rather than returning an empty object.
|
|
599
|
+
*/
|
|
600
|
+
function filterBrandSection(manifest, section) {
|
|
601
|
+
if (!section) return manifest;
|
|
602
|
+
let picked;
|
|
603
|
+
switch (section) {
|
|
604
|
+
case "about":
|
|
605
|
+
picked = { brand: manifest.brand, audience: manifest.audience };
|
|
606
|
+
break;
|
|
607
|
+
case "products":
|
|
608
|
+
picked = { positioning: manifest.positioning, brand: manifest.brand && { name: manifest.brand.name } };
|
|
609
|
+
break;
|
|
610
|
+
case "contact":
|
|
611
|
+
picked = {
|
|
612
|
+
support: manifest.support,
|
|
613
|
+
brand: manifest.brand && { name: manifest.brand.name, website: manifest.brand.website },
|
|
614
|
+
};
|
|
615
|
+
break;
|
|
616
|
+
case "policies":
|
|
617
|
+
picked = { aiGuidelines: manifest.aiGuidelines };
|
|
618
|
+
break;
|
|
619
|
+
default:
|
|
620
|
+
return manifest;
|
|
621
|
+
}
|
|
622
|
+
const hasContent = Object.values(picked).some((v) => v != null);
|
|
623
|
+
return hasContent ? picked : manifest;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
/**
|
|
627
|
+
* AbortSignal with a deadline, when the runtime supports it (Node ≥18,
|
|
628
|
+
* Cloudflare Workers, Vercel Edge all do). Returns undefined otherwise so
|
|
629
|
+
* fetch falls back to no timeout rather than crashing.
|
|
630
|
+
*/
|
|
631
|
+
function timeoutSignal(ms) {
|
|
632
|
+
if (typeof AbortSignal !== "undefined" && typeof AbortSignal.timeout === "function") {
|
|
633
|
+
return AbortSignal.timeout(ms);
|
|
634
|
+
}
|
|
635
|
+
return undefined;
|
|
636
|
+
}
|
|
637
|
+
|
|
218
638
|
/**
|
|
219
639
|
* Tiny stand-in for crypto.randomUUID() that works in every JS runtime.
|
|
220
640
|
* Returns a 16-char base36 string — collision risk is negligible for events
|
package/src/edge.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { ookyMiddleware as ookyEdge } from "./next";
|
package/src/express.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { OokyHandlerOptions } from "./index";
|
|
2
|
+
|
|
3
|
+
/** Express-compatible middleware: `app.use(ookyMiddleware({ apiKey, domain }))`. */
|
|
4
|
+
export function ookyMiddleware(
|
|
5
|
+
options: OokyHandlerOptions
|
|
6
|
+
): (req: any, res: any, next: (err?: unknown) => void) => Promise<void>;
|