@ooky/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +189 -0
- package/package.json +64 -0
- package/src/bots.js +57 -0
- package/src/core.js +230 -0
- package/src/edge.js +14 -0
- package/src/express.js +47 -0
- package/src/next.js +43 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ooky
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# @ooky/sdk
|
|
2
|
+
|
|
3
|
+
Drop-in middleware for serving AI brand intelligence and capturing AI-bot analytics from your Node, Next.js, or Vercel Edge app — without DNS changes or Cloudflare workers.
|
|
4
|
+
|
|
5
|
+
When an AI bot (GPTBot, ClaudeBot, Perplexity, etc.) hits any page on your site, the SDK fires a non-blocking event to Ooky. When a request asks for one of the well-known AI paths (`/llms.txt`, `/.well-known/ai-manifest.json`, `/agents.md`, …), the SDK serves the latest manifest you've published from the Ooky dashboard.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @ooky/sdk
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
You'll need a domain registered in [the Ooky dashboard](https://app.ooky.ai) and an API key from **Integrations → SDK → Generate API Key**. The key looks like `ooky_sk_<random>` and is shown once — store it as an environment variable.
|
|
14
|
+
|
|
15
|
+
> **Reveal-once key**: Ooky shows the key only at the moment of generation. After your SDK fires its first successful event, the plaintext is purged server-side (only a SHA-256 hash remains, used for ongoing authentication). If you lose the key, rotate it — there's no "show key again" path. This matches the standard set by Stripe, Linear, and GitHub.
|
|
16
|
+
|
|
17
|
+
## Quickstart
|
|
18
|
+
|
|
19
|
+
### Express
|
|
20
|
+
|
|
21
|
+
```js
|
|
22
|
+
import express from "express";
|
|
23
|
+
import { ookyMiddleware } from "@ooky/sdk/express";
|
|
24
|
+
|
|
25
|
+
const app = express();
|
|
26
|
+
app.use(ookyMiddleware({
|
|
27
|
+
apiKey: process.env.OOKY_API_KEY,
|
|
28
|
+
domain: "acme.com",
|
|
29
|
+
}));
|
|
30
|
+
// your routes...
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Next.js
|
|
34
|
+
|
|
35
|
+
```ts
|
|
36
|
+
// middleware.ts
|
|
37
|
+
import { ookyMiddleware } from "@ooky/sdk/next";
|
|
38
|
+
|
|
39
|
+
export default ookyMiddleware({
|
|
40
|
+
apiKey: process.env.OOKY_API_KEY,
|
|
41
|
+
domain: "acme.com",
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
export const config = {
|
|
45
|
+
matcher: ["/llms.txt", "/llms-full.txt", "/agents.md", "/.well-known/:path*", "/(.*)"],
|
|
46
|
+
};
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Works in both the Node and Edge runtimes — no code change.
|
|
50
|
+
|
|
51
|
+
### Vercel Edge / Web Fetch
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
// middleware.ts (Vercel Edge)
|
|
55
|
+
import { ookyEdge } from "@ooky/sdk/edge";
|
|
56
|
+
|
|
57
|
+
export default ookyEdge({
|
|
58
|
+
apiKey: process.env.OOKY_API_KEY,
|
|
59
|
+
domain: "acme.com",
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
export const config = { matcher: "/(.*)" };
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## What gets intercepted
|
|
66
|
+
|
|
67
|
+
The SDK responds to these paths with the latest published manifest:
|
|
68
|
+
|
|
69
|
+
| Path | What gets served |
|
|
70
|
+
|---|---|
|
|
71
|
+
| `/llms.txt` | Markdown summary for LLM crawlers |
|
|
72
|
+
| `/llms-full.txt` | Extended markdown with all page sections |
|
|
73
|
+
| `/.well-known/ai-manifest.json` | Full JSON brand manifest (global + per-page) |
|
|
74
|
+
| `/ai-manifest.json` | Same as above (alternate path) |
|
|
75
|
+
| `/agents.md` | Markdown agent guide |
|
|
76
|
+
| `/.well-known/mcp` | MCP server descriptor |
|
|
77
|
+
|
|
78
|
+
Every other request passes through to your app unchanged.
|
|
79
|
+
|
|
80
|
+
## What gets logged
|
|
81
|
+
|
|
82
|
+
For **every** request (manifest or not), the SDK checks the `User-Agent` against the bot registry. When a known AI bot is detected, it fires a fire-and-forget POST to `/api/ingest/events` with:
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"event_id": "<uuid>",
|
|
87
|
+
"timestamp": "<ISO 8601>",
|
|
88
|
+
"bot": { "name": "GPTBot", "verified": false, "ua_string": "<full UA>" },
|
|
89
|
+
"request": { "page_path": "/pricing", "method": "GET" }
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The event scope (which domain it belongs to) is determined server-side from your API key — you cannot accidentally log events for a different customer's domain.
|
|
94
|
+
|
|
95
|
+
Human traffic produces no events.
|
|
96
|
+
|
|
97
|
+
## Configuration options
|
|
98
|
+
|
|
99
|
+
```ts
|
|
100
|
+
ookyMiddleware({
|
|
101
|
+
// Required
|
|
102
|
+
apiKey: "ooky_sk_...",
|
|
103
|
+
domain: "acme.com",
|
|
104
|
+
|
|
105
|
+
// Optional — defaults are right for production
|
|
106
|
+
apiBase: "https://api.ooky.ai/api", // Ooky API root
|
|
107
|
+
cdnBase: "https://api.ooky.ai/api/public/manifest", // Manifest source (default = apiBase + "/public/manifest")
|
|
108
|
+
bots: undefined, // Override the bot registry; default ships with major AI bots
|
|
109
|
+
autoRefreshBots: true, // Periodically refresh bot UA list from /api/public/bots
|
|
110
|
+
});
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
| Option | Type | Default | Notes |
|
|
114
|
+
|---|---|---|---|
|
|
115
|
+
| `apiKey` | `string` | — | Required. Per-domain Bearer token from the dashboard. |
|
|
116
|
+
| `domain` | `string` | — | Required. Must match the verified domain in Ooky. |
|
|
117
|
+
| `apiBase` | `string` | `https://api.ooky.ai/api` | Override for self-hosted Ooky or staging. |
|
|
118
|
+
| `cdnBase` | `string` | `${apiBase}/public/manifest` | Manifest source. By default the SDK fetches from Ooky's public manifest endpoint. Override to put your own CDN (Cloudflare, CloudFront, Fastly) in front. |
|
|
119
|
+
| `bots` | `Array<{name, pattern, category}>` | Built-in default list | Ships with the major AI bots. Override only if you have custom UA patterns. |
|
|
120
|
+
| `autoRefreshBots` | `boolean` | `true` | Refresh from `/api/public/bots` once an hour. Disable for fully offline use. |
|
|
121
|
+
|
|
122
|
+
## Performance
|
|
123
|
+
|
|
124
|
+
- The manifest fetch is HTTP-cached (`Cache-Control: public, max-age=300, s-maxage=600`) — your CDN/edge will serve repeat requests without hitting Ooky.
|
|
125
|
+
- Event firing uses `fetch(..., { keepalive: true })` so it survives the response cycle without delaying it.
|
|
126
|
+
- Bot detection is a substring check against an in-memory list — sub-millisecond per request.
|
|
127
|
+
|
|
128
|
+
## Troubleshooting
|
|
129
|
+
|
|
130
|
+
**"I installed it but no events show up"**
|
|
131
|
+
1. Confirm your domain is verified and the integration method is set to `sdk` (or `wordpress`) in the dashboard.
|
|
132
|
+
2. Check that `process.env.OOKY_API_KEY` is actually set in your runtime — log it once at boot.
|
|
133
|
+
3. Hit your site with a bot UA: `curl -H "User-Agent: GPTBot/1.0" https://your-site.com/` and watch the dashboard's AI Sessions page.
|
|
134
|
+
4. If your app is behind a CDN that strips `User-Agent`, the SDK can't see the bot. Check your CDN config.
|
|
135
|
+
|
|
136
|
+
**"`/llms.txt` returns 404"**
|
|
137
|
+
- The middleware only intercepts paths the SDK knows about. Make sure your framework's matcher passes those paths to the middleware before falling through to your routes.
|
|
138
|
+
- If you've published the manifest in the dashboard, also check that Ooky's manifest endpoint (or your `cdnBase` override) is reachable from your server: `curl https://api.ooky.ai/api/public/manifest/<your-domain>/llms`.
|
|
139
|
+
|
|
140
|
+
**"Events fail with 401 Unauthorized"**
|
|
141
|
+
- The API key has been revoked or rotated. Generate a new one from the dashboard and update the env var.
|
|
142
|
+
|
|
143
|
+
**"Manifest is stale"**
|
|
144
|
+
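- Manifest responses are served with `Cache-Control: public, max-age=300, s-maxage=600`, so clients may reuse them for 5 minutes and shared caches for 10. Ooky purges its CDN on publish, but your own CDN may also cache. Force a fresh fetch by clearing your edge cache for the well-known paths.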
|
|
145
|
+
|
|
146
|
+
## Security & key lifecycle
|
|
147
|
+
|
|
148
|
+
The SDK runs **server-side only** — Node middleware or the Edge runtime. The
|
|
149
|
+
API key never reaches the browser. Key handling follows the same lifecycle as
|
|
150
|
+
Stripe / Linear / GitHub:
|
|
151
|
+
|
|
152
|
+
| Stage | Where the key lives |
|
|
153
|
+
|---|---|
|
|
154
|
+
| Generated | Returned once in the dashboard. Plaintext + SHA-256 hash stored server-side. |
|
|
155
|
+
| Customer copies it | Customer pastes into their env (`OOKY_API_KEY`). |
|
|
156
|
+
| First event lands | Server drops the plaintext from the DB. Only the hash remains. |
|
|
157
|
+
| Ongoing auth | Server hashes the incoming Bearer token and compares against the stored hash in constant time. |
|
|
158
|
+
| Lost / leaked | Rotate via **Integrations → SDK → Rotate API Key**. Old key 401s immediately. |
|
|
159
|
+
|
|
160
|
+
The package is published with [npm provenance attestations](https://docs.npmjs.com/generating-provenance-statements) signed by GitHub Actions OIDC. You can verify the published tarball came from this repo's `release-sdk.yml` workflow:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
npm view @ooky/sdk dist.attestations
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
If you're using a private registry mirror, mirror the provenance bundle too — most package proxies (Artifactory, Verdaccio, GitHub Packages) preserve it.
|
|
167
|
+
|
|
168
|
+
### Recommended rotation cadence
|
|
169
|
+
|
|
170
|
+
- **Every 90 days** for production deployments.
|
|
171
|
+
- **Immediately** if you suspect a leak (committed to a public repo, server compromise, contractor offboarding).
|
|
172
|
+
- Set a calendar reminder; Ooky will surface an in-dashboard warning at 90 days from generation.
|
|
173
|
+
|
|
174
|
+
### Where the key must never appear
|
|
175
|
+
|
|
176
|
+
- ✅ Server env vars (`process.env.OOKY_API_KEY`) — yes.
|
|
177
|
+
- ❌ `next.config.js`, `vite.config.js`, or any file shipped to the client — no.
|
|
178
|
+
- ❌ Git commits — never. If you push it by accident, rotate the key first, *then* clean git history.
|
|
179
|
+
- ❌ Logging — the SDK does not log the key. Don't log `process.env` blindly either.
|
|
180
|
+
|
|
181
|
+
## What this SDK does NOT do
|
|
182
|
+
|
|
183
|
+
- It does not crawl your site or generate the manifest — that happens in the dashboard.
|
|
184
|
+
- It does not modify HTML responses or rewrite content for bots. (For that, use the Worker or Full DNS integration.)
|
|
185
|
+
- It does not block bots. Your `robots.txt` and any rate limiting still apply.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT.
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ooky/sdk",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Ooky SDK — middleware for serving AI brand intelligence and capturing AI-bot analytics from your Node, Next.js, or Vercel Edge app.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"ai",
|
|
7
|
+
"llm",
|
|
8
|
+
"llms.txt",
|
|
9
|
+
"ai-bot",
|
|
10
|
+
"brand-visibility",
|
|
11
|
+
"express",
|
|
12
|
+
"nextjs",
|
|
13
|
+
"vercel-edge",
|
|
14
|
+
"middleware",
|
|
15
|
+
"ooky"
|
|
16
|
+
],
|
|
17
|
+
"homepage": "https://ooky.ai",
|
|
18
|
+
"bugs": {
|
|
19
|
+
"url": "https://github.com/ooky-ai/ooky/issues",
|
|
20
|
+
"email": "support@ooky.ai"
|
|
21
|
+
},
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "git+https://github.com/ooky-ai/ooky.git",
|
|
25
|
+
"directory": "packages/sdk"
|
|
26
|
+
},
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"author": {
|
|
29
|
+
"name": "Ooky",
|
|
30
|
+
"url": "https://ooky.ai",
|
|
31
|
+
"email": "hello@ooky.ai"
|
|
32
|
+
},
|
|
33
|
+
"type": "module",
|
|
34
|
+
"main": "./src/core.js",
|
|
35
|
+
"exports": {
|
|
36
|
+
".": "./src/core.js",
|
|
37
|
+
"./core": "./src/core.js",
|
|
38
|
+
"./express": "./src/express.js",
|
|
39
|
+
"./next": "./src/next.js",
|
|
40
|
+
"./edge": "./src/edge.js"
|
|
41
|
+
},
|
|
42
|
+
"files": [
|
|
43
|
+
"src/",
|
|
44
|
+
"README.md",
|
|
45
|
+
"LICENSE"
|
|
46
|
+
],
|
|
47
|
+
"scripts": {
|
|
48
|
+
"test": "vitest run",
|
|
49
|
+
"test:watch": "vitest",
|
|
50
|
+
"prepublishOnly": "vitest run",
|
|
51
|
+
"prepack": "node -e \"const f=require('./package.json').files; if(!f.includes('src/')||!f.includes('README.md')||!f.includes('LICENSE')){console.error('files whitelist drift — refusing to pack');process.exit(1)}\""
|
|
52
|
+
},
|
|
53
|
+
"publishConfig": {
|
|
54
|
+
"access": "public"
|
|
55
|
+
},
|
|
56
|
+
"engines": {
|
|
57
|
+
"node": ">=18"
|
|
58
|
+
},
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"express": "^4.18.2",
|
|
61
|
+
"supertest": "^6.3.4",
|
|
62
|
+
"vitest": "^3.2.4"
|
|
63
|
+
}
|
|
64
|
+
}
|
package/src/bots.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in bot UA patterns. Mirrors worker/src/bots.js DEFAULT_BOT_REGISTRY
|
|
3
|
+
* so the SDK and Worker behave the same when the public registry endpoint
|
|
4
|
+
* (/api/public/bots) is unreachable. When you add a bot here, also add it
|
|
5
|
+
* to worker/src/bots.js and the SQL seed in
|
|
6
|
+
* backend/src/db/migrations/007_bot_registry.sql.
|
|
7
|
+
*
|
|
8
|
+
* The pattern is matched as a case-insensitive substring of the User-Agent.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export const DEFAULT_BOTS = [
|
|
12
|
+
{ name: "GPTBot", pattern: "GPTBot", category: "ai" },
|
|
13
|
+
{ name: "ChatGPT-User", pattern: "ChatGPT-User", category: "ai" },
|
|
14
|
+
{ name: "OAI-SearchBot", pattern: "OAI-SearchBot", category: "ai" },
|
|
15
|
+
{ name: "Anthropic", pattern: "anthropic-ai", category: "ai" },
|
|
16
|
+
{ name: "ClaudeBot", pattern: "ClaudeBot", category: "ai" },
|
|
17
|
+
{ name: "Claude-Web", pattern: "Claude-Web", category: "ai" },
|
|
18
|
+
{ name: "Google-Extended", pattern: "Google-Extended", category: "ai" },
|
|
19
|
+
{ name: "GoogleOther", pattern: "GoogleOther", category: "ai" },
|
|
20
|
+
{ name: "Googlebot-Extended", pattern: "Googlebot-Extended", category: "ai" },
|
|
21
|
+
{ name: "Googlebot", pattern: "Googlebot", category: "search" },
|
|
22
|
+
{ name: "Applebot-Extended", pattern: "Applebot-Extended", category: "ai" },
|
|
23
|
+
{ name: "Applebot", pattern: "Applebot", category: "search" },
|
|
24
|
+
{ name: "Meta-ExternalAgent", pattern: "meta-externalagent", category: "ai" },
|
|
25
|
+
{ name: "Meta-ExternalFetcher", pattern: "Meta-ExternalFetcher", category: "ai" },
|
|
26
|
+
{ name: "FacebookBot", pattern: "FacebookBot", category: "social" },
|
|
27
|
+
{ name: "facebookexternalhit", pattern: "facebookexternalhit", category: "social" },
|
|
28
|
+
{ name: "Bingbot", pattern: "bingbot", category: "search" },
|
|
29
|
+
{ name: "Perplexity", pattern: "PerplexityBot", category: "ai" },
|
|
30
|
+
{ name: "YouBot", pattern: "YouBot", category: "ai" },
|
|
31
|
+
{ name: "CCBot", pattern: "CCBot", category: "ai" },
|
|
32
|
+
{ name: "Cohere", pattern: "cohere-ai", category: "ai" },
|
|
33
|
+
{ name: "Diffbot", pattern: "Diffbot", category: "ai" },
|
|
34
|
+
{ name: "Bytespider", pattern: "Bytespider", category: "ai" },
|
|
35
|
+
{ name: "Amazonbot", pattern: "Amazonbot", category: "search" },
|
|
36
|
+
{ name: "AI2Bot", pattern: "AI2Bot", category: "ai" },
|
|
37
|
+
{ name: "ImagesiftBot", pattern: "ImagesiftBot", category: "ai" },
|
|
38
|
+
{ name: "Timesbot", pattern: "Timesbot", category: "ai" },
|
|
39
|
+
{ name: "iaskspider", pattern: "iaskspider", category: "ai" },
|
|
40
|
+
{ name: "Twitterbot", pattern: "Twitterbot", category: "social" },
|
|
41
|
+
{ name: "Slurp", pattern: "Slurp", category: "search" },
|
|
42
|
+
{ name: "DuckDuckBot", pattern: "DuckDuckBot", category: "search" },
|
|
43
|
+
{ name: "ia_archiver", pattern: "ia_archiver", category: "other" },
|
|
44
|
+
];
|
|
45
|
+
|
|
46
|
+
/**
 * Returns the first registry entry whose `pattern` occurs in the User-Agent,
 * or null when nothing matches.
 *
 * Matching is a case-insensitive substring test, evaluated in registry order,
 * so more specific patterns must be listed before the patterns they contain.
 *
 * Entries that are not objects, or whose `pattern` is not a non-empty string,
 * are skipped instead of throwing: the registry can be replaced at runtime
 * with remotely fetched data, and one malformed row must not break detection.
 * An empty pattern is skipped because it would match every User-Agent.
 *
 * @param {string} userAgent  Raw User-Agent header value.
 * @param {Iterable<{name: string, pattern: string, category?: string}>} [registry=DEFAULT_BOTS]
 * @returns {{name: string, pattern: string, category?: string} | null}
 */
export function detectBot(userAgent, registry = DEFAULT_BOTS) {
  if (!userAgent || typeof userAgent !== "string") return null;
  // A null or non-iterable registry means "no known bots", not a crash.
  if (registry == null || typeof registry[Symbol.iterator] !== "function") return null;

  const ua = userAgent.toLowerCase();
  for (const bot of registry) {
    const pattern = bot?.pattern;
    if (typeof pattern !== "string" || pattern.length === 0) continue;
    if (ua.includes(pattern.toLowerCase())) return bot;
  }
  return null;
}
|
package/src/core.js
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ooky SDK core — runtime-agnostic logic shared across Express, Next, Edge.
|
|
3
|
+
*
|
|
4
|
+
* The handler exposes:
|
|
5
|
+
* - matchPath(path) → null | "llms" | "llms-full" | "manifest" | "agents" | "mcp"
|
|
6
|
+
* - detectBot(ua) → null | { name, pattern, category }
|
|
7
|
+
* - serveManifest(kind, ctx?) → { status, body, headers }
|
|
8
|
+
* - recordEvent(payload) → fire-and-forget POST to /api/ingest/events
|
|
9
|
+
* - refreshBotRegistry() → optional manual refresh from /api/public/bots
|
|
10
|
+
*
|
|
11
|
+
* Adapters (express/next/edge) wrap this with the framework's request/response
|
|
12
|
+
* conventions but never duplicate logic.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { detectBot as detectFromList, DEFAULT_BOTS } from "./bots.js";
|
|
16
|
+
|
|
17
|
+
const DEFAULT_API_BASE = "https://api.ooky.ai/api";
|
|
18
|
+
// Manifest content is served from the public Ooky API by default. Customers
|
|
19
|
+
// can override `cdnBase` to point at their own CDN (e.g., Cloudflare or
|
|
20
|
+
// CloudFront) sitting in front of `${apiBase}/public/manifest`.
|
|
21
|
+
const DEFAULT_CDN_PATH = "/public/manifest";
|
|
22
|
+
|
|
23
|
+
// Six well-known paths (mapping to five manifest kinds) the SDK answers on behalf of the customer's app.
|
|
24
|
+
const PATH_MAP = {
|
|
25
|
+
"/llms.txt": "llms",
|
|
26
|
+
"/llms-full.txt": "llms-full",
|
|
27
|
+
"/.well-known/ai-manifest.json": "manifest",
|
|
28
|
+
"/ai-manifest.json": "manifest",
|
|
29
|
+
"/agents.md": "agents",
|
|
30
|
+
"/.well-known/mcp": "mcp",
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
const CONTENT_TYPE = {
|
|
34
|
+
llms: "text/plain; charset=utf-8",
|
|
35
|
+
"llms-full": "text/plain; charset=utf-8",
|
|
36
|
+
manifest: "application/json; charset=utf-8",
|
|
37
|
+
agents: "text/markdown; charset=utf-8",
|
|
38
|
+
mcp: "application/json; charset=utf-8",
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
/**
 * Create an Ooky handler instance.
 *
 * The returned object holds the runtime-agnostic operations the adapters
 * drive: path matching, bot detection, manifest serving, event recording and
 * bot-registry refresh.
 *
 * @param {object}  options
 * @param {string}  options.apiKey   Bearer token from /integration/sdk-key (ooky_sk_*)
 * @param {string}  options.domain   Domain registered in Ooky (e.g. "acme.com")
 * @param {string}  [options.apiBase]  Override Ooky API base URL.
 * @param {string}  [options.cdnBase]  Override manifest CDN base URL.
 * @param {Array}   [options.bots]     Override the bot registry; default uses DEFAULT_BOTS.
 *   When supplied, the registry is never replaced in the background; only an
 *   explicit refreshBotRegistry() call can replace it.
 * @param {boolean} [options.autoRefreshBots=true]  Refresh the registry from
 *   `${apiBase}/public/bots` at most once an hour, triggered lazily by
 *   detectBot().
 * @returns {{matchPath: Function, detectBot: Function, serveManifest: Function,
 *   recordEvent: Function, refreshBotRegistry: Function, _options: object}}
 * @throws {TypeError} When options, apiKey or domain are missing, or when
 *   apiBase / cdnBase fail assertHttpUrl validation.
 */
export function createOokyHandler(options) {
  if (!options || typeof options !== "object") {
    throw new TypeError("createOokyHandler: options object is required");
  }
  const { apiKey, domain } = options;
  if (!apiKey || typeof apiKey !== "string") {
    throw new TypeError("createOokyHandler: options.apiKey is required");
  }
  if (!domain || typeof domain !== "string") {
    throw new TypeError("createOokyHandler: options.domain is required");
  }

  const apiBase = assertHttpUrl(options.apiBase || DEFAULT_API_BASE, "apiBase").replace(/\/+$/, "");
  const cdnBase = assertHttpUrl(
    options.cdnBase || `${apiBase}${DEFAULT_CDN_PATH}`,
    "cdnBase"
  ).replace(/\/+$/, "");
  const autoRefreshBots = options.autoRefreshBots !== false;
  // A caller-supplied registry is an explicit override, so the background
  // refresh must not silently replace it with the remote list.
  const backgroundRefresh = autoRefreshBots && !options.bots;

  let botRegistry = options.bots || DEFAULT_BOTS;
  let lastBotRefresh = 0;
  const BOT_REFRESH_MS = 60 * 60 * 1000; // 1 hour
  const BOT_RETRY_MS = 5 * 60 * 1000; // back-off after a failed refresh

  // Own-property lookup: PATH_MAP / CONTENT_TYPE are plain objects, so a bare
  // `table[key]` would also resolve inherited names such as "constructor".
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  // Keep only registry rows that detection can actually use.
  function usableBots(list) {
    if (!Array.isArray(list)) return [];
    return list.filter(
      (b) =>
        b !== null &&
        typeof b === "object" &&
        typeof b.name === "string" &&
        typeof b.pattern === "string" &&
        b.pattern.length > 0
    );
  }

  // Release a response whose body will not be read, so the underlying
  // connection is not held until garbage collection.
  async function discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Body already consumed or cancel unsupported — nothing to release.
    }
  }

  /**
   * Refresh the bot registry from `${apiBase}/public/bots`.
   *
   * Throttled to once per BOT_REFRESH_MS unless `force` is true, and a no-op
   * when autoRefreshBots is disabled unless `force` is true. Never rejects:
   * on any failure the current registry is kept and the next attempt is
   * allowed after BOT_RETRY_MS.
   *
   * @param {boolean} [force=false]
   * @returns {Promise<Array>} The registry in effect after the attempt.
   */
  async function refreshBotRegistry(force = false) {
    if (!autoRefreshBots && !force) return botRegistry;
    if (!force && Date.now() - lastBotRefresh < BOT_REFRESH_MS) return botRegistry;
    // Claim the refresh slot before awaiting so concurrent requests do not
    // each start their own fetch.
    lastBotRefresh = Date.now();
    try {
      const res = await fetch(`${apiBase}/public/bots`, {
        headers: { Accept: "application/json" },
      });
      if (res.ok) {
        const data = await res.json();
        const fresh = usableBots(data?.bots);
        if (fresh.length > 0) {
          botRegistry = fresh;
        }
      } else {
        await discardBody(res);
      }
    } catch {
      // Network or parse failure — keep the stale list, retry sooner than a
      // full interval but not on every call.
      lastBotRefresh = Date.now() - BOT_REFRESH_MS + BOT_RETRY_MS;
    }
    return botRegistry;
  }

  /**
   * Map a request path to a manifest kind.
   *
   * The query string and trailing slashes are ignored.
   *
   * @param {string} path
   * @returns {string|null} One of the PATH_MAP kinds, or null.
   */
  function matchPath(path) {
    if (!path || typeof path !== "string") return null;
    const bare = path.split("?")[0];
    const clean = bare.replace(/\/+$/, "") || "/";
    if (hasOwn(PATH_MAP, clean)) return PATH_MAP[clean];
    if (hasOwn(PATH_MAP, bare)) return PATH_MAP[bare];
    return null;
  }

  /**
   * Match a User-Agent against the current registry.
   *
   * Also kicks off the throttled background registry refresh when enabled;
   * the refresh is not awaited and never affects the result of this call.
   *
   * @param {string} userAgent
   * @returns {object|null} The matched registry entry, or null.
   */
  function detectBot(userAgent) {
    if (backgroundRefresh && Date.now() - lastBotRefresh >= BOT_REFRESH_MS) {
      // Fire-and-forget: refreshBotRegistry handles its own errors.
      refreshBotRegistry().catch(() => {});
    }
    return detectFromList(userAgent, botRegistry);
  }

  // Fetch one manifest kind for this domain from the configured CDN base.
  async function fetchFromCdn(kind) {
    const url = `${cdnBase}/${encodeURIComponent(domain)}/${kind}`;
    return fetch(url, { headers: { Accept: CONTENT_TYPE[kind] || "*/*" } });
  }

  /**
   * Build the response for one of the well-known paths.
   *
   * Returns { status, headers, body } where body is a string (text formats)
   * or a JS object (JSON formats). Adapters serialize as needed. Never
   * rejects: an unknown kind yields 404, an upstream non-OK status is passed
   * through, and a fetch or parse failure yields 502.
   *
   * @param {string} kind  A kind returned by matchPath().
   * @returns {Promise<{status: number, headers: object, body: string|object}>}
   */
  async function serveManifest(kind) {
    if (typeof kind !== "string" || !hasOwn(CONTENT_TYPE, kind)) {
      return { status: 404, headers: {}, body: "Unknown manifest kind" };
    }
    const isJson = kind === "manifest" || kind === "mcp";
    try {
      const res = await fetchFromCdn(kind);
      if (!res.ok) {
        await discardBody(res);
        return {
          status: res.status,
          headers: { "Content-Type": CONTENT_TYPE[kind] },
          body: isJson
            ? { error: `Manifest unavailable (${res.status})` }
            : `Manifest unavailable (${res.status})`,
        };
      }
      const headers = {
        "Content-Type": CONTENT_TYPE[kind],
        "Cache-Control": "public, max-age=300, s-maxage=600",
        "X-Ooky-Sdk": "node",
      };
      const body = isJson ? await res.json() : await res.text();
      return { status: 200, headers, body };
    } catch (err) {
      // `err` is not guaranteed to be an Error instance.
      const reason = err?.message ?? String(err);
      return {
        status: 502,
        headers: { "Content-Type": "text/plain; charset=utf-8" },
        body: `Ooky manifest fetch failed: ${reason}`,
      };
    }
  }

  /**
   * Fire-and-forget event POST. Returns immediately; the caller should not
   * `await` it on the request hot path. Never throws and never rejects — a
   * missing payload, an unserialisable payload or a network error all
   * resolve quietly.
   *
   * @param {object} [payload]  Optional event_id, timestamp, bot, request,
   *   session, geo and serve fields; missing ones are defaulted.
   * @returns {Promise<Response|undefined>}
   */
  function recordEvent(payload) {
    try {
      const event = payload ?? {};
      const body = JSON.stringify({
        event_id: event.event_id || cryptoRandomId(),
        timestamp: event.timestamp || new Date().toISOString(),
        domain, // server overrides anyway, but include for backward compat
        bot: event.bot || null,
        request: event.request || null,
        session: event.session || null,
        geo: event.geo || null,
        serve: event.serve || null,
      });

      // keepalive lets the POST outlive the response cycle.
      return fetch(`${apiBase}/ingest/events`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
        },
        body,
        keepalive: true,
      }).catch(() => {
        // Fire-and-forget — never throw on the request path.
      });
    } catch {
      // Serialisation failed (circular data, BigInt, ...) — drop the event.
      return Promise.resolve();
    }
  }

  return {
    matchPath,
    detectBot,
    serveManifest,
    recordEvent,
    refreshBotRegistry,
    // exposed for tests / introspection
    _options: { apiBase, cdnBase, domain },
  };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Reject any apiBase / cdnBase override that isn't a real http(s) URL.
 * Prevents file://, data:, javascript:, etc. from being injected through SDK
 * options on a misconfigured customer app.
 *
 * Returns the value with surrounding whitespace removed. The URL parser
 * ignores leading/trailing whitespace when validating, so returning the raw
 * string would let e.g. a trailing newline from an env var pass validation
 * and then end up in the middle of every request URL built from it.
 *
 * @param {string} value       Candidate base URL.
 * @param {string} optionName  Option name used in error messages.
 * @returns {string} The trimmed, validated URL string.
 * @throws {TypeError} If value is not a non-empty string, is not parseable
 *   as a URL, or uses a protocol other than http: / https:.
 */
function assertHttpUrl(value, optionName) {
  if (typeof value !== "string" || value.length === 0) {
    throw new TypeError(`createOokyHandler: options.${optionName} must be a non-empty string`);
  }
  const candidate = value.trim();
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new TypeError(`createOokyHandler: options.${optionName} must be a valid URL`);
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new TypeError(
      `createOokyHandler: options.${optionName} must use http or https (got ${parsed.protocol})`
    );
  }
  return candidate;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Event-id generator that works in every JS runtime.
 *
 * Preference order:
 *   1. `crypto.randomUUID()` when the runtime provides it.
 *   2. A version-4 UUID assembled from `crypto.getRandomValues()`.
 *   3. `<base36 timestamp>-<9 base36 chars>` from Math.random() as a last
 *      resort when no Web Crypto global exists.
 *
 * The last-resort form is not cryptographically strong; it is only meant to
 * make event ids distinct. The random part is padded to a fixed 9
 * characters because `Math.random().toString(36)` can yield fewer digits
 * (or none at all for 0).
 *
 * @returns {string}
 */
function cryptoRandomId() {
  const webCrypto = globalThis.crypto;
  if (typeof webCrypto?.randomUUID === "function") {
    return webCrypto.randomUUID();
  }
  if (typeof webCrypto?.getRandomValues === "function") {
    const bytes = webCrypto.getRandomValues(new Uint8Array(16));
    bytes[6] = (bytes[6] & 0x0f) | 0x40; // version 4
    bytes[8] = (bytes[8] & 0x3f) | 0x80; // RFC 4122 variant
    const hex = Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
    return [
      hex.slice(0, 8),
      hex.slice(8, 12),
      hex.slice(12, 16),
      hex.slice(16, 20),
      hex.slice(20),
    ].join("-");
  }
  const stamp = Date.now().toString(36);
  const noise = Math.random().toString(36).slice(2, 11).padEnd(9, "0");
  return `${stamp}-${noise}`;
}
|
package/src/edge.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel Edge / generic Web Fetch adapter.
|
|
3
|
+
*
|
|
4
|
+
* // middleware.ts (Vercel Edge runtime)
|
|
5
|
+
* import { ookyEdge } from "@ooky/sdk/edge";
|
|
6
|
+
* export default ookyEdge({ apiKey: "ooky_sk_...", domain: "acme.com" });
|
|
7
|
+
* export const config = { matcher: "/(.*)" };
|
|
8
|
+
*
|
|
9
|
+
* Same shape as the Next.js adapter but exported under a different name to
|
|
10
|
+
* make intent obvious in the customer's middleware file. Both are pure Web
|
|
11
|
+
* Fetch — no Node-specific APIs are touched.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
export { ookyMiddleware as ookyEdge } from "./next.js";
|
package/src/express.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Express adapter — `app.use(ookyMiddleware({ apiKey, domain }))`.
|
|
3
|
+
*
|
|
4
|
+
* Intercepts the well-known AI paths and serves the manifest. For every
|
|
5
|
+
* request (manifest or not), checks the User-Agent against the bot registry
|
|
6
|
+
* and fires a fire-and-forget event when a bot is detected.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createOokyHandler } from "./core.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Build the Express middleware.
 *
 * For every request it checks the User-Agent against the bot registry and,
 * on a match, records a fire-and-forget event. Requests for one of the
 * well-known AI paths are answered with the manifest; everything else is
 * passed to `next()`.
 *
 * Error containment: Express 4 does not observe a rejected promise from an
 * async middleware, so an escaped exception would leave the request hanging.
 * Bot tracking failures are therefore swallowed (analytics must never break
 * the host app), and failures while serving the manifest are forwarded to
 * `next(err)`.
 *
 * @param {object} options  Passed through to createOokyHandler.
 * @returns {(req: object, res: object, next: Function) => Promise<void>}
 * @throws {TypeError} Whatever createOokyHandler throws for invalid options.
 */
export function ookyMiddleware(options) {
  const handler = createOokyHandler(options);

  return async function ookyHandler(req, res, next) {
    const ua = req.headers["user-agent"] || "";
    const path = req.path || req.url || "/";

    // Fire bot event regardless of whether we serve the manifest. The Ooky
    // dashboard tracks bot visits across all routes, not just /llms.txt.
    // `verified: false` — UA-only matching can't prove bot identity. The
    // Worker tier does IP + reverse-DNS verification; the SDK can't.
    try {
      const bot = handler.detectBot(ua);
      if (bot) {
        handler.recordEvent({
          bot: { name: bot.name, verified: false, ua_string: ua },
          request: {
            page_path: path.split("?")[0] || "/",
            method: req.method || "GET",
          },
        });
      }
    } catch {
      // Best-effort analytics — keep serving the request.
    }

    let served = null;
    try {
      const kind = handler.matchPath(path);
      if (kind) {
        served = await handler.serveManifest(kind);
      }
    } catch (err) {
      return next(err);
    }
    if (!served) return next();

    try {
      const { status, headers, body } = served;
      res.status(status);
      for (const [k, v] of Object.entries(headers)) {
        res.setHeader(k, v);
      }
      if (typeof body === "string") {
        res.send(body);
      } else {
        res.json(body);
      }
    } catch (err) {
      next(err);
    }
  };
}
|
package/src/next.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Next.js adapter — usable in both the Node and Edge runtimes.
|
|
3
|
+
*
|
|
4
|
+
* // middleware.ts
|
|
5
|
+
* import { ookyMiddleware } from "@ooky/sdk/next";
|
|
6
|
+
* export default ookyMiddleware({ apiKey: "ooky_sk_...", domain: "acme.com" });
|
|
7
|
+
* export const config = { matcher: ["/llms.txt", "/llms-full.txt", "/.well-known/:path*", "/agents.md", "/(.*)"] };
|
|
8
|
+
*
|
|
9
|
+
* Returns a function with the Next.js middleware signature: it receives a
|
|
10
|
+
* NextRequest-like object (compatible Web Request) and returns a Response,
|
|
11
|
+
* or undefined to fall through to the next middleware/route.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createOokyHandler } from "./core.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Build the Next.js / Web Fetch middleware.
 *
 * For every request it checks the User-Agent against the bot registry and,
 * on a match, records a fire-and-forget event. Requests for one of the
 * well-known AI paths get a `Response` carrying the manifest; for everything
 * else it resolves to `undefined` so the framework continues to the next
 * middleware/route.
 *
 * Bot tracking is best-effort: an exception from detection or event
 * recording is swallowed so analytics can never fail the customer's request.
 *
 * @param {object} options  Passed through to createOokyHandler.
 * @returns {(request: Request) => Promise<Response|undefined>}
 * @throws {TypeError} Whatever createOokyHandler throws for invalid options.
 */
export function ookyMiddleware(options) {
  const handler = createOokyHandler(options);

  return async function ookyNextMiddleware(request) {
    const url = new URL(request.url);
    const ua = request.headers.get("user-agent") || "";

    // `verified: false` — UA-only matching can't prove bot identity. The
    // Worker tier does IP + reverse-DNS verification; the SDK can't.
    try {
      const bot = handler.detectBot(ua);
      if (bot) {
        handler.recordEvent({
          bot: { name: bot.name, verified: false, ua_string: ua },
          request: {
            page_path: url.pathname || "/",
            method: request.method || "GET",
          },
        });
      }
    } catch {
      // Best-effort analytics — keep serving the request.
    }

    const kind = handler.matchPath(url.pathname);
    if (!kind) return undefined;

    const { status, headers, body } = await handler.serveManifest(kind);
    // JSON kinds arrive as objects; text kinds are already strings.
    const responseBody = typeof body === "string" ? body : JSON.stringify(body);
    return new Response(responseBody, { status, headers });
  };
}
|