third-audience-mdx 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dashboard/routes/okf-graph-route.d.mts +6 -0
- package/dist/dashboard/routes/okf-graph-route.d.ts +6 -0
- package/dist/dashboard/routes/okf-graph-route.js +266 -0
- package/dist/dashboard/routes/okf-graph-route.js.map +1 -0
- package/dist/dashboard/routes/okf-graph-route.mjs +231 -0
- package/dist/dashboard/routes/okf-graph-route.mjs.map +1 -0
- package/dist/dashboard/routes/okf-route.js +1 -1
- package/dist/dashboard/routes/okf-route.js.map +1 -1
- package/dist/dashboard/routes/okf-route.mjs +1 -1
- package/dist/dashboard/routes/okf-route.mjs.map +1 -1
- package/dist/dashboard/ui/components/Sidebar.js +15 -0
- package/dist/dashboard/ui/components/Sidebar.js.map +1 -1
- package/dist/dashboard/ui/components/Sidebar.mjs +15 -0
- package/dist/dashboard/ui/components/Sidebar.mjs.map +1 -1
- package/dist/dashboard/ui/pages/OkfPage.d.mts +5 -0
- package/dist/dashboard/ui/pages/OkfPage.d.ts +5 -0
- package/dist/dashboard/ui/pages/OkfPage.js +438 -0
- package/dist/dashboard/ui/pages/OkfPage.js.map +1 -0
- package/dist/dashboard/ui/pages/OkfPage.mjs +414 -0
- package/dist/dashboard/ui/pages/OkfPage.mjs.map +1 -0
- package/dist/index.d.mts +4 -8
- package/dist/index.d.ts +4 -8
- package/dist/index.js +94 -231
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +94 -234
- package/dist/index.mjs.map +1 -1
- package/package.json +20 -2
package/dist/index.mjs
CHANGED
|
@@ -1,208 +1,3 @@
|
|
|
1
|
-
var __defProp = Object.defineProperty;
|
|
2
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
3
|
-
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
4
|
-
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
5
|
-
}) : x)(function(x) {
|
|
6
|
-
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
7
|
-
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
8
|
-
});
|
|
9
|
-
var __esm = (fn, res) => function __init() {
|
|
10
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
11
|
-
};
|
|
12
|
-
var __export = (target, all) => {
|
|
13
|
-
for (var name in all)
|
|
14
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
// src/detection/known-patterns.ts
|
|
18
|
-
var KNOWN_BOTS;
|
|
19
|
-
var init_known_patterns = __esm({
|
|
20
|
-
"src/detection/known-patterns.ts"() {
|
|
21
|
-
"use strict";
|
|
22
|
-
KNOWN_BOTS = [
|
|
23
|
-
// AI Crawlers
|
|
24
|
-
{ name: "ClaudeBot", category: "ai_crawler", patterns: [/claudebot/i, /claude-web/i] },
|
|
25
|
-
{ name: "GPTBot", category: "ai_crawler", patterns: [/gptbot/i] },
|
|
26
|
-
{ name: "ChatGPT-User", category: "ai_crawler", patterns: [/chatgpt-user/i] },
|
|
27
|
-
{ name: "PerplexityBot", category: "ai_crawler", patterns: [/perplexitybot/i] },
|
|
28
|
-
{ name: "Googlebot-AI", category: "ai_crawler", patterns: [/google-extended/i, /googleother/i] },
|
|
29
|
-
{ name: "FacebookBot", category: "ai_crawler", patterns: [/facebookbot/i] },
|
|
30
|
-
{ name: "Applebot-Extended", category: "ai_crawler", patterns: [/applebot-extended/i] },
|
|
31
|
-
{ name: "YouBot", category: "ai_crawler", patterns: [/youbot/i] },
|
|
32
|
-
{ name: "CCBot", category: "ai_crawler", patterns: [/ccbot/i] },
|
|
33
|
-
{ name: "CohereCrawler", category: "ai_crawler", patterns: [/cohere-ai/i] },
|
|
34
|
-
{ name: "AI2Bot", category: "ai_crawler", patterns: [/ai2bot/i] },
|
|
35
|
-
{ name: "Bytespider", category: "ai_crawler", patterns: [/bytespider/i] },
|
|
36
|
-
{ name: "Diffbot", category: "ai_crawler", patterns: [/diffbot/i] },
|
|
37
|
-
// Search Engines
|
|
38
|
-
{ name: "Googlebot", category: "search_engine", patterns: [/googlebot/i] },
|
|
39
|
-
{ name: "Bingbot", category: "search_engine", patterns: [/bingbot/i, /msnbot/i] },
|
|
40
|
-
{ name: "DuckDuckBot", category: "search_engine", patterns: [/duckduckbot/i] },
|
|
41
|
-
{ name: "Baiduspider", category: "search_engine", patterns: [/baiduspider/i] },
|
|
42
|
-
{ name: "YandexBot", category: "search_engine", patterns: [/yandexbot/i] },
|
|
43
|
-
{ name: "Sogou", category: "search_engine", patterns: [/sogou/i] },
|
|
44
|
-
{ name: "Exabot", category: "search_engine", patterns: [/exabot/i] },
|
|
45
|
-
{ name: "ia_archiver", category: "search_engine", patterns: [/ia_archiver/i] }
|
|
46
|
-
];
|
|
47
|
-
}
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
// src/detection/bot-detection-pipeline.ts
|
|
51
|
-
function detectBot(input) {
|
|
52
|
-
const ua = input.userAgent ?? "";
|
|
53
|
-
for (const bot of KNOWN_BOTS) {
|
|
54
|
-
for (const pattern of bot.patterns) {
|
|
55
|
-
if (pattern.test(ua)) {
|
|
56
|
-
return {
|
|
57
|
-
isBot: true,
|
|
58
|
-
botName: bot.name,
|
|
59
|
-
confidence: "high",
|
|
60
|
-
detectionMethod: "known_pattern",
|
|
61
|
-
category: bot.category,
|
|
62
|
-
rawUserAgent: ua
|
|
63
|
-
};
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
const heuristicResult = checkHeuristics(ua, input.headers ?? {});
|
|
68
|
-
if (heuristicResult) return { ...heuristicResult, rawUserAgent: ua };
|
|
69
|
-
if (looksLikeBotUa(ua)) {
|
|
70
|
-
return {
|
|
71
|
-
isBot: true,
|
|
72
|
-
botName: null,
|
|
73
|
-
confidence: "low",
|
|
74
|
-
detectionMethod: "auto_learned",
|
|
75
|
-
category: "unknown_bot",
|
|
76
|
-
rawUserAgent: ua
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
return {
|
|
80
|
-
isBot: false,
|
|
81
|
-
botName: null,
|
|
82
|
-
confidence: "high",
|
|
83
|
-
detectionMethod: "none",
|
|
84
|
-
category: "human",
|
|
85
|
-
rawUserAgent: ua
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
function checkHeuristics(ua, headers) {
|
|
89
|
-
if (/headlesschrome/i.test(ua)) {
|
|
90
|
-
return { isBot: true, botName: "HeadlessChrome", confidence: "medium", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
91
|
-
}
|
|
92
|
-
if (/phantomjs/i.test(ua)) {
|
|
93
|
-
return { isBot: true, botName: "PhantomJS", confidence: "high", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
94
|
-
}
|
|
95
|
-
if (/selenium/i.test(ua)) {
|
|
96
|
-
return { isBot: true, botName: "Selenium", confidence: "high", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
97
|
-
}
|
|
98
|
-
if (ua.trim().length < 10) {
|
|
99
|
-
return { isBot: true, botName: null, confidence: "low", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
100
|
-
}
|
|
101
|
-
const hasAcceptLang = !!headers["accept-language"];
|
|
102
|
-
const hasAcceptEncoding = !!headers["accept-encoding"];
|
|
103
|
-
if (!hasAcceptLang && !hasAcceptEncoding) {
|
|
104
|
-
return { isBot: true, botName: null, confidence: "low", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
105
|
-
}
|
|
106
|
-
return null;
|
|
107
|
-
}
|
|
108
|
-
function looksLikeBotUa(ua) {
|
|
109
|
-
return /bot|crawler|spider|scraper|fetch|http|python|curl|java|ruby|go-http|node/i.test(ua) && !/chrome|firefox|safari|edge|opera/i.test(ua);
|
|
110
|
-
}
|
|
111
|
-
var init_bot_detection_pipeline = __esm({
|
|
112
|
-
"src/detection/bot-detection-pipeline.ts"() {
|
|
113
|
-
"use strict";
|
|
114
|
-
init_known_patterns();
|
|
115
|
-
}
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
// src/analytics/geolocation.ts
|
|
119
|
-
function loadGeoip() {
|
|
120
|
-
if (geoip) return geoip;
|
|
121
|
-
try {
|
|
122
|
-
geoip = __require("geoip-lite");
|
|
123
|
-
} catch {
|
|
124
|
-
geoip = null;
|
|
125
|
-
}
|
|
126
|
-
return geoip;
|
|
127
|
-
}
|
|
128
|
-
function getCountry(ip) {
|
|
129
|
-
if (!ip || ip === "unknown" || ip === "127.0.0.1" || ip.startsWith("::")) return null;
|
|
130
|
-
const geo = loadGeoip();
|
|
131
|
-
if (!geo) return null;
|
|
132
|
-
try {
|
|
133
|
-
const result = geo.lookup(ip);
|
|
134
|
-
return result?.country ?? null;
|
|
135
|
-
} catch {
|
|
136
|
-
return null;
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
var geoip;
|
|
140
|
-
var init_geolocation = __esm({
|
|
141
|
-
"src/analytics/geolocation.ts"() {
|
|
142
|
-
"use strict";
|
|
143
|
-
geoip = null;
|
|
144
|
-
}
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
// src/analytics/visit-tracker.ts
|
|
148
|
-
var visit_tracker_exports = {};
|
|
149
|
-
__export(visit_tracker_exports, {
|
|
150
|
-
VisitTracker: () => VisitTracker
|
|
151
|
-
});
|
|
152
|
-
import fs from "fs";
|
|
153
|
-
import path from "path";
|
|
154
|
-
var _VisitTracker, VisitTracker;
|
|
155
|
-
var init_visit_tracker = __esm({
|
|
156
|
-
"src/analytics/visit-tracker.ts"() {
|
|
157
|
-
"use strict";
|
|
158
|
-
init_bot_detection_pipeline();
|
|
159
|
-
init_geolocation();
|
|
160
|
-
_VisitTracker = class _VisitTracker {
|
|
161
|
-
constructor(dataDir) {
|
|
162
|
-
this.dataDir = dataDir;
|
|
163
|
-
}
|
|
164
|
-
static getInstance(dataDir = process.env.TA_DATA_DIR ?? "data") {
|
|
165
|
-
if (!_VisitTracker.instance) {
|
|
166
|
-
_VisitTracker.instance = new _VisitTracker(dataDir);
|
|
167
|
-
}
|
|
168
|
-
return _VisitTracker.instance;
|
|
169
|
-
}
|
|
170
|
-
record(req, meta = {}) {
|
|
171
|
-
const ua = req.headers.get("user-agent") ?? "";
|
|
172
|
-
const result = detectBot({ userAgent: ua, headers: Object.fromEntries(req.headers) });
|
|
173
|
-
if (!result.isBot) return;
|
|
174
|
-
const ip = req.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? req.headers.get("x-real-ip") ?? "unknown";
|
|
175
|
-
const record = {
|
|
176
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
177
|
-
bot_name: result.botName,
|
|
178
|
-
bot_category: result.category,
|
|
179
|
-
detection_method: result.detectionMethod,
|
|
180
|
-
confidence: result.confidence,
|
|
181
|
-
url: req.nextUrl.pathname,
|
|
182
|
-
ip,
|
|
183
|
-
country: getCountry(ip),
|
|
184
|
-
user_agent: ua,
|
|
185
|
-
referer: req.headers.get("referer"),
|
|
186
|
-
response_ms: meta.responseMs ?? null,
|
|
187
|
-
cache_hit: meta.cacheHit ?? false,
|
|
188
|
-
content_length: meta.contentLength ?? null
|
|
189
|
-
};
|
|
190
|
-
this.append("ta-visits.jsonl", record);
|
|
191
|
-
}
|
|
192
|
-
append(filename, record) {
|
|
193
|
-
try {
|
|
194
|
-
const filePath = path.join(this.dataDir, filename);
|
|
195
|
-
fs.mkdirSync(this.dataDir, { recursive: true });
|
|
196
|
-
fs.appendFileSync(filePath, JSON.stringify(record) + "\n", "utf-8");
|
|
197
|
-
} catch {
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
};
|
|
201
|
-
_VisitTracker.instance = null;
|
|
202
|
-
VisitTracker = _VisitTracker;
|
|
203
|
-
}
|
|
204
|
-
});
|
|
205
|
-
|
|
206
1
|
// src/core/config.ts
|
|
207
2
|
var defaultConfig = {
|
|
208
3
|
contentDir: "content",
|
|
@@ -257,28 +52,14 @@ function withThirdAudience(options = {}, nextConfig = {}) {
|
|
|
257
52
|
|
|
258
53
|
// src/core/middleware.ts
|
|
259
54
|
import { NextResponse } from "next/server";
|
|
260
|
-
|
|
261
|
-
// src/dashboard/admin-store.ts
|
|
262
|
-
import crypto from "crypto";
|
|
263
|
-
function verifySession(token) {
|
|
264
|
-
const lastDot = token.lastIndexOf(".");
|
|
265
|
-
if (lastDot === -1) return false;
|
|
266
|
-
const payload = token.slice(0, lastDot);
|
|
267
|
-
const sig = token.slice(lastDot + 1);
|
|
268
|
-
const expected = crypto.createHmac("sha256", process.env.THIRD_AUDIENCE_SECRET ?? "ta-salt").update(payload).digest("hex");
|
|
269
|
-
if (sig.length !== expected.length) return false;
|
|
270
|
-
return crypto.timingSafeEqual(Buffer.from(sig, "hex"), Buffer.from(expected, "hex"));
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
// src/core/middleware.ts
|
|
274
55
|
var COOKIE_NAME = "ta_session";
|
|
275
56
|
var RESET_COOKIE = "ta_session_reset";
|
|
276
|
-
|
|
57
|
+
function thirdAudienceMiddleware(req) {
|
|
277
58
|
const { pathname } = req.nextUrl;
|
|
278
59
|
const accept = req.headers.get("accept") ?? "";
|
|
279
60
|
if (pathname.startsWith("/third-audience") && !pathname.startsWith("/third-audience/login")) {
|
|
280
61
|
const session = req.cookies.get(COOKIE_NAME)?.value;
|
|
281
|
-
if (!session
|
|
62
|
+
if (!session) {
|
|
282
63
|
const loginUrl = req.nextUrl.clone();
|
|
283
64
|
loginUrl.pathname = "/third-audience/login";
|
|
284
65
|
return NextResponse.redirect(loginUrl);
|
|
@@ -287,7 +68,7 @@ async function thirdAudienceMiddleware(req) {
|
|
|
287
68
|
if (pathname === "/third-audience/login" && req.nextUrl.searchParams.get("reset") === "1") {
|
|
288
69
|
const resetCookie = req.cookies.get(RESET_COOKIE)?.value;
|
|
289
70
|
const sessionCookie = req.cookies.get(COOKIE_NAME)?.value;
|
|
290
|
-
if (
|
|
71
|
+
if (!resetCookie && !sessionCookie) {
|
|
291
72
|
const loginUrl = req.nextUrl.clone();
|
|
292
73
|
loginUrl.pathname = "/third-audience/login";
|
|
293
74
|
loginUrl.search = "";
|
|
@@ -312,7 +93,8 @@ async function thirdAudienceMiddleware(req) {
|
|
|
312
93
|
}
|
|
313
94
|
if (pathname.startsWith("/okf")) {
|
|
314
95
|
const url = req.nextUrl.clone();
|
|
315
|
-
|
|
96
|
+
const rest = pathname.slice(4);
|
|
97
|
+
url.pathname = `/api/third-audience/okf${rest || "/index"}`;
|
|
316
98
|
return NextResponse.rewrite(url);
|
|
317
99
|
}
|
|
318
100
|
if (pathname === "/llms.txt") {
|
|
@@ -325,19 +107,97 @@ async function thirdAudienceMiddleware(req) {
|
|
|
325
107
|
url.pathname = "/api/third-audience/sitemap-ai";
|
|
326
108
|
return NextResponse.rewrite(url);
|
|
327
109
|
}
|
|
328
|
-
|
|
329
|
-
trackVisitAsync(req);
|
|
330
|
-
return response;
|
|
331
|
-
}
|
|
332
|
-
function trackVisitAsync(req) {
|
|
333
|
-
void Promise.resolve().then(() => (init_visit_tracker(), visit_tracker_exports)).then(({ VisitTracker: VisitTracker2 }) => {
|
|
334
|
-
VisitTracker2.getInstance().record(req);
|
|
335
|
-
}).catch(() => {
|
|
336
|
-
});
|
|
110
|
+
return null;
|
|
337
111
|
}
|
|
338
112
|
|
|
339
|
-
// src/
|
|
340
|
-
|
|
113
|
+
// src/detection/known-patterns.ts
|
|
114
|
+
var KNOWN_BOTS = [
|
|
115
|
+
// AI Crawlers
|
|
116
|
+
{ name: "ClaudeBot", category: "ai_crawler", patterns: [/claudebot/i, /claude-web/i] },
|
|
117
|
+
{ name: "GPTBot", category: "ai_crawler", patterns: [/gptbot/i] },
|
|
118
|
+
{ name: "ChatGPT-User", category: "ai_crawler", patterns: [/chatgpt-user/i] },
|
|
119
|
+
{ name: "PerplexityBot", category: "ai_crawler", patterns: [/perplexitybot/i] },
|
|
120
|
+
{ name: "Googlebot-AI", category: "ai_crawler", patterns: [/google-extended/i, /googleother/i] },
|
|
121
|
+
{ name: "FacebookBot", category: "ai_crawler", patterns: [/facebookbot/i] },
|
|
122
|
+
{ name: "Applebot-Extended", category: "ai_crawler", patterns: [/applebot-extended/i] },
|
|
123
|
+
{ name: "YouBot", category: "ai_crawler", patterns: [/youbot/i] },
|
|
124
|
+
{ name: "CCBot", category: "ai_crawler", patterns: [/ccbot/i] },
|
|
125
|
+
{ name: "CohereCrawler", category: "ai_crawler", patterns: [/cohere-ai/i] },
|
|
126
|
+
{ name: "AI2Bot", category: "ai_crawler", patterns: [/ai2bot/i] },
|
|
127
|
+
{ name: "Bytespider", category: "ai_crawler", patterns: [/bytespider/i] },
|
|
128
|
+
{ name: "Diffbot", category: "ai_crawler", patterns: [/diffbot/i] },
|
|
129
|
+
// Search Engines
|
|
130
|
+
{ name: "Googlebot", category: "search_engine", patterns: [/googlebot/i] },
|
|
131
|
+
{ name: "Bingbot", category: "search_engine", patterns: [/bingbot/i, /msnbot/i] },
|
|
132
|
+
{ name: "DuckDuckBot", category: "search_engine", patterns: [/duckduckbot/i] },
|
|
133
|
+
{ name: "Baiduspider", category: "search_engine", patterns: [/baiduspider/i] },
|
|
134
|
+
{ name: "YandexBot", category: "search_engine", patterns: [/yandexbot/i] },
|
|
135
|
+
{ name: "Sogou", category: "search_engine", patterns: [/sogou/i] },
|
|
136
|
+
{ name: "Exabot", category: "search_engine", patterns: [/exabot/i] },
|
|
137
|
+
{ name: "ia_archiver", category: "search_engine", patterns: [/ia_archiver/i] }
|
|
138
|
+
];
|
|
139
|
+
|
|
140
|
+
// src/detection/bot-detection-pipeline.ts
|
|
141
|
+
function detectBot(input) {
|
|
142
|
+
const ua = input.userAgent ?? "";
|
|
143
|
+
for (const bot of KNOWN_BOTS) {
|
|
144
|
+
for (const pattern of bot.patterns) {
|
|
145
|
+
if (pattern.test(ua)) {
|
|
146
|
+
return {
|
|
147
|
+
isBot: true,
|
|
148
|
+
botName: bot.name,
|
|
149
|
+
confidence: "high",
|
|
150
|
+
detectionMethod: "known_pattern",
|
|
151
|
+
category: bot.category,
|
|
152
|
+
rawUserAgent: ua
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
const heuristicResult = checkHeuristics(ua, input.headers ?? {});
|
|
158
|
+
if (heuristicResult) return { ...heuristicResult, rawUserAgent: ua };
|
|
159
|
+
if (looksLikeBotUa(ua)) {
|
|
160
|
+
return {
|
|
161
|
+
isBot: true,
|
|
162
|
+
botName: null,
|
|
163
|
+
confidence: "low",
|
|
164
|
+
detectionMethod: "auto_learned",
|
|
165
|
+
category: "unknown_bot",
|
|
166
|
+
rawUserAgent: ua
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
return {
|
|
170
|
+
isBot: false,
|
|
171
|
+
botName: null,
|
|
172
|
+
confidence: "high",
|
|
173
|
+
detectionMethod: "none",
|
|
174
|
+
category: "human",
|
|
175
|
+
rawUserAgent: ua
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
function checkHeuristics(ua, headers) {
|
|
179
|
+
if (/headlesschrome/i.test(ua)) {
|
|
180
|
+
return { isBot: true, botName: "HeadlessChrome", confidence: "medium", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
181
|
+
}
|
|
182
|
+
if (/phantomjs/i.test(ua)) {
|
|
183
|
+
return { isBot: true, botName: "PhantomJS", confidence: "high", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
184
|
+
}
|
|
185
|
+
if (/selenium/i.test(ua)) {
|
|
186
|
+
return { isBot: true, botName: "Selenium", confidence: "high", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
187
|
+
}
|
|
188
|
+
if (ua.trim().length < 10) {
|
|
189
|
+
return { isBot: true, botName: null, confidence: "low", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
190
|
+
}
|
|
191
|
+
const hasAcceptLang = !!headers["accept-language"];
|
|
192
|
+
const hasAcceptEncoding = !!headers["accept-encoding"];
|
|
193
|
+
if (!hasAcceptLang && !hasAcceptEncoding) {
|
|
194
|
+
return { isBot: true, botName: null, confidence: "low", detectionMethod: "heuristic", category: "unknown_bot" };
|
|
195
|
+
}
|
|
196
|
+
return null;
|
|
197
|
+
}
|
|
198
|
+
function looksLikeBotUa(ua) {
|
|
199
|
+
return /bot|crawler|spider|scraper|fetch|http|python|curl|java|ruby|go-http|node/i.test(ua) && !/chrome|firefox|safari|edge|opera/i.test(ua);
|
|
200
|
+
}
|
|
341
201
|
export {
|
|
342
202
|
detectBot,
|
|
343
203
|
thirdAudienceMiddleware,
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/detection/known-patterns.ts","../src/detection/bot-detection-pipeline.ts","../src/analytics/geolocation.ts","../src/analytics/visit-tracker.ts","../src/core/config.ts","../src/core/with-third-audience.ts","../src/core/middleware.ts","../src/dashboard/admin-store.ts","../src/index.ts"],"sourcesContent":["/** Known AI crawler and search engine user-agent patterns. */\nexport interface KnownBot {\n name: string\n category: 'ai_crawler' | 'search_engine'\n patterns: RegExp[]\n}\n\nexport const KNOWN_BOTS: KnownBot[] = [\n // AI Crawlers\n { name: 'ClaudeBot', category: 'ai_crawler', patterns: [/claudebot/i, /claude-web/i] },\n { name: 'GPTBot', category: 'ai_crawler', patterns: [/gptbot/i] },\n { name: 'ChatGPT-User', category: 'ai_crawler', patterns: [/chatgpt-user/i] },\n { name: 'PerplexityBot', category: 'ai_crawler', patterns: [/perplexitybot/i] },\n { name: 'Googlebot-AI', category: 'ai_crawler', patterns: [/google-extended/i, /googleother/i] },\n { name: 'FacebookBot', category: 'ai_crawler', patterns: [/facebookbot/i] },\n { name: 'Applebot-Extended',category: 'ai_crawler', patterns: [/applebot-extended/i] },\n { name: 'YouBot', category: 'ai_crawler', patterns: [/youbot/i] },\n { name: 'CCBot', category: 'ai_crawler', patterns: [/ccbot/i] },\n { name: 'CohereCrawler', category: 'ai_crawler', patterns: [/cohere-ai/i] },\n { name: 'AI2Bot', category: 'ai_crawler', patterns: [/ai2bot/i] },\n { name: 'Bytespider', category: 'ai_crawler', patterns: [/bytespider/i] },\n { name: 'Diffbot', category: 'ai_crawler', patterns: [/diffbot/i] },\n\n // Search Engines\n { name: 'Googlebot', category: 'search_engine', patterns: [/googlebot/i] },\n { name: 'Bingbot', category: 'search_engine', patterns: [/bingbot/i, /msnbot/i] },\n { name: 'DuckDuckBot', category: 'search_engine', patterns: [/duckduckbot/i] },\n { name: 'Baiduspider', category: 'search_engine', patterns: [/baiduspider/i] },\n { name: 'YandexBot', category: 'search_engine', patterns: [/yandexbot/i] },\n { name: 'Sogou', category: 'search_engine', patterns: [/sogou/i] },\n { name: 'Exabot', category: 'search_engine', patterns: [/exabot/i] },\n { name: 'ia_archiver', category: 'search_engine', patterns: [/ia_archiver/i] },\n]\n","import type { BotDetectionResult } from './bot-detection-result.js'\nimport { KNOWN_BOTS } from './known-patterns.js'\n\nexport interface DetectBotInput {\n userAgent: string\n /** Optional: headers map for heuristic checks */\n headers?: Record<string, string | string[] | undefined>\n /** Optional: IP address */\n ip?: string\n}\n\n/**\n * Three-layer bot detection pipeline:\n * 1. Known pattern matching (O(n) UA string match)\n * 2. Heuristic signals (missing headers, headless indicators)\n * 3. Auto-learner flag (unknown UAs that behave bot-like)\n */\nexport function detectBot(input: DetectBotInput): BotDetectionResult {\n const ua = input.userAgent ?? ''\n\n // Layer 1: known pattern match\n for (const bot of KNOWN_BOTS) {\n for (const pattern of bot.patterns) {\n if (pattern.test(ua)) {\n return {\n isBot: true,\n botName: bot.name,\n confidence: 'high',\n detectionMethod: 'known_pattern',\n category: bot.category,\n rawUserAgent: ua,\n }\n }\n }\n }\n\n // Layer 2: heuristics\n const heuristicResult = checkHeuristics(ua, input.headers ?? {})\n if (heuristicResult) return { ...heuristicResult, rawUserAgent: ua }\n\n // Layer 3: auto-learner — flag suspicious unknown UAs for review\n if (looksLikeBotUa(ua)) {\n return {\n isBot: true,\n botName: null,\n confidence: 'low',\n detectionMethod: 'auto_learned',\n category: 'unknown_bot',\n rawUserAgent: ua,\n }\n }\n\n return {\n isBot: false,\n botName: null,\n confidence: 'high',\n detectionMethod: 'none',\n category: 'human',\n rawUserAgent: ua,\n }\n}\n\nfunction checkHeuristics(\n ua: string,\n headers: Record<string, string | string[] | undefined>\n): Omit<BotDetectionResult, 'rawUserAgent'> | null {\n // Headless Chrome signals\n if (/headlesschrome/i.test(ua)) {\n return { isBot: true, botName: 'HeadlessChrome', confidence: 'medium', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n if (/phantomjs/i.test(ua)) {\n return { isBot: true, botName: 'PhantomJS', confidence: 'high', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n if (/selenium/i.test(ua)) {\n return { isBot: true, botName: 'Selenium', confidence: 'high', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n // Empty or very short UA is suspicious\n if (ua.trim().length < 10) {\n return { isBot: true, botName: null, confidence: 'low', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n // Missing typical browser headers\n const hasAcceptLang = !!headers['accept-language']\n const hasAcceptEncoding = !!headers['accept-encoding']\n if (!hasAcceptLang && !hasAcceptEncoding) {\n return { isBot: true, botName: null, confidence: 'low', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n return null\n}\n\nfunction looksLikeBotUa(ua: string): boolean {\n return (\n /bot|crawler|spider|scraper|fetch|http|python|curl|java|ruby|go-http|node/i.test(ua) &&\n !/chrome|firefox|safari|edge|opera/i.test(ua)\n )\n}\n","let geoip: typeof import('geoip-lite') | null = null\n\nfunction loadGeoip() {\n if (geoip) return geoip\n try {\n geoip = require('geoip-lite') as typeof import('geoip-lite')\n } catch {\n geoip = null\n }\n return geoip\n}\n\n/** Returns ISO 3166-1 alpha-2 country code, or null if lookup fails. */\nexport function getCountry(ip: string): string | null {\n if (!ip || ip === 'unknown' || ip === '127.0.0.1' || ip.startsWith('::')) return null\n const geo = loadGeoip()\n if (!geo) return null\n try {\n const result = geo.lookup(ip)\n return result?.country ?? null\n } catch {\n return null\n }\n}\n","import fs from 'fs'\nimport path from 'path'\nimport type { NextRequest } from 'next/server'\nimport { detectBot } from '../detection/bot-detection-pipeline.js'\nimport { getCountry } from './geolocation.js'\n\nexport interface VisitRecord {\n timestamp: string\n bot_name: string | null\n bot_category: string\n detection_method: string\n confidence: string\n url: string\n ip: string\n country: string | null\n user_agent: string\n referer: string | null\n response_ms: number | null\n cache_hit: boolean\n content_length: number | null\n}\n\nexport class VisitTracker {\n private static instance: VisitTracker | null = null\n private dataDir: string\n\n private constructor(dataDir: string) {\n this.dataDir = dataDir\n }\n\n static getInstance(dataDir = process.env.TA_DATA_DIR ?? 'data'): VisitTracker {\n if (!VisitTracker.instance) {\n VisitTracker.instance = new VisitTracker(dataDir)\n }\n return VisitTracker.instance\n }\n\n record(req: NextRequest, meta: { responseMs?: number; cacheHit?: boolean; contentLength?: number } = {}): void {\n const ua = req.headers.get('user-agent') ?? ''\n const result = detectBot({ userAgent: ua, headers: Object.fromEntries(req.headers) })\n\n if (!result.isBot) return // only track bots\n\n const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim()\n ?? req.headers.get('x-real-ip')\n ?? 'unknown'\n\n const record: VisitRecord = {\n timestamp: new Date().toISOString(),\n bot_name: result.botName,\n bot_category: result.category,\n detection_method: result.detectionMethod,\n confidence: result.confidence,\n url: req.nextUrl.pathname,\n ip,\n country: getCountry(ip),\n user_agent: ua,\n referer: req.headers.get('referer'),\n response_ms: meta.responseMs ?? null,\n cache_hit: meta.cacheHit ?? false,\n content_length: meta.contentLength ?? null,\n }\n\n this.append('ta-visits.jsonl', record)\n }\n\n private append(filename: string, record: VisitRecord): void {\n try {\n const filePath = path.join(this.dataDir, filename)\n fs.mkdirSync(this.dataDir, { recursive: true })\n fs.appendFileSync(filePath, JSON.stringify(record) + '\\n', 'utf-8')\n } catch {\n // Tracking must never throw\n }\n }\n}\n","export interface ThirdAudienceConfig {\n /** Directory containing .mdx files, relative to project root. Default: 'content' */\n contentDir?: string\n /** Directory for JSONL data files. Default: 'data' */\n dataDir?: string\n /** Mount the /third-audience/ dashboard. Default: true */\n dashboard?: boolean\n /** Secret for dashboard access (HTTP Basic or bearer). Required when dashboard: true */\n dashboardSecret?: string\n notifications?: {\n email?: { smtp: string; to: string; from?: string }\n slack?: { webhookUrl: string }\n }\n bots?: {\n allowlist?: string[]\n blocklist?: string[]\n }\n cache?: {\n /** Cache TTL in seconds. Default: 3600 */\n ttl?: number\n /** Max in-memory entries. Default: 500 */\n maxMemoryEntries?: number\n }\n}\n\nexport const defaultConfig: Required<ThirdAudienceConfig> = {\n contentDir: 'content',\n dataDir: 'data',\n dashboard: true,\n dashboardSecret: '',\n notifications: {},\n bots: { allowlist: [], blocklist: [] },\n cache: { ttl: 3600, maxMemoryEntries: 500 },\n}\n\nexport function resolveConfig(partial: ThirdAudienceConfig = {}): Required<ThirdAudienceConfig> {\n return {\n ...defaultConfig,\n ...partial,\n bots: { ...defaultConfig.bots, ...partial.bots },\n cache: { ...defaultConfig.cache, ...partial.cache },\n notifications: { ...defaultConfig.notifications, ...partial.notifications },\n }\n}\n","import type { NextConfig } from 'next'\nimport { resolveConfig, type ThirdAudienceConfig } from './config.js'\n\n/**\n * Wraps next.config.ts to inject Third Audience rewrites and headers.\n *\n * Usage:\n * import { withThirdAudience } from 'third-audience-mdx'\n * export default withThirdAudience({ contentDir: 'content' })\n */\nexport function withThirdAudience(\n options: ThirdAudienceConfig = {},\n nextConfig: NextConfig = {}\n): NextConfig {\n const config = resolveConfig(options)\n\n return {\n ...nextConfig,\n async headers() {\n const existing = await nextConfig.headers?.() ?? []\n return [\n ...existing,\n {\n source: '/:path*.md',\n headers: [{ key: 'Content-Type', value: 'text/markdown; charset=utf-8' }],\n },\n {\n source: '/llms.txt',\n headers: [{ key: 'Content-Type', value: 'text/plain; charset=utf-8' }],\n },\n {\n source: '/okf/:path*',\n headers: [{ key: 'Content-Type', value: 'text/markdown; charset=utf-8' }],\n },\n ]\n },\n env: {\n ...nextConfig.env,\n TA_CONTENT_DIR: config.contentDir,\n TA_DATA_DIR: config.dataDir,\n TA_DASHBOARD_ENABLED: String(config.dashboard),\n },\n }\n}\n","import { NextResponse, type NextRequest } from 'next/server'\nimport { verifySession } from '../dashboard/admin-store.js'\n\nconst COOKIE_NAME = 'ta_session'\nconst RESET_COOKIE = 'ta_session_reset'\n\n/**\n * Third Audience middleware.\n *\n * Handles:\n * - Dashboard auth: /third-audience/* requires valid session cookie\n * - .md URL requests → serve Markdown of matching MDX file\n * - Accept: text/markdown header → serve Markdown of current page\n * - Bot visit tracking (non-blocking, fire-and-forget)\n * - Citation detection via Referer header\n *\n * Wire up in middleware.ts:\n * export { thirdAudienceMiddleware as middleware } from 'third-audience-mdx'\n * export const config = { matcher: ['/((?!_next|api).*)'] }\n */\nexport async function thirdAudienceMiddleware(req: NextRequest): Promise<NextResponse> {\n const { pathname } = req.nextUrl\n const accept = req.headers.get('accept') ?? ''\n\n // Dashboard auth guard — all /third-audience/* except /login\n if (pathname.startsWith('/third-audience') && !pathname.startsWith('/third-audience/login')) {\n const session = req.cookies.get(COOKIE_NAME)?.value\n if (!session || !verifySession(session)) {\n const loginUrl = req.nextUrl.clone()\n loginUrl.pathname = '/third-audience/login'\n return NextResponse.redirect(loginUrl)\n }\n }\n\n // Password reset guard — /third-audience/login?reset=1 requires reset cookie\n if (pathname === '/third-audience/login' && req.nextUrl.searchParams.get('reset') === '1') {\n const resetCookie = req.cookies.get(RESET_COOKIE)?.value\n const sessionCookie = req.cookies.get(COOKIE_NAME)?.value\n // Allow if they have either a valid session or a valid reset token\n if ((!resetCookie || !verifySession(resetCookie)) && (!sessionCookie || !verifySession(sessionCookie))) {\n const loginUrl = req.nextUrl.clone()\n loginUrl.pathname = '/third-audience/login'\n loginUrl.search = ''\n return NextResponse.redirect(loginUrl)\n }\n }\n\n // /third-audience/login → rewrite to login route handler (GET/POST)\n if (pathname === '/third-audience/login') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/login'\n return NextResponse.rewrite(url)\n }\n\n // .md URL → rewrite to our internal markdown route handler\n if (pathname.endsWith('.md')) {\n const slug = pathname.slice(0, -3) // strip .md\n const url = req.nextUrl.clone()\n url.pathname = `/api/third-audience/markdown${slug}`\n return NextResponse.rewrite(url)\n }\n\n // Accept: text/markdown header → rewrite to markdown route\n if (accept.includes('text/markdown')) {\n const url = req.nextUrl.clone()\n url.pathname = `/api/third-audience/markdown${pathname}`\n return NextResponse.rewrite(url)\n }\n\n // /okf/ → rewrite to OKF bundle handler\n if (pathname.startsWith('/okf')) {\n const url = req.nextUrl.clone()\n url.pathname = `/api/third-audience/okf${pathname.slice(4)}`\n return NextResponse.rewrite(url)\n }\n\n // /llms.txt → rewrite to discovery handler\n if (pathname === '/llms.txt') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/llms-txt'\n return NextResponse.rewrite(url)\n }\n\n // /sitemap-ai.xml → rewrite to AI sitemap handler\n if (pathname === '/sitemap-ai.xml') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/sitemap-ai'\n return NextResponse.rewrite(url)\n }\n\n const response = NextResponse.next()\n\n // Fire-and-forget: track bot visits and citations (non-blocking)\n trackVisitAsync(req)\n\n return response\n}\n\nfunction trackVisitAsync(req: NextRequest): void {\n // Dynamically import to avoid loading analytics on every request sync path.\n // Uses void to intentionally not await — tracking must never block response.\n void import('../analytics/visit-tracker.js').then(({ VisitTracker }) => {\n VisitTracker.getInstance().record(req)\n }).catch(() => { /* never throw from tracking */ })\n}\n","import fs from 'fs'\nimport path from 'path'\nimport crypto from 'crypto'\n\nexport interface AdminRecord {\n passwordHash: string // sha256(secret + password)\n isDefaultPassword: boolean\n createdAt: string\n lastLoginAt: string | null\n apiKey?: string // AES-256-GCM encrypted, for headless/external API callers\n}\n\nfunction adminFilePath(): string {\n const dataDir = process.env.TA_DATA_DIR ?? 'data'\n return path.join(process.cwd(), dataDir, 'ta-admin.json')\n}\n\nexport function generateDefaultPassword(): string {\n return crypto.randomBytes(6).toString('hex') // 12-char hex, easy to type\n}\n\nexport function hashPassword(password: string): string {\n const secret = process.env.THIRD_AUDIENCE_SECRET ?? 'ta-salt'\n return crypto.createHash('sha256').update(secret + password).digest('hex')\n}\n\nexport function loadAdmin(): AdminRecord | null {\n const filePath = adminFilePath()\n if (!fs.existsSync(filePath)) return null\n try {\n return JSON.parse(fs.readFileSync(filePath, 'utf-8')) as AdminRecord\n } catch {\n return null\n }\n}\n\nexport function saveAdmin(record: AdminRecord): void {\n const filePath = adminFilePath()\n const dir = path.dirname(filePath)\n if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true })\n fs.writeFileSync(filePath, JSON.stringify(record, null, 2), 'utf-8')\n}\n\nexport const DEFAULT_PASSWORD = 'Chang3M3Now!'\n\nexport function initAdmin(): { password: string; apiKey: string; isNew: boolean } {\n const existing = loadAdmin()\n if (existing) return { password: '', apiKey: '', isNew: false }\n\n const apiKey = generateApiKey()\n saveAdmin({\n passwordHash: hashPassword(DEFAULT_PASSWORD),\n isDefaultPassword: true,\n createdAt: new Date().toISOString(),\n lastLoginAt: null,\n apiKey: encryptApiKey(apiKey),\n })\n return { password: DEFAULT_PASSWORD, apiKey, isNew: true }\n}\n\nexport function verifyPassword(password: string): boolean {\n const record = loadAdmin()\n if (!record) return false\n return record.passwordHash === hashPassword(password)\n}\n\nexport function updatePassword(newPassword: string): void {\n const record = loadAdmin()\n if (!record) return\n saveAdmin({\n ...record,\n passwordHash: hashPassword(newPassword),\n isDefaultPassword: false,\n })\n}\n\nexport function recordLogin(): void {\n const record = loadAdmin()\n if (!record) return\n saveAdmin({ ...record, lastLoginAt: new Date().toISOString() })\n}\n\n// ---------------------------------------------------------------------------\n// API key — AES-256-GCM encrypted at rest, mirroring WP's SECURE_AUTH_KEY approach\n// ---------------------------------------------------------------------------\n\nconst CIPHER = 'aes-256-gcm'\n\nfunction getEncryptionKey(): Buffer {\n const secret = process.env.THIRD_AUDIENCE_SECRET ?? 'ta-fallback-key-change-me'\n // Derive a 32-byte key from the secret using SHA-256\n return crypto.createHash('sha256').update(secret).digest()\n}\n\nfunction encryptApiKey(plaintext: string): string {\n const iv = crypto.randomBytes(12)\n const key = getEncryptionKey()\n const cipher = crypto.createCipheriv(CIPHER, key, iv) as crypto.CipherGCM\n const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()])\n const tag = cipher.getAuthTag()\n // Format: iv(24 hex) + tag(32 hex) + encrypted(hex)\n return iv.toString('hex') + tag.toString('hex') + encrypted.toString('hex')\n}\n\nfunction decryptApiKey(encoded: string): string | null {\n try {\n const iv = Buffer.from(encoded.slice(0, 24), 'hex')\n const tag = Buffer.from(encoded.slice(24, 56), 'hex')\n const encrypted = Buffer.from(encoded.slice(56), 'hex')\n const key = getEncryptionKey()\n const decipher = crypto.createDecipheriv(CIPHER, key, iv) as crypto.DecipherGCM\n decipher.setAuthTag(tag)\n return decipher.update(encrypted) + decipher.final('utf8')\n } catch {\n return null\n }\n}\n\nexport function generateApiKey(): string {\n return 'ta_' + crypto.randomBytes(24).toString('hex') // 51-char key\n}\n\nexport function getApiKey(): string | null {\n const record = loadAdmin()\n if (!record?.apiKey) return null\n return decryptApiKey(record.apiKey)\n}\n\nexport function rotateApiKey(): string {\n const record = loadAdmin()\n if (!record) throw new Error('Admin store not initialised')\n const newKey = generateApiKey()\n saveAdmin({ ...record, apiKey: encryptApiKey(newKey) })\n return newKey\n}\n\nexport function verifyApiKey(key: string): boolean {\n const stored = getApiKey()\n if (!stored) return false\n if (key.length !== stored.length) return false\n return crypto.timingSafeEqual(Buffer.from(key), Buffer.from(stored))\n}\n\n// ---------------------------------------------------------------------------\n// Session cookie: HMAC-SHA256(secret, userId + timestamp) — stateless, no DB\n// ---------------------------------------------------------------------------\nexport function signSession(payload: string): string {\n const secret = process.env.THIRD_AUDIENCE_SECRET ?? 'ta-salt'\n const sig = crypto.createHmac('sha256', secret).update(payload).digest('hex')\n return `${payload}.${sig}`\n}\n\nexport function verifySession(token: string): boolean {\n const lastDot = token.lastIndexOf('.')\n if (lastDot === -1) return false\n const payload = token.slice(0, lastDot)\n const sig = token.slice(lastDot + 1)\n const expected = crypto.createHmac('sha256', process.env.THIRD_AUDIENCE_SECRET ?? 'ta-salt')\n .update(payload).digest('hex')\n // Constant-time comparison\n if (sig.length !== expected.length) return false\n return crypto.timingSafeEqual(Buffer.from(sig, 'hex'), Buffer.from(expected, 'hex'))\n}\n","/**\n * third-audience-mdx\n * Public API surface for the package.\n */\n\nexport { withThirdAudience } from './core/with-third-audience.js'\nexport { thirdAudienceMiddleware } from './core/middleware.js'\nexport { detectBot } from './detection/bot-detection-pipeline.js'\nexport type { ThirdAudienceConfig } from './core/config.js'\nexport type { BotDetectionResult } from './detection/bot-detection-result.js'\n"],"mappings":";;;;;;;;;;;;;;;;;AAAA,IAOa;AAPb;AAAA;AAAA;AAOO,IAAM,aAAyB;AAAA;AAAA,MAEpC,EAAE,MAAM,aAAoB,UAAU,cAAiB,UAAU,CAAC,cAAc,aAAa,EAAE;AAAA,MAC/F,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,MAC7E,EAAE,MAAM,gBAAoB,UAAU,cAAiB,UAAU,CAAC,eAAe,EAAE;AAAA,MACnF,EAAE,MAAM,iBAAoB,UAAU,cAAiB,UAAU,CAAC,gBAAgB,EAAE;AAAA,MACpF,EAAE,MAAM,gBAAoB,UAAU,cAAiB,UAAU,CAAC,oBAAoB,cAAc,EAAE;AAAA,MACtG,EAAE,MAAM,eAAoB,UAAU,cAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,MAClF,EAAE,MAAM,qBAAoB,UAAU,cAAiB,UAAU,CAAC,oBAAoB,EAAE;AAAA,MACxF,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,MAC7E,EAAE,MAAM,SAAoB,UAAU,cAAiB,UAAU,CAAC,QAAQ,EAAE;AAAA,MAC5E,EAAE,MAAM,iBAAoB,UAAU,cAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,MAChF,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,MAC7E,EAAE,MAAM,cAAoB,UAAU,cAAiB,UAAU,CAAC,aAAa,EAAE;AAAA,MACjF,EAAE,MAAM,WAAoB,UAAU,cAAiB,UAAU,CAAC,UAAU,EAAE;AAAA;AAAA,MAG9E,EAAE,MAAM,aAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,MAChF,EAAE,MAAM,WAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,SAAS,EAAE;AAAA,MACzF,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,MAClF,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,MAClF,EAAE,MAAM,aAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,MAChF,EAAE,MAAM,SAAoB,UAAU,iBAAiB,UAAU,CAAC,QAAQ,EAAE;AAAA,MAC5E,EAAE,MAAM,UAAoB,UAAU,iBAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,MAC7E,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,IACpF;AAAA;AAAA;;;ACfO,SAAS,UAAU,OAA2C;AACnE,QAAM,KAAK,MAAM,aAAa;AAG9B,aAAW,OAAO,YAAY;AAC5B,eAAW,WAAW,IAAI,UAAU;AAClC,UAAI,QAAQ,KAAK,EAAE,GAAG;AACpB,eAAO;AAAA,UACL,OAAO;AAAA,UACP,SAAS,IAAI;AAAA,UACb,YAAY;AAAA,UACZ,iBAAiB;AAAA,UACjB,UAAU,IAAI;AAAA,UACd,cAAc;AAAA,QAChB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,kBAAkB,gBAAgB,IAAI,MAAM,WAAW,CAAC,CAAC;AAC/D,MAAI,gBAAiB,QAAO,EAAE,GAAG,iBAAiB,cAAc,GAAG;AAGnE,MAAI,eAAe,EAAE,GAAG;AACtB,WAAO;AAAA,MACL,OAAO;AAAA,MACP,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,iBAAiB;AAAA,MACjB,UAAU;AAAA,MACV,cAAc;AAAA,IAChB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP,SAAS;AAAA,IACT,YAAY;AAAA,IACZ,iBAAiB;AAAA,IACjB,UAAU;AAAA,IACV,cAAc;AAAA,EAChB;AACF;AAEA,SAAS,gBACP,IACA,SACiD;AAEjD,MAAI,kBAAkB,KAAK,EAAE,GAAG;AAC9B,WAAO,EAAE,OAAO,MAAM,SAAS,kBAAkB,YAAY,UAAU,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAC/H;AACA,MAAI,aAAa,KAAK,EAAE,GAAG;AACzB,WAAO,EAAE,OAAO,MAAM,SAAS,aAAa,YAAY,QAAQ,iBAAiB,aAAa,UAAU,cAAc;AAAA,EACxH;AACA,MAAI,YAAY,KAAK,EAAE,GAAG;AACxB,WAAO,EAAE,OAAO,MAAM,SAAS,YAAY,YAAY,QAAQ,iBAAiB,aAAa,UAAU,cAAc;AAAA,EACvH;AAGA,MAAI,GAAG,KAAK,EAAE,SAAS,IAAI;AACzB,WAAO,EAAE,OAAO,MAAM,SAAS,MAAM,YAAY,OAAO,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAChH;AAGA,QAAM,gBAAgB,CAAC,CAAC,QAAQ,iBAAiB;AACjD,QAAM,oBAAoB,CAAC,CAAC,QAAQ,iBAAiB;AACrD,MAAI,CAAC,iBAAiB,CAAC,mBAAmB;AACxC,WAAO,EAAE,OAAO,MAAM,SAAS,MAAM,YAAY,OAAO,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAChH;AAEA,SAAO;AACT;AAEA,SAAS,eAAe,IAAqB;AAC3C,SACE,4EAA4E,KAAK,EAAE,KACnF,CAAC,oCAAoC,KAAK,EAAE;AAEhD;AAjGA;AAAA;AAAA;AACA;AAAA;AAAA;;;ACCA,SAAS,YAAY;AACnB,MAAI,MAAO,QAAO;AAClB,MAAI;AACF,YAAQ,UAAQ,YAAY;AAAA,EAC9B,QAAQ;AACN,YAAQ;AAAA,EACV;AACA,SAAO;AACT;AAGO,SAAS,WAAW,IAA2B;AACpD,MAAI,CAAC,MAAM,OAAO,aAAa,OAAO,eAAe,GAAG,WAAW,IAAI,EAAG,QAAO;AACjF,QAAM,MAAM,UAAU;AACtB,MAAI,CAAC,IAAK,QAAO;AACjB,MAAI;AACF,UAAM,SAAS,IAAI,OAAO,EAAE;AAC5B,WAAO,QAAQ,WAAW;AAAA,EAC5B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAvBA,IAAI;AAAJ;AAAA;AAAA;AAAA,IAAI,QAA4C;AAAA;AAAA;;;ACAhD;AAAA;AAAA;AAAA;AAAA,OAAO,QAAQ;AACf,OAAO,UAAU;AADjB,IAsBa;AAtBb;AAAA;AAAA;AAGA;AACA;AAkBO,IAAM,gBAAN,MAAM,cAAa;AAAA,MAIhB,YAAY,SAAiB;AACnC,aAAK,UAAU;AAAA,MACjB;AAAA,MAEA,OAAO,YAAY,UAAU,QAAQ,IAAI,eAAe,QAAsB;AAC5E,YAAI,CAAC,cAAa,UAAU;AAC1B,wBAAa,WAAW,IAAI,cAAa,OAAO;AAAA,QAClD;AACA,eAAO,cAAa;AAAA,MACtB;AAAA,MAEA,OAAO,KAAkB,OAA4E,CAAC,GAAS;AAC7G,cAAM,KAAK,IAAI,QAAQ,IAAI,YAAY,KAAK;AAC5C,cAAM,SAAS,UAAU,EAAE,WAAW,IAAI,SAAS,OAAO,YAAY,IAAI,OAAO,EAAE,CAAC;AAEpF,YAAI,CAAC,OAAO,MAAO;AAEnB,cAAM,KAAK,IAAI,QAAQ,IAAI,iBAAiB,GAAG,MAAM,GAAG,EAAE,CAAC,GAAG,KAAK,KAC9D,IAAI,QAAQ,IAAI,WAAW,KAC3B;AAEL,cAAM,SAAsB;AAAA,UAC1B,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,UAClC,UAAU,OAAO;AAAA,UACjB,cAAc,OAAO;AAAA,UACrB,kBAAkB,OAAO;AAAA,UACzB,YAAY,OAAO;AAAA,UACnB,KAAK,IAAI,QAAQ;AAAA,UACjB;AAAA,UACA,SAAS,WAAW,EAAE;AAAA,UACtB,YAAY;AAAA,UACZ,SAAS,IAAI,QAAQ,IAAI,SAAS;AAAA,UAClC,aAAa,KAAK,cAAc;AAAA,UAChC,WAAW,KAAK,YAAY;AAAA,UAC5B,gBAAgB,KAAK,iBAAiB;AAAA,QACxC;AAEA,aAAK,OAAO,mBAAmB,MAAM;AAAA,MACvC;AAAA,MAEQ,OAAO,UAAkB,QAA2B;AAC1D,YAAI;AACF,gBAAM,WAAW,KAAK,KAAK,KAAK,SAAS,QAAQ;AACjD,aAAG,UAAU,KAAK,SAAS,EAAE,WAAW,KAAK,CAAC;AAC9C,aAAG,eAAe,UAAU,KAAK,UAAU,MAAM,IAAI,MAAM,OAAO;AAAA,QACpE,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF;AApDE,IADW,cACI,WAAgC;AAD1C,IAAM,eAAN;AAAA;AAAA;;;ACGA,IAAM,gBAA+C;AAAA,EAC1D,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,eAAe,CAAC;AAAA,EAChB,MAAM,EAAE,WAAW,CAAC,GAAG,WAAW,CAAC,EAAE;AAAA,EACrC,OAAO,EAAE,KAAK,MAAM,kBAAkB,IAAI;AAC5C;AAEO,SAAS,cAAc,UAA+B,CAAC,GAAkC;AAC9F,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAG;AAAA,IACH,MAAM,EAAE,GAAG,cAAc,MAAM,GAAG,QAAQ,KAAK;AAAA,IAC/C,OAAO,EAAE,GAAG,cAAc,OAAO,GAAG,QAAQ,MAAM;AAAA,IAClD,eAAe,EAAE,GAAG,cAAc,eAAe,GAAG,QAAQ,cAAc;AAAA,EAC5E;AACF;;;ACjCO,SAAS,kBACd,UAA+B,CAAC,GAChC,aAAyB,CAAC,GACd;AACZ,QAAM,SAAS,cAAc,OAAO;AAEpC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,MAAM,UAAU;AACd,YAAM,WAAW,MAAM,WAAW,UAAU,KAAK,CAAC;AAClD,aAAO;AAAA,QACL,GAAG;AAAA,QACH;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,+BAA+B,CAAC;AAAA,QAC1E;AAAA,QACA;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,4BAA4B,CAAC;AAAA,QACvE;AAAA,QACA;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,+BAA+B,CAAC;AAAA,QAC1E;AAAA,MACF;AAAA,IACF;AAAA,IACA,KAAK;AAAA,MACH,GAAG,WAAW;AAAA,MACd,gBAAgB,OAAO;AAAA,MACvB,aAAa,OAAO;AAAA,MACpB,sBAAsB,OAAO,OAAO,SAAS;AAAA,IAC/C;AAAA,EACF;AACF;;;AC3CA,SAAS,oBAAsC;;;ACE/C,OAAO,YAAY;AAsJZ,SAAS,cAAc,OAAwB;AACpD,QAAM,UAAU,MAAM,YAAY,GAAG;AACrC,MAAI,YAAY,GAAI,QAAO;AAC3B,QAAM,UAAU,MAAM,MAAM,GAAG,OAAO;AACtC,QAAM,MAAM,MAAM,MAAM,UAAU,CAAC;AACnC,QAAM,WAAW,OAAO,WAAW,UAAU,QAAQ,IAAI,yBAAyB,SAAS,EACxF,OAAO,OAAO,EAAE,OAAO,KAAK;AAE/B,MAAI,IAAI,WAAW,SAAS,OAAQ,QAAO;AAC3C,SAAO,OAAO,gBAAgB,OAAO,KAAK,KAAK,KAAK,GAAG,OAAO,KAAK,UAAU,KAAK,CAAC;AACrF;;;AD/JA,IAAM,cAAc;AACpB,IAAM,eAAe;AAgBrB,eAAsB,wBAAwB,KAAyC;AACrF,QAAM,EAAE,SAAS,IAAI,IAAI;AACzB,QAAM,SAAS,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAG5C,MAAI,SAAS,WAAW,iBAAiB,KAAK,CAAC,SAAS,WAAW,uBAAuB,GAAG;AAC3F,UAAM,UAAU,IAAI,QAAQ,IAAI,WAAW,GAAG;AAC9C,QAAI,CAAC,WAAW,CAAC,cAAc,OAAO,GAAG;AACvC,YAAM,WAAW,IAAI,QAAQ,MAAM;AACnC,eAAS,WAAW;AACpB,aAAO,aAAa,SAAS,QAAQ;AAAA,IACvC;AAAA,EACF;AAGA,MAAI,aAAa,2BAA2B,IAAI,QAAQ,aAAa,IAAI,OAAO,MAAM,KAAK;AACzF,UAAM,cAAc,IAAI,QAAQ,IAAI,YAAY,GAAG;AACnD,UAAM,gBAAgB,IAAI,QAAQ,IAAI,WAAW,GAAG;AAEpD,SAAK,CAAC,eAAe,CAAC,cAAc,WAAW,OAAO,CAAC,iBAAiB,CAAC,cAAc,aAAa,IAAI;AACtG,YAAM,WAAW,IAAI,QAAQ,MAAM;AACnC,eAAS,WAAW;AACpB,eAAS,SAAS;AAClB,aAAO,aAAa,SAAS,QAAQ;AAAA,IACvC;AAAA,EACF;AAGA,MAAI,aAAa,yBAAyB;AACxC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,SAAS,SAAS,KAAK,GAAG;AAC5B,UAAM,OAAO,SAAS,MAAM,GAAG,EAAE;AACjC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW,+BAA+B,IAAI;AAClD,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,OAAO,SAAS,eAAe,GAAG;AACpC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW,+BAA+B,QAAQ;AACtD,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,SAAS,WAAW,MAAM,GAAG;AAC/B,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW,0BAA0B,SAAS,MAAM,CAAC,CAAC;AAC1D,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,aAAa,aAAa;AAC5B,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,aAAa,mBAAmB;AAClC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAEA,QAAM,WAAW,aAAa,KAAK;AAGnC,kBAAgB,GAAG;AAEnB,SAAO;AACT;AAEA,SAAS,gBAAgB,KAAwB;AAG/C,OAAK,4EAAwC,KAAK,CAAC,EAAE,cAAAA,cAAa,MAAM;AACtE,IAAAA,cAAa,YAAY,EAAE,OAAO,GAAG;AAAA,EACvC,CAAC,EAAE,MAAM,MAAM;AAAA,EAAkC,CAAC;AACpD;;;AEjGA;","names":["VisitTracker"]}
|
|
1
|
+
{"version":3,"sources":["../src/core/config.ts","../src/core/with-third-audience.ts","../src/core/middleware.ts","../src/detection/known-patterns.ts","../src/detection/bot-detection-pipeline.ts"],"sourcesContent":["export interface ThirdAudienceConfig {\n /** Directory containing .mdx files, relative to project root. Default: 'content' */\n contentDir?: string\n /** Directory for JSONL data files. Default: 'data' */\n dataDir?: string\n /** Mount the /third-audience/ dashboard. Default: true */\n dashboard?: boolean\n /** Secret for dashboard access (HTTP Basic or bearer). Required when dashboard: true */\n dashboardSecret?: string\n notifications?: {\n email?: { smtp: string; to: string; from?: string }\n slack?: { webhookUrl: string }\n }\n bots?: {\n allowlist?: string[]\n blocklist?: string[]\n }\n cache?: {\n /** Cache TTL in seconds. Default: 3600 */\n ttl?: number\n /** Max in-memory entries. Default: 500 */\n maxMemoryEntries?: number\n }\n}\n\nexport const defaultConfig: Required<ThirdAudienceConfig> = {\n contentDir: 'content',\n dataDir: 'data',\n dashboard: true,\n dashboardSecret: '',\n notifications: {},\n bots: { allowlist: [], blocklist: [] },\n cache: { ttl: 3600, maxMemoryEntries: 500 },\n}\n\nexport function resolveConfig(partial: ThirdAudienceConfig = {}): Required<ThirdAudienceConfig> {\n return {\n ...defaultConfig,\n ...partial,\n bots: { ...defaultConfig.bots, ...partial.bots },\n cache: { ...defaultConfig.cache, ...partial.cache },\n notifications: { ...defaultConfig.notifications, ...partial.notifications },\n }\n}\n","import type { NextConfig } from 'next'\nimport { resolveConfig, type ThirdAudienceConfig } from './config.js'\n\n/**\n * Wraps next.config.ts to inject Third Audience rewrites and headers.\n *\n * Usage:\n * import { withThirdAudience } from 'third-audience-mdx'\n * export default withThirdAudience({ contentDir: 'content' })\n */\nexport function withThirdAudience(\n options: ThirdAudienceConfig = {},\n nextConfig: NextConfig = {}\n): NextConfig {\n const config = resolveConfig(options)\n\n return {\n ...nextConfig,\n async headers() {\n const existing = await nextConfig.headers?.() ?? []\n return [\n ...existing,\n {\n source: '/:path*.md',\n headers: [{ key: 'Content-Type', value: 'text/markdown; charset=utf-8' }],\n },\n {\n source: '/llms.txt',\n headers: [{ key: 'Content-Type', value: 'text/plain; charset=utf-8' }],\n },\n {\n source: '/okf/:path*',\n headers: [{ key: 'Content-Type', value: 'text/markdown; charset=utf-8' }],\n },\n ]\n },\n env: {\n ...nextConfig.env,\n TA_CONTENT_DIR: config.contentDir,\n TA_DATA_DIR: config.dataDir,\n TA_DASHBOARD_ENABLED: String(config.dashboard),\n },\n }\n}\n","import { NextResponse, type NextRequest } from 'next/server'\n\nconst COOKIE_NAME = 'ta_session'\nconst RESET_COOKIE = 'ta_session_reset'\n\n/**\n * Third Audience middleware — Edge-runtime compatible (no Node.js crypto).\n *\n * Auth guard uses cookie presence only; HMAC verification happens in the\n * route handler (Node.js runtime) where crypto is available.\n *\n * Wire up in middleware.ts:\n * export { thirdAudienceMiddleware as middleware } from 'third-audience-mdx'\n * export const config = { matcher: ['/((?!_next|api).*)'] }\n */\nexport function thirdAudienceMiddleware(req: NextRequest): NextResponse | null {\n const { pathname } = req.nextUrl\n const accept = req.headers.get('accept') ?? ''\n\n // Dashboard auth guard — cookie presence check (HMAC verified in route handler)\n if (pathname.startsWith('/third-audience') && !pathname.startsWith('/third-audience/login')) {\n const session = req.cookies.get(COOKIE_NAME)?.value\n if (!session) {\n const loginUrl = req.nextUrl.clone()\n loginUrl.pathname = '/third-audience/login'\n return NextResponse.redirect(loginUrl)\n }\n }\n\n // Password reset guard — /third-audience/login?reset=1 requires reset or session cookie\n if (pathname === '/third-audience/login' && req.nextUrl.searchParams.get('reset') === '1') {\n const resetCookie = req.cookies.get(RESET_COOKIE)?.value\n const sessionCookie = req.cookies.get(COOKIE_NAME)?.value\n if (!resetCookie && !sessionCookie) {\n const loginUrl = req.nextUrl.clone()\n loginUrl.pathname = '/third-audience/login'\n loginUrl.search = ''\n return NextResponse.redirect(loginUrl)\n }\n }\n\n // /third-audience/login → rewrite to login route handler (GET/POST)\n if (pathname === '/third-audience/login') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/login'\n return NextResponse.rewrite(url)\n }\n\n // .md URL → rewrite to our internal markdown route handler\n if (pathname.endsWith('.md')) {\n const slug = pathname.slice(0, -3) // strip .md\n const url = req.nextUrl.clone()\n url.pathname = `/api/third-audience/markdown${slug}`\n return NextResponse.rewrite(url)\n }\n\n // Accept: text/markdown header → rewrite to markdown route\n if (accept.includes('text/markdown')) {\n const url = req.nextUrl.clone()\n url.pathname = `/api/third-audience/markdown${pathname}`\n return NextResponse.rewrite(url)\n }\n\n // /okf or /okf/* → rewrite to OKF bundle handler\n // [...path] catch-all requires at least one segment, so /okf → /okf/index\n if (pathname.startsWith('/okf')) {\n const url = req.nextUrl.clone()\n const rest = pathname.slice(4) // '' or '/something'\n url.pathname = `/api/third-audience/okf${rest || '/index'}`\n return NextResponse.rewrite(url)\n }\n\n // /llms.txt → rewrite to discovery handler\n if (pathname === '/llms.txt') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/llms-txt'\n return NextResponse.rewrite(url)\n }\n\n // /sitemap-ai.xml → rewrite to AI sitemap handler\n if (pathname === '/sitemap-ai.xml') {\n const url = req.nextUrl.clone()\n url.pathname = '/api/third-audience/sitemap-ai'\n return NextResponse.rewrite(url)\n }\n\n // No match — let the caller's middleware chain continue\n return null\n}\n\n","/** Known AI crawler and search engine user-agent patterns. */\nexport interface KnownBot {\n name: string\n category: 'ai_crawler' | 'search_engine'\n patterns: RegExp[]\n}\n\nexport const KNOWN_BOTS: KnownBot[] = [\n // AI Crawlers\n { name: 'ClaudeBot', category: 'ai_crawler', patterns: [/claudebot/i, /claude-web/i] },\n { name: 'GPTBot', category: 'ai_crawler', patterns: [/gptbot/i] },\n { name: 'ChatGPT-User', category: 'ai_crawler', patterns: [/chatgpt-user/i] },\n { name: 'PerplexityBot', category: 'ai_crawler', patterns: [/perplexitybot/i] },\n { name: 'Googlebot-AI', category: 'ai_crawler', patterns: [/google-extended/i, /googleother/i] },\n { name: 'FacebookBot', category: 'ai_crawler', patterns: [/facebookbot/i] },\n { name: 'Applebot-Extended',category: 'ai_crawler', patterns: [/applebot-extended/i] },\n { name: 'YouBot', category: 'ai_crawler', patterns: [/youbot/i] },\n { name: 'CCBot', category: 'ai_crawler', patterns: [/ccbot/i] },\n { name: 'CohereCrawler', category: 'ai_crawler', patterns: [/cohere-ai/i] },\n { name: 'AI2Bot', category: 'ai_crawler', patterns: [/ai2bot/i] },\n { name: 'Bytespider', category: 'ai_crawler', patterns: [/bytespider/i] },\n { name: 'Diffbot', category: 'ai_crawler', patterns: [/diffbot/i] },\n\n // Search Engines\n { name: 'Googlebot', category: 'search_engine', patterns: [/googlebot/i] },\n { name: 'Bingbot', category: 'search_engine', patterns: [/bingbot/i, /msnbot/i] },\n { name: 'DuckDuckBot', category: 'search_engine', patterns: [/duckduckbot/i] },\n { name: 'Baiduspider', category: 'search_engine', patterns: [/baiduspider/i] },\n { name: 'YandexBot', category: 'search_engine', patterns: [/yandexbot/i] },\n { name: 'Sogou', category: 'search_engine', patterns: [/sogou/i] },\n { name: 'Exabot', category: 'search_engine', patterns: [/exabot/i] },\n { name: 'ia_archiver', category: 'search_engine', patterns: [/ia_archiver/i] },\n]\n","import type { BotDetectionResult } from './bot-detection-result.js'\nimport { KNOWN_BOTS } from './known-patterns.js'\n\nexport interface DetectBotInput {\n userAgent: string\n /** Optional: headers map for heuristic checks */\n headers?: Record<string, string | string[] | undefined>\n /** Optional: IP address */\n ip?: string\n}\n\n/**\n * Three-layer bot detection pipeline:\n * 1. Known pattern matching (O(n) UA string match)\n * 2. Heuristic signals (missing headers, headless indicators)\n * 3. Auto-learner flag (unknown UAs that behave bot-like)\n */\nexport function detectBot(input: DetectBotInput): BotDetectionResult {\n const ua = input.userAgent ?? ''\n\n // Layer 1: known pattern match\n for (const bot of KNOWN_BOTS) {\n for (const pattern of bot.patterns) {\n if (pattern.test(ua)) {\n return {\n isBot: true,\n botName: bot.name,\n confidence: 'high',\n detectionMethod: 'known_pattern',\n category: bot.category,\n rawUserAgent: ua,\n }\n }\n }\n }\n\n // Layer 2: heuristics\n const heuristicResult = checkHeuristics(ua, input.headers ?? {})\n if (heuristicResult) return { ...heuristicResult, rawUserAgent: ua }\n\n // Layer 3: auto-learner — flag suspicious unknown UAs for review\n if (looksLikeBotUa(ua)) {\n return {\n isBot: true,\n botName: null,\n confidence: 'low',\n detectionMethod: 'auto_learned',\n category: 'unknown_bot',\n rawUserAgent: ua,\n }\n }\n\n return {\n isBot: false,\n botName: null,\n confidence: 'high',\n detectionMethod: 'none',\n category: 'human',\n rawUserAgent: ua,\n }\n}\n\nfunction checkHeuristics(\n ua: string,\n headers: Record<string, string | string[] | undefined>\n): Omit<BotDetectionResult, 'rawUserAgent'> | null {\n // Headless Chrome signals\n if (/headlesschrome/i.test(ua)) {\n return { isBot: true, botName: 'HeadlessChrome', confidence: 'medium', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n if (/phantomjs/i.test(ua)) {\n return { isBot: true, botName: 'PhantomJS', confidence: 'high', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n if (/selenium/i.test(ua)) {\n return { isBot: true, botName: 'Selenium', confidence: 'high', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n // Empty or very short UA is suspicious\n if (ua.trim().length < 10) {\n return { isBot: true, botName: null, confidence: 'low', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n // Missing typical browser headers\n const hasAcceptLang = !!headers['accept-language']\n const hasAcceptEncoding = !!headers['accept-encoding']\n if (!hasAcceptLang && !hasAcceptEncoding) {\n return { isBot: true, botName: null, confidence: 'low', detectionMethod: 'heuristic', category: 'unknown_bot' }\n }\n\n return null\n}\n\nfunction looksLikeBotUa(ua: string): boolean {\n return (\n /bot|crawler|spider|scraper|fetch|http|python|curl|java|ruby|go-http|node/i.test(ua) &&\n !/chrome|firefox|safari|edge|opera/i.test(ua)\n )\n}\n"],"mappings":";AAyBO,IAAM,gBAA+C;AAAA,EAC1D,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,eAAe,CAAC;AAAA,EAChB,MAAM,EAAE,WAAW,CAAC,GAAG,WAAW,CAAC,EAAE;AAAA,EACrC,OAAO,EAAE,KAAK,MAAM,kBAAkB,IAAI;AAC5C;AAEO,SAAS,cAAc,UAA+B,CAAC,GAAkC;AAC9F,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAG;AAAA,IACH,MAAM,EAAE,GAAG,cAAc,MAAM,GAAG,QAAQ,KAAK;AAAA,IAC/C,OAAO,EAAE,GAAG,cAAc,OAAO,GAAG,QAAQ,MAAM;AAAA,IAClD,eAAe,EAAE,GAAG,cAAc,eAAe,GAAG,QAAQ,cAAc;AAAA,EAC5E;AACF;;;ACjCO,SAAS,kBACd,UAA+B,CAAC,GAChC,aAAyB,CAAC,GACd;AACZ,QAAM,SAAS,cAAc,OAAO;AAEpC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,MAAM,UAAU;AACd,YAAM,WAAW,MAAM,WAAW,UAAU,KAAK,CAAC;AAClD,aAAO;AAAA,QACL,GAAG;AAAA,QACH;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,+BAA+B,CAAC;AAAA,QAC1E;AAAA,QACA;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,4BAA4B,CAAC;AAAA,QACvE;AAAA,QACA;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,CAAC,EAAE,KAAK,gBAAgB,OAAO,+BAA+B,CAAC;AAAA,QAC1E;AAAA,MACF;AAAA,IACF;AAAA,IACA,KAAK;AAAA,MACH,GAAG,WAAW;AAAA,MACd,gBAAgB,OAAO;AAAA,MACvB,aAAa,OAAO;AAAA,MACpB,sBAAsB,OAAO,OAAO,SAAS;AAAA,IAC/C;AAAA,EACF;AACF;;;AC3CA,SAAS,oBAAsC;AAE/C,IAAM,cAAc;AACpB,IAAM,eAAe;AAYd,SAAS,wBAAwB,KAAuC;AAC7E,QAAM,EAAE,SAAS,IAAI,IAAI;AACzB,QAAM,SAAS,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAG5C,MAAI,SAAS,WAAW,iBAAiB,KAAK,CAAC,SAAS,WAAW,uBAAuB,GAAG;AAC3F,UAAM,UAAU,IAAI,QAAQ,IAAI,WAAW,GAAG;AAC9C,QAAI,CAAC,SAAS;AACZ,YAAM,WAAW,IAAI,QAAQ,MAAM;AACnC,eAAS,WAAW;AACpB,aAAO,aAAa,SAAS,QAAQ;AAAA,IACvC;AAAA,EACF;AAGA,MAAI,aAAa,2BAA2B,IAAI,QAAQ,aAAa,IAAI,OAAO,MAAM,KAAK;AACzF,UAAM,cAAc,IAAI,QAAQ,IAAI,YAAY,GAAG;AACnD,UAAM,gBAAgB,IAAI,QAAQ,IAAI,WAAW,GAAG;AACpD,QAAI,CAAC,eAAe,CAAC,eAAe;AAClC,YAAM,WAAW,IAAI,QAAQ,MAAM;AACnC,eAAS,WAAW;AACpB,eAAS,SAAS;AAClB,aAAO,aAAa,SAAS,QAAQ;AAAA,IACvC;AAAA,EACF;AAGA,MAAI,aAAa,yBAAyB;AACxC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,SAAS,SAAS,KAAK,GAAG;AAC5B,UAAM,OAAO,SAAS,MAAM,GAAG,EAAE;AACjC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW,+BAA+B,IAAI;AAClD,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,OAAO,SAAS,eAAe,GAAG;AACpC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW,+BAA+B,QAAQ;AACtD,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAIA,MAAI,SAAS,WAAW,MAAM,GAAG;AAC/B,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,UAAM,OAAO,SAAS,MAAM,CAAC;AAC7B,QAAI,WAAW,0BAA0B,QAAQ,QAAQ;AACzD,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,aAAa,aAAa;AAC5B,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,MAAI,aAAa,mBAAmB;AAClC,UAAM,MAAM,IAAI,QAAQ,MAAM;AAC9B,QAAI,WAAW;AACf,WAAO,aAAa,QAAQ,GAAG;AAAA,EACjC;AAGA,SAAO;AACT;;;ACjFO,IAAM,aAAyB;AAAA;AAAA,EAEpC,EAAE,MAAM,aAAoB,UAAU,cAAiB,UAAU,CAAC,cAAc,aAAa,EAAE;AAAA,EAC/F,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,EAC7E,EAAE,MAAM,gBAAoB,UAAU,cAAiB,UAAU,CAAC,eAAe,EAAE;AAAA,EACnF,EAAE,MAAM,iBAAoB,UAAU,cAAiB,UAAU,CAAC,gBAAgB,EAAE;AAAA,EACpF,EAAE,MAAM,gBAAoB,UAAU,cAAiB,UAAU,CAAC,oBAAoB,cAAc,EAAE;AAAA,EACtG,EAAE,MAAM,eAAoB,UAAU,cAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,EAClF,EAAE,MAAM,qBAAoB,UAAU,cAAiB,UAAU,CAAC,oBAAoB,EAAE;AAAA,EACxF,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,EAC7E,EAAE,MAAM,SAAoB,UAAU,cAAiB,UAAU,CAAC,QAAQ,EAAE;AAAA,EAC5E,EAAE,MAAM,iBAAoB,UAAU,cAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,EAChF,EAAE,MAAM,UAAoB,UAAU,cAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,EAC7E,EAAE,MAAM,cAAoB,UAAU,cAAiB,UAAU,CAAC,aAAa,EAAE;AAAA,EACjF,EAAE,MAAM,WAAoB,UAAU,cAAiB,UAAU,CAAC,UAAU,EAAE;AAAA;AAAA,EAG9E,EAAE,MAAM,aAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,EAChF,EAAE,MAAM,WAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,SAAS,EAAE;AAAA,EACzF,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,EAClF,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AAAA,EAClF,EAAE,MAAM,aAAoB,UAAU,iBAAiB,UAAU,CAAC,YAAY,EAAE;AAAA,EAChF,EAAE,MAAM,SAAoB,UAAU,iBAAiB,UAAU,CAAC,QAAQ,EAAE;AAAA,EAC5E,EAAE,MAAM,UAAoB,UAAU,iBAAiB,UAAU,CAAC,SAAS,EAAE;AAAA,EAC7E,EAAE,MAAM,eAAoB,UAAU,iBAAiB,UAAU,CAAC,cAAc,EAAE;AACpF;;;ACfO,SAAS,UAAU,OAA2C;AACnE,QAAM,KAAK,MAAM,aAAa;AAG9B,aAAW,OAAO,YAAY;AAC5B,eAAW,WAAW,IAAI,UAAU;AAClC,UAAI,QAAQ,KAAK,EAAE,GAAG;AACpB,eAAO;AAAA,UACL,OAAO;AAAA,UACP,SAAS,IAAI;AAAA,UACb,YAAY;AAAA,UACZ,iBAAiB;AAAA,UACjB,UAAU,IAAI;AAAA,UACd,cAAc;AAAA,QAChB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,kBAAkB,gBAAgB,IAAI,MAAM,WAAW,CAAC,CAAC;AAC/D,MAAI,gBAAiB,QAAO,EAAE,GAAG,iBAAiB,cAAc,GAAG;AAGnE,MAAI,eAAe,EAAE,GAAG;AACtB,WAAO;AAAA,MACL,OAAO;AAAA,MACP,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,iBAAiB;AAAA,MACjB,UAAU;AAAA,MACV,cAAc;AAAA,IAChB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP,SAAS;AAAA,IACT,YAAY;AAAA,IACZ,iBAAiB;AAAA,IACjB,UAAU;AAAA,IACV,cAAc;AAAA,EAChB;AACF;AAEA,SAAS,gBACP,IACA,SACiD;AAEjD,MAAI,kBAAkB,KAAK,EAAE,GAAG;AAC9B,WAAO,EAAE,OAAO,MAAM,SAAS,kBAAkB,YAAY,UAAU,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAC/H;AACA,MAAI,aAAa,KAAK,EAAE,GAAG;AACzB,WAAO,EAAE,OAAO,MAAM,SAAS,aAAa,YAAY,QAAQ,iBAAiB,aAAa,UAAU,cAAc;AAAA,EACxH;AACA,MAAI,YAAY,KAAK,EAAE,GAAG;AACxB,WAAO,EAAE,OAAO,MAAM,SAAS,YAAY,YAAY,QAAQ,iBAAiB,aAAa,UAAU,cAAc;AAAA,EACvH;AAGA,MAAI,GAAG,KAAK,EAAE,SAAS,IAAI;AACzB,WAAO,EAAE,OAAO,MAAM,SAAS,MAAM,YAAY,OAAO,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAChH;AAGA,QAAM,gBAAgB,CAAC,CAAC,QAAQ,iBAAiB;AACjD,QAAM,oBAAoB,CAAC,CAAC,QAAQ,iBAAiB;AACrD,MAAI,CAAC,iBAAiB,CAAC,mBAAmB;AACxC,WAAO,EAAE,OAAO,MAAM,SAAS,MAAM,YAAY,OAAO,iBAAiB,aAAa,UAAU,cAAc;AAAA,EAChH;AAEA,SAAO;AACT;AAEA,SAAS,eAAe,IAAqB;AAC3C,SACE,4EAA4E,KAAK,EAAE,KACnF,CAAC,oCAAoC,KAAK,EAAE;AAEhD;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "third-audience-mdx",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Serve AI-optimized Markdown to LLM crawlers from MDX content sites. Track bot visits, citations, and AI discoverability.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -91,6 +91,16 @@
|
|
|
91
91
|
"import": "./dist/dashboard/ui/pages/SystemHealthPage.mjs",
|
|
92
92
|
"require": "./dist/dashboard/ui/pages/SystemHealthPage.js",
|
|
93
93
|
"types": "./dist/dashboard/ui/pages/SystemHealthPage.d.ts"
|
|
94
|
+
},
|
|
95
|
+
"./dashboard/ui/pages/OkfPage": {
|
|
96
|
+
"import": "./dist/dashboard/ui/pages/OkfPage.mjs",
|
|
97
|
+
"require": "./dist/dashboard/ui/pages/OkfPage.js",
|
|
98
|
+
"types": "./dist/dashboard/ui/pages/OkfPage.d.ts"
|
|
99
|
+
},
|
|
100
|
+
"./routes/okf-graph": {
|
|
101
|
+
"import": "./dist/dashboard/routes/okf-graph-route.mjs",
|
|
102
|
+
"require": "./dist/dashboard/routes/okf-graph-route.js",
|
|
103
|
+
"types": "./dist/dashboard/routes/okf-graph-route.d.ts"
|
|
94
104
|
}
|
|
95
105
|
},
|
|
96
106
|
"bin": {
|
|
@@ -103,7 +113,15 @@
|
|
|
103
113
|
"test": "vitest run",
|
|
104
114
|
"test:watch": "vitest"
|
|
105
115
|
},
|
|
106
|
-
"keywords": [
|
|
116
|
+
"keywords": [
|
|
117
|
+
"mdx",
|
|
118
|
+
"ai",
|
|
119
|
+
"llm",
|
|
120
|
+
"markdown",
|
|
121
|
+
"next.js",
|
|
122
|
+
"crawler",
|
|
123
|
+
"analytics"
|
|
124
|
+
],
|
|
107
125
|
"license": "GPL-2.0-or-later",
|
|
108
126
|
"peerDependencies": {
|
|
109
127
|
"next": ">=13.0.0",
|