@morphika/andami 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/(site)/[slug]/page.tsx +3 -2
- package/app/(site)/page.tsx +3 -2
- package/app/(site)/work/[slug]/page.tsx +3 -3
- package/app/robots.ts +38 -1
- package/lib/bot-guard.ts +138 -0
- package/package.json +2 -1
|
@@ -10,8 +10,9 @@ import { assetUrl } from "../../../lib/assets";
|
|
|
10
10
|
|
|
11
11
|
const cfg = getSiteConfig();
|
|
12
12
|
|
|
13
|
-
// ISR: cache for
|
|
14
|
-
|
|
13
|
+
// ISR: cache for 24 hours. Content changes are rare; admin can trigger
|
|
14
|
+
// on-demand revalidation via /api/admin/revalidate after edits.
|
|
15
|
+
export const revalidate = 86400;
|
|
15
16
|
|
|
16
17
|
interface PageProps {
|
|
17
18
|
params: Promise<{ slug: string }>;
|
package/app/(site)/page.tsx
CHANGED
|
@@ -4,8 +4,9 @@ import type { Page } from "../../lib/sanity/types";
|
|
|
4
4
|
import { PageRenderer } from "../../components/blocks";
|
|
5
5
|
import { getSiteConfig } from "../../lib/config";
|
|
6
6
|
|
|
7
|
-
// ISR: cache for
|
|
8
|
-
|
|
7
|
+
// ISR: cache for 24 hours. Content changes are rare; admin can trigger
|
|
8
|
+
// on-demand revalidation via /api/admin/revalidate after edits.
|
|
9
|
+
export const revalidate = 86400;
|
|
9
10
|
|
|
10
11
|
async function getHomePage(): Promise<Page | null> {
|
|
11
12
|
try {
|
|
@@ -10,9 +10,9 @@ import { assetUrl } from "../../../../lib/assets";
|
|
|
10
10
|
|
|
11
11
|
const cfg = getSiteConfig();
|
|
12
12
|
|
|
13
|
-
// ISR: cache for
|
|
14
|
-
//
|
|
15
|
-
export const revalidate =
|
|
13
|
+
// ISR: cache for 24 hours. Content changes are rare; admin can trigger
|
|
14
|
+
// on-demand revalidation via /api/admin/revalidate after edits.
|
|
15
|
+
export const revalidate = 86400;
|
|
16
16
|
|
|
17
17
|
interface ProjectPageProps {
|
|
18
18
|
params: Promise<{ slug: string }>;
|
package/app/robots.ts
CHANGED
|
@@ -3,13 +3,50 @@ import { getSiteConfig } from "../lib/config";
|
|
|
3
3
|
|
|
4
4
|
const cfg = getSiteConfig();
|
|
5
5
|
|
|
6
|
+
/**
|
|
7
|
+
* robots.txt — Controls crawler access and rate.
|
|
8
|
+
*
|
|
9
|
+
* Crawl-delay (seconds between requests) is honoured by Bing, Yandex, Baidu
|
|
10
|
+
* and most well-behaved bots. Googlebot ignores it but respects the rate
|
|
11
|
+
* configured in Search Console. The 10-second delay drastically reduces
|
|
12
|
+
* serverless CPU usage from bot traffic on Hobby-tier hosting.
|
|
13
|
+
*
|
|
14
|
+
* Aggressive AI scrapers (GPTBot, CCBot, etc.) are blocked entirely.
|
|
15
|
+
*/
|
|
6
16
|
export default function robots(): MetadataRoute.Robots {
|
|
7
17
|
return {
|
|
8
18
|
rules: [
|
|
19
|
+
// Block known AI scrapers / aggressive bots
|
|
20
|
+
{
|
|
21
|
+
userAgent: "GPTBot",
|
|
22
|
+
disallow: ["/"],
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
userAgent: "CCBot",
|
|
26
|
+
disallow: ["/"],
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
userAgent: "anthropic-ai",
|
|
30
|
+
disallow: ["/"],
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
userAgent: "ClaudeBot",
|
|
34
|
+
disallow: ["/"],
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
userAgent: "Bytespider",
|
|
38
|
+
disallow: ["/"],
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
userAgent: "PetalBot",
|
|
42
|
+
disallow: ["/"],
|
|
43
|
+
},
|
|
44
|
+
// Default: allow with crawl delay
|
|
9
45
|
{
|
|
10
46
|
userAgent: "*",
|
|
11
47
|
allow: "/",
|
|
12
|
-
disallow: ["/admin/", "/studio/", "/api/admin/"],
|
|
48
|
+
disallow: ["/admin/", "/studio/", "/api/admin/", "/api/"],
|
|
49
|
+
crawlDelay: 10,
|
|
13
50
|
},
|
|
14
51
|
],
|
|
15
52
|
sitemap: `${cfg.domain}/sitemap.xml`,
|
package/lib/bot-guard.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bot detection & rate-limiting for Edge Middleware.
|
|
3
|
+
*
|
|
4
|
+
* Runs on Vercel's Edge Runtime (cheap) to block aggressive crawlers
|
|
5
|
+
* BEFORE they invoke expensive serverless functions (Fluid Active CPU).
|
|
6
|
+
*
|
|
7
|
+
* Strategy:
|
|
8
|
+
* 1. Block known AI scrapers / aggressive bots by User-Agent
|
|
9
|
+
* 2. Detect bot-like rapid crawling patterns (many unique paths from same IP)
|
|
10
|
+
* 3. Return 429 for rate-limited requests
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { NextRequest, NextResponse } from "next/server";
|
|
14
|
+
|
|
15
|
+
// ── Known aggressive bot User-Agents ────────────────────────────────────
|
|
16
|
+
// These bots ignore robots.txt or crawl too aggressively for Hobby-tier hosting.
|
|
17
|
+
// Substrings searched for in the request's User-Agent header. A request whose
// User-Agent contains any of them is rejected by guardAgainstBots.
const BLOCKED_BOT_PATTERNS = [
  // Also disallowed for "/" in this package's robots.ts rules
  "GPTBot", "CCBot", "anthropic-ai", "ClaudeBot", "Bytespider", "PetalBot",

  // Blocked here only (no dedicated robots.ts rule)
  "Sogou", "AhrefsBot", "SemrushBot", "DotBot", "MJ12bot", "BLEXBot",
  "DataForSeoBot", "serpstatbot", "Amazonbot", "Barkrowler", "YandexBot",
  "MegaIndex",
  "Applebot", // Apple's crawler — not needed for most portfolio sites
];
|
|
38
|
+
|
|
39
|
+
// ── Simple in-memory rate limiter for Edge ─────────────────────────────
|
|
40
|
+
// Edge functions are short-lived, so this map resets frequently.
|
|
41
|
+
// It won't catch all abuse but will throttle burst patterns within
|
|
42
|
+
// a single Edge instance lifetime (typically several minutes).
|
|
43
|
+
/** Per-IP request counter for one fixed window. */
interface RateEntry {
  /** Requests seen since `windowStart`, including ones that were rejected. */
  count: number;
  /** `Date.now()` timestamp (ms) at which the current window was opened. */
  windowStart: number;
}

const ipHits = new Map<string, RateEntry>();

// Max public page requests per IP per window (generous for humans, tight for bots)
const PUBLIC_PAGE_LIMIT = 30;
const WINDOW_MS = 60_000; // 1 minute

// Hard cap on the number of tracked keys. The key comes from a request header,
// and the periodic sweep below only removes entries older than two windows, so
// without a cap a burst of distinct keys could grow the map without bound.
const MAX_TRACKED_IPS = 10_000;

// Garbage-collect stale entries every 100 checks
let gcCounter = 0;

/**
 * Records one request for `ip` and reports whether it exceeds the limit.
 *
 * Fixed-window counter: the first request from an IP (or the first one after
 * its window has expired) opens a new window and is always allowed. Every
 * later request inside the window increments the counter, and the call returns
 * true once the counter is greater than PUBLIC_PAGE_LIMIT.
 *
 * @param ip Key to count against (client address as resolved by the caller).
 * @returns true if this request should be rejected, false otherwise.
 */
function isRateLimited(ip: string): boolean {
  const now = Date.now();

  // Periodic cleanup: drop entries whose window opened more than two windows ago.
  if (++gcCounter >= 100) {
    gcCounter = 0;
    for (const [key, entry] of ipHits) {
      if (now - entry.windowStart > WINDOW_MS * 2) {
        ipHits.delete(key);
      }
    }
  }

  const entry = ipHits.get(ip);
  if (!entry || now - entry.windowStart > WINDOW_MS) {
    // Remove any expired entry first so the re-insert lands at the end of the
    // Map's iteration order; the first key is then always the oldest window.
    ipHits.delete(ip);

    // Enforce the size cap by evicting the oldest window. An evicted client
    // simply starts a fresh window on its next request (fails open).
    if (ipHits.size >= MAX_TRACKED_IPS) {
      const oldest = ipHits.keys().next();
      if (!oldest.done) {
        ipHits.delete(oldest.value);
      }
    }

    ipHits.set(ip, { count: 1, windowStart: now });
    return false;
  }

  entry.count++;
  return entry.count > PUBLIC_PAGE_LIMIT;
}
|
|
79
|
+
|
|
80
|
+
// ── Main guard function ────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
// Path prefixes the guard never inspects: the admin UI, the admin API, the
// studio, and framework/static asset paths.
const UNGUARDED_PATH_PREFIXES = ["/admin", "/api/admin", "/studio", "/_next", "/fonts"];

/**
 * Reports whether `pathname` is exempt from the bot guard.
 *
 * Matches on whole path segments: "/studio" and "/studio/desk" are exempt, but
 * a public slug that merely starts with the same letters ("/studio-tour",
 * "/administration") is not.
 */
function isUnguardedPath(pathname: string): boolean {
  return UNGUARDED_PATH_PREFIXES.some(
    (prefix) => pathname === prefix || pathname.startsWith(`${prefix}/`)
  );
}

/**
 * Call this at the TOP of your middleware, before any other logic.
 *
 * Only GET requests outside the unguarded prefixes are inspected. All other
 * requests (including non-admin API routes using other methods) pass through.
 *
 * @param request Incoming middleware request.
 * @returns A 403 response when the User-Agent contains a blocked bot pattern
 *   (case-insensitive), a 429 response when the client's IP has exceeded the
 *   rate limit, or null when the request should continue.
 */
export function guardAgainstBots(request: NextRequest): NextResponse | null {
  const { pathname } = request.nextUrl;

  // Skip admin routes, the admin API, the studio, and static assets
  if (isUnguardedPath(pathname)) {
    return null;
  }

  // Only guard GET requests (mutations already have their own rate limiter)
  if (request.method !== "GET") {
    return null;
  }

  // 1. Block known aggressive bots. Compare case-insensitively so a bot
  //    cannot slip through by changing the casing of its product token.
  const ua = (request.headers.get("user-agent") ?? "").toLowerCase();
  const isBlockedBot = BLOCKED_BOT_PATTERNS.some((pattern) =>
    ua.includes(pattern.toLowerCase())
  );

  if (isBlockedBot) {
    return new NextResponse("Forbidden", {
      status: 403,
      headers: { "X-Robots-Tag": "noindex, nofollow" },
    });
  }

  // 2. Rate-limit public page requests per IP. The first entry of
  //    x-forwarded-for is used, falling back to x-real-ip.
  // NOTE(review): when neither header is present every such client shares the
  // single "unknown" bucket and can be throttled together — confirm the
  // hosting platform always sets one of these headers.
  const ip =
    request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ||
    request.headers.get("x-real-ip") ||
    "unknown";

  if (isRateLimited(ip)) {
    return new NextResponse("Too Many Requests", {
      status: 429,
      headers: {
        "Retry-After": "60",
        "X-Robots-Tag": "noindex, nofollow",
      },
    });
  }

  return null;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@morphika/andami",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Visual Page Builder — core library. A reusable website builder with visual editing, CMS integration, and asset management.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -91,6 +91,7 @@
|
|
|
91
91
|
"./lib/csrf": "./lib/csrf.ts",
|
|
92
92
|
"./lib/csrf-client": "./lib/csrf-client.ts",
|
|
93
93
|
"./lib/security": "./lib/security.ts",
|
|
94
|
+
"./lib/bot-guard": "./lib/bot-guard.ts",
|
|
94
95
|
"./lib/sanitize": "./lib/sanitize.ts",
|
|
95
96
|
"./lib/logger": "./lib/logger.ts",
|
|
96
97
|
"./lib/audit": "./lib/audit.ts",
|