@casoon/astro-crawler-policy 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/presets.js +12 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -53,7 +53,9 @@ Presets are the primary way to express intent. Each preset sets default content
|
|
|
53
53
|
| `blockTraining` | allow | allow | disallow | disallow |
|
|
54
54
|
| `lockdown` | disallow | disallow | disallow | disallow |
|
|
55
55
|
|
|
56
|
-
`
|
|
56
|
+
`citationFriendly` allows bots that do citation or summarization but blocks bots whose only purpose is training data collection (GPTBot, Google-Extended, CCBot, Bytespider, Applebot-Extended). Bots with mixed roles like ClaudeBot are allowed.
|
|
57
|
+
|
|
58
|
+
`blockTraining` goes further and blocks every bot with any training category, including mixed bots like ClaudeBot and meta-externalagent.
|
|
57
59
|
|
|
58
60
|
`lockdown` adds a global `User-agent: * / Disallow: /` rule, overriding everything.
|
|
59
61
|
|
|
@@ -499,3 +501,7 @@ User-agent: *
|
|
|
499
501
|
Disallow: /
|
|
500
502
|
Content-signal: search=no, ai-input=no, ai-train=no
|
|
501
503
|
```
|
|
504
|
+
|
|
505
|
+
---
|
|
506
|
+
|
|
507
|
+
> This tool only works for crawlers and AI bots that actually respect robots.txt. Respect, however, is rare these days.
|
package/dist/presets.js
CHANGED
|
@@ -21,6 +21,13 @@ export const presetDefaults = {
|
|
|
21
21
|
searchEngines: 'allow',
|
|
22
22
|
verifiedAi: 'allow',
|
|
23
23
|
unknownAi: 'disallow'
|
|
24
|
+
},
|
|
25
|
+
bots: {
|
|
26
|
+
GPTBot: 'disallow',
|
|
27
|
+
'Google-Extended': 'disallow',
|
|
28
|
+
CCBot: 'disallow',
|
|
29
|
+
Bytespider: 'disallow',
|
|
30
|
+
'Applebot-Extended': 'disallow'
|
|
24
31
|
}
|
|
25
32
|
},
|
|
26
33
|
openToAi: {
|
|
@@ -48,8 +55,12 @@ export const presetDefaults = {
|
|
|
48
55
|
},
|
|
49
56
|
bots: {
|
|
50
57
|
GPTBot: 'disallow',
|
|
58
|
+
ClaudeBot: 'disallow',
|
|
51
59
|
'Google-Extended': 'disallow',
|
|
52
|
-
CCBot: 'disallow'
|
|
60
|
+
CCBot: 'disallow',
|
|
61
|
+
Bytespider: 'disallow',
|
|
62
|
+
'meta-externalagent': 'disallow',
|
|
63
|
+
'Applebot-Extended': 'disallow'
|
|
53
64
|
}
|
|
54
65
|
},
|
|
55
66
|
lockdown: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@casoon/astro-crawler-policy",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Policy-first crawler control for Astro — generates robots.txt and llms.txt with presets, per-bot rules, AI crawler registry, and build-time audits.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|