@casoon/astro-crawler-policy 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/dist/render.js +17 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -64,7 +64,9 @@ Presets are the primary way to express intent. Each preset sets default content
|
|
|
64
64
|
Content signals are non-standard directives appended to the wildcard `User-agent: *` block:
|
|
65
65
|
|
|
66
66
|
```
|
|
67
|
-
|
|
67
|
+
User-agent: *
|
|
68
|
+
Content-Signal: search=yes, ai-input=yes, ai-train=no
|
|
69
|
+
Allow: /
|
|
68
70
|
```
|
|
69
71
|
|
|
70
72
|
They communicate intent to crawlers that support them. The three signals map to:
|
|
@@ -75,7 +77,7 @@ They communicate intent to crawlers that support them. The three signals map to:
|
|
|
75
77
|
| `aiInput` | Using content as input for AI responses (citation, summarization) |
|
|
76
78
|
| `aiTrain` | Using content as AI training data |
|
|
77
79
|
|
|
78
|
-
|
|
80
|
+
The directive name and signal keys follow the [contentsignals.org](https://contentsignals.org) specification (proposed IETF aipref standard). Google Search Console may flag them as unrecognised directives — the audit system emits an `info` message when they are present.
|
|
79
81
|
|
|
80
82
|
Each preset sets default values for all three signals. You can override them individually:
|
|
81
83
|
|
|
@@ -381,8 +383,8 @@ crawlerPolicy({
|
|
|
381
383
|
# preset: citationFriendly
|
|
382
384
|
|
|
383
385
|
User-agent: *
|
|
386
|
+
Content-Signal: search=yes, ai-input=yes, ai-train=no
|
|
384
387
|
Allow: /
|
|
385
|
-
Content-signal: search=yes, ai-input=yes, ai-train=no
|
|
386
388
|
|
|
387
389
|
User-agent: GPTBot
|
|
388
390
|
Disallow: /
|
|
@@ -437,8 +439,8 @@ crawlerPolicy({ preset: 'seoOnly' })
|
|
|
437
439
|
# preset: seoOnly
|
|
438
440
|
|
|
439
441
|
User-agent: *
|
|
442
|
+
Content-Signal: search=yes, ai-input=no, ai-train=no
|
|
440
443
|
Allow: /
|
|
441
|
-
Content-signal: search=yes, ai-input=no, ai-train=no
|
|
442
444
|
|
|
443
445
|
User-agent: GPTBot
|
|
444
446
|
Disallow: /
|
|
@@ -498,8 +500,8 @@ When `CONTEXT=staging` or `NODE_ENV=staging`:
|
|
|
498
500
|
# preset: lockdown
|
|
499
501
|
|
|
500
502
|
User-agent: *
|
|
503
|
+
Content-Signal: search=no, ai-input=no, ai-train=no
|
|
501
504
|
Disallow: /
|
|
502
|
-
Content-signal: search=no, ai-input=no, ai-train=no
|
|
503
505
|
```
|
|
504
506
|
|
|
505
507
|
---
|
package/dist/render.js
CHANGED
|
@@ -77,7 +77,7 @@ function renderContentSignals(contentSignals) {
|
|
|
77
77
|
if (contentSignals.aiTrain !== undefined) {
|
|
78
78
|
parts.push(`ai-train=${contentSignals.aiTrain ? 'yes' : 'no'}`);
|
|
79
79
|
}
|
|
80
|
-
return parts.length ? [`Content-signal: ${parts.join(', ')}`] : [];
|
|
80
|
+
return parts.length ? [`Content-Signal: ${parts.join(', ')}`] : [];
|
|
81
81
|
}
|
|
82
82
|
function renderRule(rule) {
|
|
83
83
|
const lines = [];
|
|
@@ -109,8 +109,23 @@ export function renderRobotsTxt(policy) {
|
|
|
109
109
|
allow: ['/'],
|
|
110
110
|
disallow: []
|
|
111
111
|
};
|
|
112
|
-
|
|
112
|
+
// Render wildcard block with Content-Signal before Allow/Disallow (per spec)
|
|
113
|
+
for (const userAgent of wildcardRule.userAgent) {
|
|
114
|
+
lines.push(`User-agent: ${userAgent}`);
|
|
115
|
+
}
|
|
116
|
+
if (wildcardRule.comment) {
|
|
117
|
+
lines.push(`# ${wildcardRule.comment}`);
|
|
118
|
+
}
|
|
113
119
|
lines.push(...renderContentSignals(policy.contentSignals));
|
|
120
|
+
for (const allow of wildcardRule.allow ?? []) {
|
|
121
|
+
lines.push(`Allow: ${allow}`);
|
|
122
|
+
}
|
|
123
|
+
for (const disallow of wildcardRule.disallow ?? []) {
|
|
124
|
+
lines.push(`Disallow: ${disallow}`);
|
|
125
|
+
}
|
|
126
|
+
if (wildcardRule.crawlDelay !== undefined) {
|
|
127
|
+
lines.push(`Crawl-delay: ${wildcardRule.crawlDelay}`);
|
|
128
|
+
}
|
|
114
129
|
lines.push('');
|
|
115
130
|
for (const rule of policy.rules) {
|
|
116
131
|
if (rule === wildcardRule) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@casoon/astro-crawler-policy",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Policy-first crawler control for Astro — generates robots.txt and llms.txt with presets, per-bot rules, AI crawler registry, and build-time audits.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|