@casoon/astro-crawler-policy 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -64,7 +64,9 @@ Presets are the primary way to express intent. Each preset sets default content
64
64
  Content signals are non-standard directives placed inside the wildcard `User-agent: *` block, before its `Allow`/`Disallow` rules:
65
65
 
66
66
  ```
67
- Content-signal: search=yes, ai-input=yes, ai-train=no
67
+ User-agent: *
68
+ Content-Signal: search=yes, ai-input=yes, ai-train=no
69
+ Allow: /
68
70
  ```
69
71
 
70
72
  They communicate intent to crawlers that support them. The three signals map to:
@@ -75,7 +77,7 @@ They communicate intent to crawlers that support them. The three signals map to:
75
77
  | `aiInput` | Using content as input for AI responses (citation, summarization) |
76
78
  | `aiTrain` | Using content as AI training data |
77
79
 
78
- Content signals are not yet a web standard. Google Search Console may flag them as unrecognised directives — the audit system emits an `info` message when they are present.
80
+ The directive name and signal keys follow the [contentsignals.org](https://contentsignals.org) specification (proposed IETF aipref standard). Google Search Console may flag them as unrecognised directives — the audit system emits an `info` message when they are present.
79
81
 
80
82
  Each preset sets default values for all three signals. You can override them individually:
81
83
 
@@ -381,8 +383,8 @@ crawlerPolicy({
381
383
  # preset: citationFriendly
382
384
 
383
385
  User-agent: *
386
+ Content-Signal: search=yes, ai-input=yes, ai-train=no
384
387
  Allow: /
385
- Content-signal: search=yes, ai-input=yes, ai-train=no
386
388
 
387
389
  User-agent: GPTBot
388
390
  Disallow: /
@@ -437,8 +439,8 @@ crawlerPolicy({ preset: 'seoOnly' })
437
439
  # preset: seoOnly
438
440
 
439
441
  User-agent: *
442
+ Content-Signal: search=yes, ai-input=no, ai-train=no
440
443
  Allow: /
441
- Content-signal: search=yes, ai-input=no, ai-train=no
442
444
 
443
445
  User-agent: GPTBot
444
446
  Disallow: /
@@ -498,8 +500,8 @@ When `CONTEXT=staging` or `NODE_ENV=staging`:
498
500
  # preset: lockdown
499
501
 
500
502
  User-agent: *
503
+ Content-Signal: search=no, ai-input=no, ai-train=no
501
504
  Disallow: /
502
- Content-signal: search=no, ai-input=no, ai-train=no
503
505
  ```
504
506
 
505
507
  ---
package/dist/render.js CHANGED
@@ -77,7 +77,7 @@ function renderContentSignals(contentSignals) {
77
77
  if (contentSignals.aiTrain !== undefined) {
78
78
  parts.push(`ai-train=${contentSignals.aiTrain ? 'yes' : 'no'}`);
79
79
  }
80
- return parts.length ? [`Content-signal: ${parts.join(', ')}`] : [];
80
+ return parts.length ? [`Content-Signal: ${parts.join(', ')}`] : [];
81
81
  }
82
82
  function renderRule(rule) {
83
83
  const lines = [];
@@ -109,8 +109,23 @@ export function renderRobotsTxt(policy) {
109
109
  allow: ['/'],
110
110
  disallow: []
111
111
  };
112
- lines.push(...renderRule(wildcardRule));
112
+ // Render wildcard block with Content-Signal before Allow/Disallow (per spec)
113
+ for (const userAgent of wildcardRule.userAgent) {
114
+ lines.push(`User-agent: ${userAgent}`);
115
+ }
116
+ if (wildcardRule.comment) {
117
+ lines.push(`# ${wildcardRule.comment}`);
118
+ }
113
119
  lines.push(...renderContentSignals(policy.contentSignals));
120
+ for (const allow of wildcardRule.allow ?? []) {
121
+ lines.push(`Allow: ${allow}`);
122
+ }
123
+ for (const disallow of wildcardRule.disallow ?? []) {
124
+ lines.push(`Disallow: ${disallow}`);
125
+ }
126
+ if (wildcardRule.crawlDelay !== undefined) {
127
+ lines.push(`Crawl-delay: ${wildcardRule.crawlDelay}`);
128
+ }
114
129
  lines.push('');
115
130
  for (const rule of policy.rules) {
116
131
  if (rule === wildcardRule) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@casoon/astro-crawler-policy",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Policy-first crawler control for Astro — generates robots.txt and llms.txt with presets, per-bot rules, AI crawler registry, and build-time audits.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",