@casoon/astro-crawler-policy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 casoon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,501 @@
1
+ # @casoon/astro-crawler-policy
2
+
3
+ Policy-first crawler control for Astro. Generates `robots.txt` (and optionally `llms.txt`) from a typed configuration at build time.
4
+
5
+ ## What it does
6
+
7
+ - Generates `robots.txt` from a typed configuration — no manual file editing required
8
+ - Applies one of five built-in **presets** covering the most common use cases
9
+ - Supports **content signals** (`search`, `ai-input`, `ai-train`) for newer crawler directives
10
+ - Includes a **bot registry** with 13 known crawlers for per-bot and group-based rules
11
+ - **Merges** the generated output with an existing `public/robots.txt` (replace / prepend / append)
12
+ - Runs **build-time audits** that warn about common misconfigurations
13
+ - Optionally generates **`llms.txt`** — a markdown summary of the AI content policy
14
+ - Supports **environment-specific overrides** (e.g. lockdown on staging)
15
+
16
+ This plugin renders crawler policy. It does not enforce blocking at the network, WAF, or edge layer.
17
+
18
+ ## Installation
19
+
20
+ ```sh
21
+ npm install @casoon/astro-crawler-policy
22
+ ```
23
+
24
+ ## Quick start
25
+
26
+ ```ts
27
+ // astro.config.ts
28
+ import { defineConfig } from 'astro/config';
29
+ import crawlerPolicy from '@casoon/astro-crawler-policy';
30
+
31
+ export default defineConfig({
32
+ site: 'https://example.com',
33
+ integrations: [
34
+ crawlerPolicy({
35
+ preset: 'citationFriendly',
36
+ sitemaps: ['/sitemap-index.xml']
37
+ })
38
+ ]
39
+ });
40
+ ```
41
+
42
+ The plugin hooks into `astro:build:done` and writes `dist/robots.txt`. With just these two options you get sensible defaults: search engines allowed, verified AI bots allowed for citation, AI training bots blocked.
43
+
44
+ ## Presets
45
+
46
+ Presets are the primary way to express intent. Each preset sets default content signals and group-level rules.
47
+
48
+ | Preset | Search | AI citation | AI training | Unknown AI |
49
+ |---|---|---|---|---|
50
+ | `seoOnly` | allow | disallow | disallow | disallow |
51
+ | `citationFriendly` *(default)* | allow | allow | disallow | disallow |
52
+ | `openToAi` | allow | allow | allow | allow |
53
+ | `blockTraining` | allow | allow | disallow | disallow |
54
+ | `lockdown` | disallow | disallow | disallow | disallow |
55
+
56
+ `blockTraining` additionally adds explicit `Disallow` rules for GPTBot, Google-Extended, and CCBot on top of the group-level setting.
57
+
58
+ `lockdown` adds a global `User-agent: * / Disallow: /` rule, overriding everything.
59
+
60
+ ## Content signals
61
+
62
+ Content signals are non-standard directives appended to the wildcard `User-agent: *` block:
63
+
64
+ ```
65
+ Content-signal: search=yes, ai-input=yes, ai-train=no
66
+ ```
67
+
68
+ They communicate intent to crawlers that support them. The three signals map to:
69
+
70
+ | Signal | Meaning |
71
+ |---|---|
72
+ | `search` | Indexing for traditional search engines |
73
+ | `aiInput` | Using content as input for AI responses (citation, summarization) |
74
+ | `aiTrain` | Using content as AI training data |
75
+
76
+ Content signals are not yet a web standard. Google Search Console may flag them as unrecognised directives — the audit system emits an `info` message when they are present.
77
+
78
+ Each preset sets default values for all three signals. You can override them individually:
79
+
80
+ ```ts
81
+ crawlerPolicy({
82
+ preset: 'citationFriendly',
83
+ contentSignals: {
84
+ aiTrain: true // override just this one; search and aiInput come from the preset
85
+ }
86
+ })
87
+ ```
88
+
89
+ ## Groups and per-bot rules
90
+
91
+ Rules are resolved in layers, from least to most specific:
92
+
93
+ 1. **Preset** — sets group-level defaults
94
+ 2. **`groups`** — overrides for entire bot categories
95
+ 3. **`bots`** — overrides for individual bots by registry ID
96
+
97
+ A bot's final action is the most specific rule that applies to it. An explicit entry in `bots` always wins over a `groups` setting.
98
+
99
+ ```ts
100
+ crawlerPolicy({
101
+ preset: 'citationFriendly',
102
+
103
+ // Override an entire group
104
+ groups: {
105
+ searchEngines: 'allow', // default
106
+ verifiedAi: 'allow', // default
107
+ unknownAi: 'disallow' // default
108
+ },
109
+
110
+ // Override individual bots (takes precedence over groups)
111
+ bots: {
112
+ GPTBot: 'disallow', // blocks this bot even if verifiedAi is 'allow'
113
+ ClaudeBot: 'allow' // allows this bot even if verifiedAi were 'disallow'
114
+ }
115
+ })
116
+ ```
117
+
118
+ The three groups are:
119
+ - **`searchEngines`** — bots with category `search` (Googlebot, Bingbot)
120
+ - **`verifiedAi`** — verified bots with AI categories (`ai-search`, `ai-input`, `ai-training`)
121
+ - **`unknownAi`** — unverified bots or bots with category `unknown-ai`
122
+
123
+ When a bot's action resolves to `'inherit'` (no group or preset covers it), the bot is omitted from the output.
124
+
125
+ ## Custom rules
126
+
127
+ For anything not covered by the preset or registry, use `rules` to add raw robots.txt directives:
128
+
129
+ ```ts
130
+ crawlerPolicy({
131
+ rules: [
132
+ {
133
+ userAgent: '*',
134
+ disallow: ['/admin/', '/internal/'],
135
+ crawlDelay: 2
136
+ },
137
+ {
138
+ userAgent: 'Slurp',
139
+ disallow: ['/']
140
+ }
141
+ ]
142
+ })
143
+ ```
144
+
145
+ A `userAgent: '*'` rule in `rules` is merged with the wildcard block that the preset generates — it does not create a second `User-agent: *` section.
146
+
147
+ Available fields per rule:
148
+
149
+ | Field | Type | Description |
150
+ |---|---|---|
151
+ | `userAgent` | `string \| string[]` | One or more User-agent values |
152
+ | `allow` | `string[]` | Paths to allow |
153
+ | `disallow` | `string[]` | Paths to disallow |
154
+ | `crawlDelay` | `number` | Crawl-delay in seconds |
155
+ | `comment` | `string` | Inline comment above the rule |
156
+
157
+ ## Merge strategy
158
+
159
+ When a `public/robots.txt` already exists, the merge strategy controls how it is combined with the generated output.
160
+
161
+ | Strategy | Result |
162
+ |---|---|
163
+ | `prepend` *(default)* | Generated output first, then existing file |
164
+ | `append` | Existing file first, then generated output |
165
+ | `replace` | Generated output only, existing file ignored |
166
+
167
+ ```ts
168
+ crawlerPolicy({
169
+ mergeStrategy: 'prepend'
170
+ })
171
+ ```
172
+
173
+ Use `prepend` to let the generated policy take precedence. Use `append` to keep hand-written rules at the top. Use `replace` when you want full control from config and no manual overrides.
174
+
175
+ ## Environment overrides
176
+
177
+ The plugin detects the current environment from these variables, in order:
178
+
179
+ 1. `CONTEXT` (Netlify)
180
+ 2. `DEPLOYMENT_ENVIRONMENT`
181
+ 3. `NODE_ENV`
182
+ 4. Falls back to `'production'`
183
+
184
+ Use `env` to apply different settings per environment:
185
+
186
+ ```ts
187
+ crawlerPolicy({
188
+ preset: 'citationFriendly',
189
+ env: {
190
+ staging: { preset: 'lockdown' },
191
+ preview: { preset: 'lockdown' }
192
+ }
193
+ })
194
+ ```
195
+
196
+ Any option can be overridden per environment. Nested objects (`contentSignals`, `bots`, `groups`) are merged — not replaced — with the base config.
197
+
198
+ ## Output files
199
+
200
+ ```ts
201
+ crawlerPolicy({
202
+ output: {
203
+ robotsTxt: true, // default — writes dist/robots.txt
204
+ llmsTxt: true // opt-in — writes dist/llms.txt
205
+ }
206
+ })
207
+ ```
208
+
209
+ ### llms.txt
210
+
211
+ When `output.llmsTxt: true` is set, the plugin generates `dist/llms.txt` alongside `robots.txt`. The file is a Markdown summary of the AI content policy — which crawlers are allowed or blocked, what signals are active, and where the sitemap is:
212
+
213
+ ```md
214
+ # example.com
215
+
216
+ > AI content access policy for example.com.
217
+ > Generated by @casoon/astro-crawler-policy (preset: citationFriendly).
218
+
219
+ ## Content Policy
220
+
221
+ - Search indexing: allowed
222
+ - AI citation and summarization: allowed
223
+ - AI training data collection: not allowed
224
+
225
+ ## AI Systems
226
+
227
+ ### Allowed
228
+ - OAI-SearchBot (OpenAI)
229
+ - ClaudeBot (Anthropic)
230
+ - claude-web (Anthropic)
231
+ - PerplexityBot (Perplexity)
232
+ - meta-externalagent (Meta)
233
+ - Amazonbot (Amazon)
234
+ - Googlebot (Google)
235
+ - Bingbot (Microsoft)
236
+
237
+ ### Blocked
238
+ - GPTBot (OpenAI)
239
+ - Google-Extended (Google)
240
+ - CCBot (Common Crawl)
241
+ - Bytespider (ByteDance)
242
+ - Applebot-Extended (Apple)
243
+
244
+ ## Sitemaps
245
+
246
+ - https://example.com/sitemap-index.xml
247
+ ```
248
+
249
+ ## Debug mode
250
+
251
+ Set `debug: true` to print the resolved configuration to the build log:
252
+
253
+ ```ts
254
+ crawlerPolicy({ debug: true })
255
+ ```
256
+
257
+ Build output:
258
+
259
+ ```
260
+ [@casoon/astro-crawler-policy] [debug] registry version: 2026-04-09
261
+ [@casoon/astro-crawler-policy] [debug] environment: production
262
+ [@casoon/astro-crawler-policy] [debug] preset: citationFriendly
263
+ [@casoon/astro-crawler-policy] [debug] content signals: search=yes, aiInput=yes, aiTrain=no
264
+ [@casoon/astro-crawler-policy] [debug] bot: GPTBot → disallow
265
+ [@casoon/astro-crawler-policy] [debug] bot: OAI-SearchBot → allow
266
+ ...
267
+ [@casoon/astro-crawler-policy] [debug] sitemap: https://example.com/sitemap-index.xml
268
+ ```
269
+
270
+ ## Bot registry
271
+
272
+ The following bots are known and can be referenced by ID in `bots: {}`:
273
+
274
+ | ID | Provider | Categories | Group |
275
+ |---|---|---|---|
276
+ | `GPTBot` | OpenAI | ai-training | verifiedAi |
277
+ | `OAI-SearchBot` | OpenAI | ai-search, ai-input | verifiedAi |
278
+ | `ClaudeBot` | Anthropic | ai-input, ai-training | verifiedAi |
279
+ | `claude-web` | Anthropic | ai-input | verifiedAi |
280
+ | `Google-Extended` | Google | ai-training | verifiedAi |
281
+ | `CCBot` | Common Crawl | ai-training | verifiedAi |
282
+ | `PerplexityBot` | Perplexity | ai-search, ai-input | verifiedAi |
283
+ | `Bytespider` | ByteDance | ai-training | verifiedAi |
284
+ | `meta-externalagent` | Meta | ai-input, ai-training | verifiedAi |
285
+ | `Amazonbot` | Amazon | ai-search, ai-input | verifiedAi |
286
+ | `Applebot-Extended` | Apple | ai-training | verifiedAi |
287
+ | `Googlebot` | Google | search | searchEngines |
288
+ | `Bingbot` | Microsoft | search | searchEngines |
289
+
290
+ ## Extending the registry
291
+
292
+ The built-in registry covers the most common crawlers. To support bots not yet listed, use `extraBots`:
293
+
294
+ ```ts
295
+ crawlerPolicy({
296
+ extraBots: [
297
+ {
298
+ id: 'MyCustomBot',
299
+ provider: 'Acme Corp',
300
+ userAgents: ['MyCustomBot/1.0'],
301
+ categories: ['ai-training'],
302
+ verified: true
303
+ }
304
+ ],
305
+ bots: {
306
+ MyCustomBot: 'disallow'
307
+ }
308
+ })
309
+ ```
310
+
311
+ Extra bots participate in group rules, per-bot overrides, audit checks, and `llms.txt` output — the same as built-in bots.
312
+
313
+ **Keeping the registry up to date:** The registry ships as part of the package. As new crawlers emerge, updates are released as patch versions. Run `npm update @casoon/astro-crawler-policy` to get the latest bot data. The `REGISTRY_VERSION` export contains the date of the last registry update.
314
+
315
+ ## Audit warnings
316
+
317
+ The plugin emits warnings and info messages during the build:
318
+
319
+ | Code | Level | Condition |
320
+ |---|---|---|
321
+ | `MISSING_SITE_URL` | warn | No `site` set in Astro config |
322
+ | `NO_SITEMAP` | info | No sitemaps configured |
323
+ | `DUPLICATE_USER_AGENT_RULE` | warn | Two rules share the same User-agent |
324
+ | `UNLOCKED_NON_PRODUCTION_ENVIRONMENT` | warn | Staging/preview not globally blocked |
325
+ | `NON_STANDARD_DIRECTIVES` | info | Content signals may trigger GSC syntax warnings |
326
+ | `AI_INPUT_WITHOUT_ALLOWED_BOTS` | warn | `aiInput` enabled but all AI bots blocked |
327
+ | `UNKNOWN_BOT_ID` | warn | A bot ID in `bots: {}` is not in the registry |
328
+ | `GROUP_BOT_OVERRIDE_CONFLICT` | info | Bot override contradicts its group rule |
329
+
330
+ Audit settings:
331
+
332
+ ```ts
333
+ crawlerPolicy({
334
+ audit: {
335
+ warnOnMissingSitemap: true, // default
336
+ warnOnConflicts: true // default
337
+ }
338
+ })
339
+ ```
340
+
341
+ ## Programmatic usage
342
+
343
+ The core modules are exported for use outside of the Astro integration:
344
+
345
+ ```ts
346
+ import {
347
+ compilePolicy,
348
+ renderRobotsTxt,
349
+ renderLlmsTxt,
350
+ auditPolicy,
351
+ defaultRegistry,
352
+ REGISTRY_VERSION
353
+ } from '@casoon/astro-crawler-policy';
354
+
355
+ const policy = compilePolicy({
356
+ options: { preset: 'citationFriendly', sitemaps: ['/sitemap-index.xml'] },
357
+ site: 'https://example.com',
358
+ environment: 'production'
359
+ });
360
+
361
+ const robotsTxt = renderRobotsTxt(policy);
362
+ const llmsTxt = renderLlmsTxt(policy, 'https://example.com');
363
+ const issues = auditPolicy(policy, { site: 'https://example.com', registry: defaultRegistry });
364
+ ```
365
+
366
+ ## Generated output examples
367
+
368
+ ### citationFriendly (default)
369
+
370
+ ```ts
371
+ crawlerPolicy({
372
+ preset: 'citationFriendly',
373
+ sitemaps: ['/sitemap-index.xml']
374
+ })
375
+ ```
376
+
377
+ ```
378
+ # Generated by @casoon/astro-crawler-policy
379
+ # preset: citationFriendly
380
+
381
+ User-agent: *
382
+ Allow: /
383
+ Content-signal: search=yes, ai-input=yes, ai-train=no
384
+
385
+ User-agent: GPTBot
386
+ Disallow: /
387
+
388
+ User-agent: OAI-SearchBot
389
+ Allow: /
390
+
391
+ User-agent: ClaudeBot
392
+ Allow: /
393
+
394
+ User-agent: claude-web
395
+ Allow: /
396
+
397
+ User-agent: Google-Extended
398
+ Disallow: /
399
+
400
+ User-agent: CCBot
401
+ Disallow: /
402
+
403
+ User-agent: PerplexityBot
404
+ Allow: /
405
+
406
+ User-agent: Bytespider
407
+ Disallow: /
408
+
409
+ User-agent: meta-externalagent
410
+ Allow: /
411
+
412
+ User-agent: Amazonbot
413
+ Allow: /
414
+
415
+ User-agent: Applebot-Extended
416
+ Disallow: /
417
+
418
+ User-agent: Googlebot
419
+ Allow: /
420
+
421
+ User-agent: Bingbot
422
+ Allow: /
423
+
424
+ Sitemap: https://example.com/sitemap-index.xml
425
+ ```
426
+
427
+ ### seoOnly
428
+
429
+ ```ts
430
+ crawlerPolicy({ preset: 'seoOnly' })
431
+ ```
432
+
433
+ ```
434
+ # Generated by @casoon/astro-crawler-policy
435
+ # preset: seoOnly
436
+
437
+ User-agent: *
438
+ Allow: /
439
+ Content-signal: search=yes, ai-input=no, ai-train=no
440
+
441
+ User-agent: GPTBot
442
+ Disallow: /
443
+
444
+ User-agent: OAI-SearchBot
445
+ Disallow: /
446
+
447
+ User-agent: ClaudeBot
448
+ Disallow: /
449
+
450
+ User-agent: claude-web
451
+ Disallow: /
452
+
453
+ User-agent: Google-Extended
454
+ Disallow: /
455
+
456
+ User-agent: CCBot
457
+ Disallow: /
458
+
459
+ User-agent: PerplexityBot
460
+ Disallow: /
461
+
462
+ User-agent: Bytespider
463
+ Disallow: /
464
+
465
+ User-agent: meta-externalagent
466
+ Disallow: /
467
+
468
+ User-agent: Amazonbot
469
+ Disallow: /
470
+
471
+ User-agent: Applebot-Extended
472
+ Disallow: /
473
+
474
+ User-agent: Googlebot
475
+ Allow: /
476
+
477
+ User-agent: Bingbot
478
+ Allow: /
479
+ ```
480
+
481
+ ### lockdown (staging/preview)
482
+
483
+ ```ts
484
+ crawlerPolicy({
485
+ env: {
486
+ staging: { preset: 'lockdown' },
487
+ preview: { preset: 'lockdown' }
488
+ }
489
+ })
490
+ ```
491
+
492
+ When `CONTEXT=staging` or `NODE_ENV=staging`:
493
+
494
+ ```
495
+ # Generated by @casoon/astro-crawler-policy
496
+ # preset: lockdown
497
+
498
+ User-agent: *
499
+ Disallow: /
500
+ Content-signal: search=no, ai-input=no, ai-train=no
501
+ ```
@@ -0,0 +1,9 @@
1
import type { AstroCrawlerPolicyOptions, AuditIssue, RegistryBot, ResolvedPolicy } from './types.js';
/**
 * Checks a resolved crawler policy for common misconfigurations and returns
 * the audit issues found (empty array when every check passes).
 *
 * @param policy - The compiled policy to inspect.
 * @param options - Optional audit context:
 *   `site` — site URL from the Astro config; absence triggers a warning.
 *   `environment` — current deploy environment (e.g. 'staging', 'preview').
 *   `registry` — known-bot registry used to validate bot IDs.
 *   `rawOptions` — the user's original plugin options, used for
 *     unknown-bot-ID and group/bot conflict checks.
 *   `warnOnMissingSitemap` — set to false to suppress the missing-sitemap notice.
 *   `warnOnConflicts` — set to false to suppress group/bot conflict notices.
 * @returns Issues with a level ('warn' | 'info'), a stable code, and a message.
 */
export declare function auditPolicy(policy: ResolvedPolicy, options?: {
    site?: string;
    environment?: string;
    registry?: RegistryBot[];
    rawOptions?: AstroCrawlerPolicyOptions;
    warnOnMissingSitemap?: boolean;
    warnOnConflicts?: boolean;
}): AuditIssue[];
package/dist/audit.js ADDED
@@ -0,0 +1,113 @@
1
/**
 * Returns true when two (or more) rule entries target the same User-agent.
 * Comparison is case-insensitive; a rule that lists several user agents
 * contributes each entry individually.
 */
function hasDuplicateUserAgents(policy) {
    const allAgents = policy.rules.flatMap((rule) => {
        const agents = Array.isArray(rule.userAgent) ? rule.userAgent : [rule.userAgent];
        return agents.map((agent) => agent.toLowerCase());
    });
    // Deduplication shrinks the list iff at least one agent repeats.
    return new Set(allAgents).size !== allAgents.length;
}
/**
 * Resolves the group-level action configured for `bot`, checked in order:
 * unverified bot → unknownAi, search category → searchEngines,
 * verified AI category → verifiedAi, 'unknown-ai' category → unknownAi.
 * Returns undefined when no configured group covers the bot.
 */
function resolveGroupAction(bot, groups) {
    if (bot.verified === false && groups.unknownAi) {
        return groups.unknownAi;
    }
    if (bot.categories.includes('search') && groups.searchEngines) {
        return groups.searchEngines;
    }
    const aiCategories = ['ai-search', 'ai-input', 'ai-training'];
    if (bot.verified && bot.categories.some((category) => aiCategories.includes(category)) && groups.verifiedAi) {
        return groups.verifiedAi;
    }
    if (bot.categories.includes('unknown-ai') && groups.unknownAi) {
        return groups.unknownAi;
    }
    return undefined;
}
/**
 * Inspects a resolved crawler policy and reports potential misconfigurations.
 * Issues are returned, never thrown; each carries a level ('warn' | 'info'),
 * a stable code, and a human-readable message.
 */
export function auditPolicy(policy, options = {}) {
    const issues = [];
    const report = (level, code, message) => issues.push({ level, code, message });
    const registry = options.registry ?? [];
    const rawOptions = options.rawOptions;
    const allowedAiBots = policy.botRules.filter((rule) => rule.action === 'allow');
    const blockedAiBots = policy.botRules.filter((rule) => rule.action === 'disallow');
    // ai-input signal promises AI access, yet every known AI bot is blocked.
    if (policy.contentSignals.aiInput && allowedAiBots.length === 0 && blockedAiBots.length > 0) {
        report('warn', 'AI_INPUT_WITHOUT_ALLOWED_BOTS', 'ai-input is enabled, but all known AI bots are blocked.');
    }
    // Missing sitemap is informational only and can be suppressed via options.
    if (options.warnOnMissingSitemap !== false && policy.sitemaps.length === 0) {
        report('info', 'NO_SITEMAP', 'No sitemap configured. This reduces discoverability for crawlers.');
    }
    if (!options.site) {
        report('warn', 'MISSING_SITE_URL', 'No site URL found. Relative sitemap entries may be incomplete.');
    }
    if (hasDuplicateUserAgents(policy)) {
        report('warn', 'DUPLICATE_USER_AGENT_RULE', 'Multiple rules share the same User-agent. This may result in conflicting behavior.');
    }
    // Staging/preview deployments should normally carry a global Disallow: /.
    const environment = options.environment ?? '';
    if (environment === 'preview' || environment === 'staging') {
        const globallyBlocked = policy.rules.some((rule) => {
            const agents = Array.isArray(rule.userAgent) ? rule.userAgent : [rule.userAgent];
            return agents.includes('*') && rule.disallow?.includes('/');
        });
        if (!globallyBlocked) {
            report('warn', 'UNLOCKED_NON_PRODUCTION_ENVIRONMENT', 'Preview or staging environment is not globally blocked with Disallow: /.');
        }
    }
    // Content signals are non-standard; surface that GSC warnings are expected.
    const { search, aiInput, aiTrain } = policy.contentSignals;
    if (search !== undefined || aiInput !== undefined || aiTrain !== undefined) {
        report('info', 'NON_STANDARD_DIRECTIVES', 'Newer directives like Content-signal may trigger syntax warnings in tools like GSC.');
    }
    // Per-bot overrides that reference IDs outside the registry are inert.
    if (rawOptions?.bots && registry.length > 0) {
        const knownIds = new Set(registry.map((bot) => bot.id));
        for (const botId of Object.keys(rawOptions.bots)) {
            if (!knownIds.has(botId)) {
                report('warn', 'UNKNOWN_BOT_ID', `Bot ID "${botId}" is not in the registry and will have no effect.`);
            }
        }
    }
    // Explicit bot overrides contradicting their group rule are surfaced as info.
    if (options.warnOnConflicts !== false && rawOptions?.bots && rawOptions?.groups && registry.length > 0) {
        for (const [botId, explicitAction] of Object.entries(rawOptions.bots)) {
            if (explicitAction === 'inherit') {
                continue;
            }
            const bot = registry.find((entry) => entry.id === botId);
            if (!bot) {
                continue;
            }
            const groupAction = resolveGroupAction(bot, rawOptions.groups);
            if (groupAction && groupAction !== explicitAction) {
                report('info', 'GROUP_BOT_OVERRIDE_CONFLICT', `Bot "${botId}" is explicitly set to "${explicitAction}" but its group rule says "${groupAction}".`);
            }
        }
    }
    return issues;
}
@@ -0,0 +1,2 @@
1
import type { CompilePolicyInput, ResolvedPolicy } from './types.js';
/**
 * Resolves plugin options into a single `ResolvedPolicy` ready for rendering
 * and auditing.
 * NOTE(review): the implementation (dist/compile.js) is not visible in this
 * chunk — per the package README this merges preset defaults, group/bot
 * overrides, and environment-specific overrides; confirm against the source.
 */
export declare function compilePolicy(input: CompilePolicyInput): ResolvedPolicy;