vite-robots-txt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +135 -0
- package/dist/index.cjs +144 -0
- package/dist/index.d.cts +102 -0
- package/dist/index.d.mts +102 -0
- package/dist/index.mjs +137 -0
- package/package.json +62 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@

MIT License

Copyright (c) 2026 Kaj Kowalski

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,135 @@

# vite-robots-txt

Vite plugin to generate `robots.txt` with presets, per-bot rules, and dev mode blocking.

## Install

```bash
bun add -d vite-robots-txt
# or
npm install -D vite-robots-txt
```

## Usage

```ts
// vite.config.ts
import { defineConfig } from 'vite';
import robotsTxt from 'vite-robots-txt';

export default defineConfig({
  plugins: [
    robotsTxt({ preset: 'allowAll' }),
  ],
});
```

## Presets

| Preset        | Description                                          |
| ------------- | ---------------------------------------------------- |
| `allowAll`    | Allow all crawlers                                   |
| `disallowAll` | Block all crawlers                                   |
| `blockAI`     | Allow search engines, block AI/LLM training crawlers |
| `searchOnly`  | Allow only major search engines                      |

### Block AI crawlers

```ts
robotsTxt({ preset: 'blockAI' });
```

Generates:

```txt
# Allow all crawlers by default
User-agent: *
Allow: /

# Block AI/LLM training crawlers
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: Bytespider
User-agent: CCBot
User-agent: Cohere-ai
User-agent: Amazonbot
User-agent: YouBot
Disallow: /
```

## Custom policies

```ts
robotsTxt({
  policies: [
    { userAgent: '*', allow: '/', disallow: ['/admin', '/api'] },
    { userAgent: 'GPTBot', disallow: '/' },
  ],
  sitemap: 'https://example.com/sitemap.xml',
});
```

### Merge preset + custom rules

```ts
robotsTxt({
  preset: 'blockAI',
  policies: { userAgent: 'Baiduspider', disallow: '/', crawlDelay: 10 },
});
```

## Options

| Option     | Type                                                       | Default         | Description                               |
| ---------- | ---------------------------------------------------------- | --------------- | ----------------------------------------- |
| `preset`   | `'allowAll' \| 'disallowAll' \| 'blockAI' \| 'searchOnly'` | —               | Start from a preset                       |
| `policies` | `PolicyRule \| PolicyRule[]`                               | —               | Custom policy rules                       |
| `sitemap`  | `string \| string[] \| boolean`                            | —               | Sitemap URL(s) or `true` for auto-detect  |
| `host`     | `string`                                                   | —               | Yandex `Host:` directive                  |
| `fileName` | `string`                                                   | `'robots.txt'`  | Output file name                          |
| `devMode`  | `'disallowAll' \| 'same' \| false`                         | `'disallowAll'` | Dev server behavior                       |
| `header`   | `string`                                                   | —               | Comment at top of file                    |

### PolicyRule

| Field        | Type                 | Description                            |
| ------------ | -------------------- | -------------------------------------- |
| `userAgent`  | `string \| string[]` | Bot name(s), `'*'` for all             |
| `allow`      | `string \| string[]` | Paths to allow                         |
| `disallow`   | `string \| string[]` | Paths to disallow                      |
| `crawlDelay` | `number`             | Seconds between requests (Bing/Yandex) |
| `comment`    | `string \| string[]` | Comments above the rule group          |
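
The tables above cover the full option surface. As a hedged sketch of how these options compose in one config, consider the following; the domain, paths, and header text are placeholder values, not taken from the package:

```ts
// vite.config.ts (illustrative values only)
import { defineConfig } from 'vite';
import robotsTxt from 'vite-robots-txt';

export default defineConfig({
  plugins: [
    robotsTxt({
      preset: 'blockAI',
      // Placeholder paths: keep drafts and internal pages out of crawlers.
      policies: { userAgent: '*', disallow: ['/drafts', '/internal'] },
      sitemap: 'https://example.com/sitemap.xml',
      host: 'example.com',
      fileName: 'robots.txt',
      devMode: 'disallowAll',
      header: 'Generated by vite-robots-txt',
    }),
  ],
});
```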

## Dev mode

By default, the plugin serves a `Disallow: /` robots.txt during development to prevent indexing of your dev server. Set `devMode: 'same'` to serve the same config as production, or `false` to disable.
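
For example (a minimal sketch; which value you pick depends on whether your dev or preview server is ever publicly reachable):

```ts
// Serve the production robots.txt from the dev/preview server as well.
robotsTxt({ preset: 'allowAll', devMode: 'same' });

// Or skip serving robots.txt in dev entirely.
robotsTxt({ preset: 'allowAll', devMode: false });
```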

## Standalone serializer

```ts
import { serialize } from 'vite-robots-txt';

const txt = serialize({
  preset: 'blockAI',
  sitemap: 'https://example.com/sitemap.xml',
});
```
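
Since `serialize` returns plain text, it can also be written out by any script outside of Vite. A minimal sketch using Node's `fs` (the `public/robots.txt` path is an assumption, not something the package prescribes):

```ts
import { writeFileSync } from 'node:fs';
import { serialize } from 'vite-robots-txt';

// Write a robots.txt for a non-Vite build step.
writeFileSync('public/robots.txt', serialize({ preset: 'searchOnly' }));
```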

## Exports

| Export                | Description                              |
| --------------------- | ---------------------------------------- |
| `robotsTxt` (default) | Vite plugin factory                      |
| `serialize`           | Standalone robots.txt serializer         |
| `AI_BOTS`             | Array of known AI crawler user-agents    |
| `SEARCH_ENGINES`      | Array of major search engine user-agents |
| `presetPolicies`      | Preset policy definitions                |
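
The bot lists can feed custom policies directly, as in this sketch (they are exported as readonly tuples, so spread them into a fresh array, just as the built-in presets do):

```ts
import robotsTxt, { AI_BOTS, SEARCH_ENGINES } from 'vite-robots-txt';

robotsTxt({
  policies: [
    { userAgent: [...SEARCH_ENGINES], allow: '/' },
    { userAgent: [...AI_BOTS], disallow: '/' },
  ],
});
```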

## License

MIT
package/dist/index.cjs
ADDED
@@ -0,0 +1,144 @@

Object.defineProperties(exports, { __esModule: { value: true }, [Symbol.toStringTag]: { value: 'Module' } });

//#region src/presets.ts
/** AI/LLM training crawlers to block */
const AI_BOTS = [
  "GPTBot",
  "ChatGPT-User",
  "Claude-Web",
  "ClaudeBot",
  "anthropic-ai",
  "Google-Extended",
  "PerplexityBot",
  "Bytespider",
  "CCBot",
  "Cohere-ai",
  "Amazonbot",
  "YouBot"
];
/** Major search engine crawlers */
const SEARCH_ENGINES = [
  "Googlebot",
  "Bingbot",
  "DuckDuckBot",
  "Slurp",
  "Applebot",
  "Baiduspider",
  "YandexBot"
];
const presetPolicies = {
  allowAll: [{
    userAgent: "*",
    allow: "/"
  }],
  disallowAll: [{
    userAgent: "*",
    disallow: "/"
  }],
  blockAI: [{
    userAgent: "*",
    allow: "/",
    comment: "Allow all crawlers by default"
  }, {
    userAgent: [...AI_BOTS],
    disallow: "/",
    comment: "Block AI/LLM training crawlers"
  }],
  searchOnly: [{
    userAgent: "*",
    disallow: "/",
    comment: "Block all by default"
  }, {
    userAgent: [...SEARCH_ENGINES],
    allow: "/",
    comment: "Allow major search engines"
  }]
};

//#endregion
//#region src/serialize.ts
/** Normalize `OneOrMany<T>` to `T[]` */
function toArray(value) {
  if (value === void 0) return [];
  return Array.isArray(value) ? value : [value];
}
/** Serialize a single policy rule group into robots.txt lines */
function serializePolicy(rule) {
  const lines = [];
  for (const c of toArray(rule.comment)) lines.push(`# ${c}`);
  for (const ua of toArray(rule.userAgent)) lines.push(`User-agent: ${ua}`);
  for (const path of toArray(rule.disallow)) lines.push(`Disallow: ${path}`);
  for (const path of toArray(rule.allow)) lines.push(`Allow: ${path}`);
  if (rule.crawlDelay !== void 0) lines.push(`Crawl-delay: ${rule.crawlDelay}`);
  return lines.join("\n");
}
/** Build the full robots.txt content from resolved options */
function serialize(options) {
  const sections = [];
  if (options.header) sections.push(`# ${options.header}`);
  const policies = [];
  if (options.preset) {
    const presetRules = presetPolicies[options.preset];
    if (presetRules) policies.push(...presetRules);
  }
  for (const p of toArray(options.policies)) policies.push(p);
  if (policies.length === 0) policies.push({
    userAgent: "*",
    allow: "/"
  });
  for (const policy of policies) sections.push(serializePolicy(policy));
  if (options.host) sections.push(`Host: ${options.host}`);
  if (options.sitemap && options.sitemap !== true) for (const url of toArray(options.sitemap)) sections.push(`Sitemap: ${url}`);
  return `${sections.join("\n\n")}\n`;
}

//#endregion
//#region src/plugin.ts
const PLUGIN_NAME = "vite-robots-txt";
const DEV_ROBOTS = "User-agent: *\nDisallow: /\n";
function createMiddleware(fileName, content) {
  return (req, res, next) => {
    if (req.url !== `/${fileName}`) return next();
    res.setHeader("Content-Type", "text/plain");
    res.setHeader("Cache-Control", "no-cache");
    res.end(content);
  };
}
function robotsTxt(options = {}) {
  const fileName = options.fileName ?? "robots.txt";
  const devMode = options.devMode ?? "disallowAll";
  let siteBase = "/";
  const devContent = devMode === "disallowAll" ? DEV_ROBOTS : serialize(options);
  return {
    name: PLUGIN_NAME,
    enforce: "post",
    configResolved(config) {
      siteBase = config.base ?? "/";
    },
    configureServer(server) {
      if (devMode === false) return;
      server.middlewares.use(createMiddleware(fileName, devContent));
    },
    configurePreviewServer(server) {
      if (devMode === false) return;
      server.middlewares.use(createMiddleware(fileName, devContent));
    },
    generateBundle() {
      const resolved = { ...options };
      if (resolved.sitemap === true) resolved.sitemap = `${siteBase}sitemap.xml`.replace(/\/+/g, "/");
      this.emitFile({
        type: "asset",
        fileName,
        source: serialize(resolved)
      });
    }
  };
}

//#endregion
exports.AI_BOTS = AI_BOTS;
exports.SEARCH_ENGINES = SEARCH_ENGINES;
exports.default = robotsTxt;
exports.presetPolicies = presetPolicies;
exports.robotsTxt = robotsTxt;
exports.serialize = serialize;
package/dist/index.d.cts
ADDED
@@ -0,0 +1,102 @@

import { Plugin } from "vite";

//#region src/types.d.ts
/**
 * vite-robots-txt — Type definitions
 *
 * Robots.txt spec: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
 * Non-standard extensions: Crawl-delay (Bing/Yandex), Host (Yandex), Clean-param (Yandex)
 */
/** A single value or array of values — for ergonomic config */
type OneOrMany<T> = T | T[];
/** Known bot identifiers for type-safe presets */
type KnownBot = 'Googlebot' | 'Googlebot-Image' | 'Googlebot-News' | 'Googlebot-Video' | 'Bingbot' | 'Slurp' | 'DuckDuckBot' | 'Baiduspider' | 'YandexBot' | 'facebookexternalhit' | 'Twitterbot' | 'LinkedInBot' | 'Applebot' | 'GPTBot' | 'ChatGPT-User' | 'Claude-Web' | 'ClaudeBot' | 'Amazonbot' | 'anthropic-ai' | 'Bytespider' | 'CCBot' | 'Google-Extended' | 'PerplexityBot' | 'Cohere-ai' | 'YouBot';
/** User-agent string — known bots get autocomplete, but any string is valid */
type UserAgent = KnownBot | (string & {});
/** Rules for one or more user-agents */
interface PolicyRule {
  /** Which user-agent(s) this rule applies to. `'*'` = all crawlers. */
  userAgent: OneOrMany<UserAgent>;
  /** Paths to allow crawling. Evaluated after disallow (more specific wins). */
  allow?: OneOrMany<string>;
  /** Paths to disallow crawling. */
  disallow?: OneOrMany<string>;
  /**
   * Seconds between successive requests.
   * Non-standard — supported by Bing, Yandex. Ignored by Google.
   */
  crawlDelay?: number;
  /** Inline comments placed above this rule group */
  comment?: OneOrMany<string>;
}
/** Built-in presets for common configurations */
type Preset = /** `User-agent: * \n Allow: /` */'allowAll' /** `User-agent: * \n Disallow: /` */ | 'disallowAll' /** Block known AI/LLM training crawlers while allowing search engines */ | 'blockAI' /** Allow only major search engines (Google, Bing, DuckDuckGo, Yahoo, Apple) */ | 'searchOnly';
interface RobotsTxtOptions {
  /**
   * Start from a preset, then override with `policies`.
   * Preset rules come first; your policies are appended.
   *
   * @default undefined (no preset — you define everything)
   */
  preset?: Preset;
  /**
   * Custom policy rules. Merged after preset rules.
   *
   * Shorthand: pass a single `PolicyRule` instead of an array.
   */
  policies?: OneOrMany<PolicyRule>;
  /**
   * Sitemap URL(s) — absolute URLs written as global `Sitemap:` directives.
   *
   * Set to `false` to explicitly suppress sitemap output.
   * Set to `true` to auto-detect from `sitemap.xml` at the site root.
   *
   * @default undefined (no sitemap directive)
   */
  sitemap?: OneOrMany<string> | boolean;
  /**
   * Preferred host (Yandex `Host:` directive).
   * Non-standard — only used by Yandex.
   *
   * @default undefined
   */
  host?: string;
  /**
   * File name to write. Almost always `robots.txt`.
   *
   * @default 'robots.txt'
   */
  fileName?: string;
  /**
   * What to do in dev/serve mode.
   *
   * - `'disallowAll'` — serve a `Disallow: /` robots.txt (prevent dev indexing)
   * - `'same'` — serve the same robots.txt as build
   * - `false` — don't serve anything in dev mode
   *
   * @default 'disallowAll'
   */
  devMode?: 'disallowAll' | 'same' | false;
  /**
   * Header comment placed at the top of the file.
   *
   * @example 'Generated by vite-robots-txt'
   */
  header?: string;
}
//#endregion
//#region src/plugin.d.ts
declare function robotsTxt(options?: RobotsTxtOptions): Plugin;
//#endregion
//#region src/presets.d.ts
/** AI/LLM training crawlers to block */
declare const AI_BOTS: readonly ["GPTBot", "ChatGPT-User", "Claude-Web", "ClaudeBot", "anthropic-ai", "Google-Extended", "PerplexityBot", "Bytespider", "CCBot", "Cohere-ai", "Amazonbot", "YouBot"];
/** Major search engine crawlers */
declare const SEARCH_ENGINES: readonly ["Googlebot", "Bingbot", "DuckDuckBot", "Slurp", "Applebot", "Baiduspider", "YandexBot"];
declare const presetPolicies: Record<Preset, PolicyRule[]>;
//#endregion
//#region src/serialize.d.ts
/** Build the full robots.txt content from resolved options */
declare function serialize(options: RobotsTxtOptions): string;
//#endregion
export { AI_BOTS, type KnownBot, type OneOrMany, type PolicyRule, type Preset, type RobotsTxtOptions, SEARCH_ENGINES, type UserAgent, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/dist/index.d.mts
ADDED
@@ -0,0 +1,102 @@

import { Plugin } from "vite";

//#region src/types.d.ts
/**
 * vite-robots-txt — Type definitions
 *
 * Robots.txt spec: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
 * Non-standard extensions: Crawl-delay (Bing/Yandex), Host (Yandex), Clean-param (Yandex)
 */
/** A single value or array of values — for ergonomic config */
type OneOrMany<T> = T | T[];
/** Known bot identifiers for type-safe presets */
type KnownBot = 'Googlebot' | 'Googlebot-Image' | 'Googlebot-News' | 'Googlebot-Video' | 'Bingbot' | 'Slurp' | 'DuckDuckBot' | 'Baiduspider' | 'YandexBot' | 'facebookexternalhit' | 'Twitterbot' | 'LinkedInBot' | 'Applebot' | 'GPTBot' | 'ChatGPT-User' | 'Claude-Web' | 'ClaudeBot' | 'Amazonbot' | 'anthropic-ai' | 'Bytespider' | 'CCBot' | 'Google-Extended' | 'PerplexityBot' | 'Cohere-ai' | 'YouBot';
/** User-agent string — known bots get autocomplete, but any string is valid */
type UserAgent = KnownBot | (string & {});
/** Rules for one or more user-agents */
interface PolicyRule {
  /** Which user-agent(s) this rule applies to. `'*'` = all crawlers. */
  userAgent: OneOrMany<UserAgent>;
  /** Paths to allow crawling. Evaluated after disallow (more specific wins). */
  allow?: OneOrMany<string>;
  /** Paths to disallow crawling. */
  disallow?: OneOrMany<string>;
  /**
   * Seconds between successive requests.
   * Non-standard — supported by Bing, Yandex. Ignored by Google.
   */
  crawlDelay?: number;
  /** Inline comments placed above this rule group */
  comment?: OneOrMany<string>;
}
/** Built-in presets for common configurations */
type Preset = /** `User-agent: * \n Allow: /` */'allowAll' /** `User-agent: * \n Disallow: /` */ | 'disallowAll' /** Block known AI/LLM training crawlers while allowing search engines */ | 'blockAI' /** Allow only major search engines (Google, Bing, DuckDuckGo, Yahoo, Apple) */ | 'searchOnly';
interface RobotsTxtOptions {
  /**
   * Start from a preset, then override with `policies`.
   * Preset rules come first; your policies are appended.
   *
   * @default undefined (no preset — you define everything)
   */
  preset?: Preset;
  /**
   * Custom policy rules. Merged after preset rules.
   *
   * Shorthand: pass a single `PolicyRule` instead of an array.
   */
  policies?: OneOrMany<PolicyRule>;
  /**
   * Sitemap URL(s) — absolute URLs written as global `Sitemap:` directives.
   *
   * Set to `false` to explicitly suppress sitemap output.
   * Set to `true` to auto-detect from `sitemap.xml` at the site root.
   *
   * @default undefined (no sitemap directive)
   */
  sitemap?: OneOrMany<string> | boolean;
  /**
   * Preferred host (Yandex `Host:` directive).
   * Non-standard — only used by Yandex.
   *
   * @default undefined
   */
  host?: string;
  /**
   * File name to write. Almost always `robots.txt`.
   *
   * @default 'robots.txt'
   */
  fileName?: string;
  /**
   * What to do in dev/serve mode.
   *
   * - `'disallowAll'` — serve a `Disallow: /` robots.txt (prevent dev indexing)
   * - `'same'` — serve the same robots.txt as build
   * - `false` — don't serve anything in dev mode
   *
   * @default 'disallowAll'
   */
  devMode?: 'disallowAll' | 'same' | false;
  /**
   * Header comment placed at the top of the file.
   *
   * @example 'Generated by vite-robots-txt'
   */
  header?: string;
}
//#endregion
//#region src/plugin.d.ts
declare function robotsTxt(options?: RobotsTxtOptions): Plugin;
//#endregion
//#region src/presets.d.ts
/** AI/LLM training crawlers to block */
declare const AI_BOTS: readonly ["GPTBot", "ChatGPT-User", "Claude-Web", "ClaudeBot", "anthropic-ai", "Google-Extended", "PerplexityBot", "Bytespider", "CCBot", "Cohere-ai", "Amazonbot", "YouBot"];
/** Major search engine crawlers */
declare const SEARCH_ENGINES: readonly ["Googlebot", "Bingbot", "DuckDuckBot", "Slurp", "Applebot", "Baiduspider", "YandexBot"];
declare const presetPolicies: Record<Preset, PolicyRule[]>;
//#endregion
//#region src/serialize.d.ts
/** Build the full robots.txt content from resolved options */
declare function serialize(options: RobotsTxtOptions): string;
//#endregion
export { AI_BOTS, type KnownBot, type OneOrMany, type PolicyRule, type Preset, type RobotsTxtOptions, SEARCH_ENGINES, type UserAgent, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/dist/index.mjs
ADDED
@@ -0,0 +1,137 @@

//#region src/presets.ts
/** AI/LLM training crawlers to block */
const AI_BOTS = [
  "GPTBot",
  "ChatGPT-User",
  "Claude-Web",
  "ClaudeBot",
  "anthropic-ai",
  "Google-Extended",
  "PerplexityBot",
  "Bytespider",
  "CCBot",
  "Cohere-ai",
  "Amazonbot",
  "YouBot"
];
/** Major search engine crawlers */
const SEARCH_ENGINES = [
  "Googlebot",
  "Bingbot",
  "DuckDuckBot",
  "Slurp",
  "Applebot",
  "Baiduspider",
  "YandexBot"
];
const presetPolicies = {
  allowAll: [{
    userAgent: "*",
    allow: "/"
  }],
  disallowAll: [{
    userAgent: "*",
    disallow: "/"
  }],
  blockAI: [{
    userAgent: "*",
    allow: "/",
    comment: "Allow all crawlers by default"
  }, {
    userAgent: [...AI_BOTS],
    disallow: "/",
    comment: "Block AI/LLM training crawlers"
  }],
  searchOnly: [{
    userAgent: "*",
    disallow: "/",
    comment: "Block all by default"
  }, {
    userAgent: [...SEARCH_ENGINES],
    allow: "/",
    comment: "Allow major search engines"
  }]
};

//#endregion
//#region src/serialize.ts
/** Normalize `OneOrMany<T>` to `T[]` */
function toArray(value) {
  if (value === void 0) return [];
  return Array.isArray(value) ? value : [value];
}
/** Serialize a single policy rule group into robots.txt lines */
function serializePolicy(rule) {
  const lines = [];
  for (const c of toArray(rule.comment)) lines.push(`# ${c}`);
  for (const ua of toArray(rule.userAgent)) lines.push(`User-agent: ${ua}`);
  for (const path of toArray(rule.disallow)) lines.push(`Disallow: ${path}`);
  for (const path of toArray(rule.allow)) lines.push(`Allow: ${path}`);
  if (rule.crawlDelay !== void 0) lines.push(`Crawl-delay: ${rule.crawlDelay}`);
  return lines.join("\n");
}
/** Build the full robots.txt content from resolved options */
function serialize(options) {
  const sections = [];
  if (options.header) sections.push(`# ${options.header}`);
  const policies = [];
  if (options.preset) {
    const presetRules = presetPolicies[options.preset];
    if (presetRules) policies.push(...presetRules);
  }
  for (const p of toArray(options.policies)) policies.push(p);
  if (policies.length === 0) policies.push({
    userAgent: "*",
    allow: "/"
  });
  for (const policy of policies) sections.push(serializePolicy(policy));
  if (options.host) sections.push(`Host: ${options.host}`);
  if (options.sitemap && options.sitemap !== true) for (const url of toArray(options.sitemap)) sections.push(`Sitemap: ${url}`);
  return `${sections.join("\n\n")}\n`;
}

//#endregion
//#region src/plugin.ts
const PLUGIN_NAME = "vite-robots-txt";
const DEV_ROBOTS = "User-agent: *\nDisallow: /\n";
function createMiddleware(fileName, content) {
  return (req, res, next) => {
    if (req.url !== `/${fileName}`) return next();
    res.setHeader("Content-Type", "text/plain");
    res.setHeader("Cache-Control", "no-cache");
    res.end(content);
  };
}
function robotsTxt(options = {}) {
  const fileName = options.fileName ?? "robots.txt";
  const devMode = options.devMode ?? "disallowAll";
  let siteBase = "/";
  const devContent = devMode === "disallowAll" ? DEV_ROBOTS : serialize(options);
  return {
    name: PLUGIN_NAME,
    enforce: "post",
    configResolved(config) {
      siteBase = config.base ?? "/";
    },
    configureServer(server) {
      if (devMode === false) return;
      server.middlewares.use(createMiddleware(fileName, devContent));
    },
    configurePreviewServer(server) {
      if (devMode === false) return;
      server.middlewares.use(createMiddleware(fileName, devContent));
    },
    generateBundle() {
      const resolved = { ...options };
      if (resolved.sitemap === true) resolved.sitemap = `${siteBase}sitemap.xml`.replace(/\/+/g, "/");
      this.emitFile({
        type: "asset",
        fileName,
        source: serialize(resolved)
      });
    }
  };
}

//#endregion
export { AI_BOTS, SEARCH_ENGINES, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/package.json
ADDED
@@ -0,0 +1,62 @@

{
  "name": "vite-robots-txt",
  "version": "0.1.0",
  "description": "Vite plugin to generate robots.txt with presets, per-bot rules, and dev mode blocking",
  "keywords": [
    "vite",
    "vite-plugin",
    "robots.txt",
    "robots",
    "seo",
    "crawlers",
    "ai-crawlers"
  ],
  "repository": {
    "type": "git",
    "url": "https://github.com/kjanat/vite-robots-txt"
  },
  "license": "MIT",
  "author": "Kaj Kowalski",
  "type": "module",
  "exports": {
    ".": {
      "import": {
        "types": "./dist/index.d.mts",
        "default": "./dist/index.mjs"
      },
      "require": {
        "types": "./dist/index.d.cts",
        "default": "./dist/index.cjs"
      }
    }
  },
  "main": "dist/index.cjs",
  "module": "dist/index.mjs",
  "types": "dist/index.d.mts",
  "files": [
    "dist"
  ],
  "scripts": {
    "build": "tsdown",
    "build:pkg": "tsdown",
    "dev": "tsdown --watch",
    "fmt": "dprint fmt",
    "lint": "biome check src/",
    "tar": "TARBALL=$(bun pm pack --quiet | tr -d '\\n'); echo \"tar=${TARBALL}\" >> ${GITHUB_OUTPUT:-/dev/stdout}",
    "test": "vitest run",
    "test:watch": "vitest",
    "typecheck": "tsc --noEmit"
  },
  "devDependencies": {
    "@biomejs/biome": "^2.4.4",
    "@types/node": "^25.3.3",
    "dprint": "^0.52.0",
    "tsdown": "^0.21.0-beta.2",
    "typescript": "^5.9.3",
    "vite": "^7.3.1",
    "vitest": "^4.0.18"
  },
  "peerDependencies": {
    "vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
  }
}