vite-robots-txt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kaj Kowalski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # vite-robots-txt
2
+
3
+ Vite plugin to generate `robots.txt` with presets, per-bot rules, and dev mode blocking.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ bun add -d vite-robots-txt
9
+ # or
10
+ npm install -D vite-robots-txt
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ts
16
+ // vite.config.ts
17
+ import { defineConfig } from 'vite';
18
+ import robotsTxt from 'vite-robots-txt';
19
+
20
+ export default defineConfig({
21
+ plugins: [
22
+ robotsTxt({ preset: 'allowAll' }),
23
+ ],
24
+ });
25
+ ```
26
+
27
+ ## Presets
28
+
29
+ | Preset | Description |
30
+ | ------------- | ---------------------------------------------------- |
31
+ | `allowAll` | Allow all crawlers |
32
+ | `disallowAll` | Block all crawlers |
33
+ | `blockAI` | Allow search engines, block AI/LLM training crawlers |
34
+ | `searchOnly` | Allow only major search engines |
35
+
36
+ ### Block AI crawlers
37
+
38
+ ```ts
39
+ robotsTxt({ preset: 'blockAI' });
40
+ ```
41
+
42
+ Generates:
43
+
44
+ ```txt
45
+ # Allow all crawlers by default
46
+ User-agent: *
47
+ Allow: /
48
+
49
+ # Block AI/LLM training crawlers
50
+ User-agent: GPTBot
51
+ User-agent: ChatGPT-User
52
+ User-agent: Claude-Web
53
+ User-agent: ClaudeBot
54
+ User-agent: anthropic-ai
55
+ User-agent: Google-Extended
56
+ User-agent: PerplexityBot
57
+ User-agent: Bytespider
58
+ User-agent: CCBot
59
+ User-agent: Cohere-ai
60
+ User-agent: Amazonbot
61
+ User-agent: YouBot
62
+ Disallow: /
63
+ ```
64
+
65
+ ## Custom policies
66
+
67
+ ```ts
68
+ robotsTxt({
69
+ policies: [
70
+ { userAgent: '*', allow: '/', disallow: ['/admin', '/api'] },
71
+ { userAgent: 'GPTBot', disallow: '/' },
72
+ ],
73
+ sitemap: 'https://example.com/sitemap.xml',
74
+ });
75
+ ```
76
+
77
+ ### Merge preset + custom rules
78
+
79
+ ```ts
80
+ robotsTxt({
81
+ preset: 'blockAI',
82
+ policies: { userAgent: 'Baiduspider', disallow: '/', crawlDelay: 10 },
83
+ });
84
+ ```
85
+
86
+ ## Options
87
+
88
+ | Option | Type | Default | Description |
89
+ | ---------- | ---------------------------------------------------------- | --------------- | ---------------------------------------- |
90
+ | `preset` | `'allowAll' \| 'disallowAll' \| 'blockAI' \| 'searchOnly'` | — | Start from a preset |
91
+ | `policies` | `PolicyRule \| PolicyRule[]` | — | Custom policy rules |
92
+ | `sitemap` | `string \| string[] \| boolean` | — | Sitemap URL(s) or `true` for auto-detect |
93
+ | `host` | `string` | — | Yandex `Host:` directive |
94
+ | `fileName` | `string` | `'robots.txt'` | Output file name |
95
+ | `devMode` | `'disallowAll' \| 'same' \| false` | `'disallowAll'` | Dev server behavior |
96
+ | `header` | `string` | — | Comment at top of file |
97
+
98
+ ### PolicyRule
99
+
100
+ | Field | Type | Description |
101
+ | ------------ | -------------------- | -------------------------------------- |
102
+ | `userAgent` | `string \| string[]` | Bot name(s), `'*'` for all |
103
+ | `allow` | `string \| string[]` | Paths to allow |
104
+ | `disallow` | `string \| string[]` | Paths to disallow |
105
+ | `crawlDelay` | `number` | Seconds between requests (Bing/Yandex) |
106
+ | `comment` | `string \| string[]` | Comments above the rule group |
107
+
108
+ ## Dev mode
109
+
110
+ By default, the plugin serves a `Disallow: /` robots.txt during development to prevent indexing of your dev server. Set `devMode: 'same'` to serve the same config as production, or `false` to disable.
111
+
112
+ ## Standalone serializer
113
+
114
+ ```ts
115
+ import { serialize } from 'vite-robots-txt';
116
+
117
+ const txt = serialize({
118
+ preset: 'blockAI',
119
+ sitemap: 'https://example.com/sitemap.xml',
120
+ });
121
+ ```
122
+
123
+ ## Exports
124
+
125
+ | Export | Description |
126
+ | --------------------- | ---------------------------------------- |
127
+ | `robotsTxt` (default) | Vite plugin factory |
128
+ | `serialize` | Standalone robots.txt serializer |
129
+ | `AI_BOTS` | Array of known AI crawler user-agents |
130
+ | `SEARCH_ENGINES` | Array of major search engine user-agents |
131
+ | `presetPolicies` | Preset policy definitions |
132
+
133
+ ## License
134
+
135
+ MIT
package/dist/index.cjs ADDED
@@ -0,0 +1,144 @@
1
+ Object.defineProperties(exports, { __esModule: { value: true }, [Symbol.toStringTag]: { value: 'Module' } });
2
+
3
+ //#region src/presets.ts
4
+ /** AI/LLM training crawlers to block */
5
+ const AI_BOTS = [
6
+ "GPTBot",
7
+ "ChatGPT-User",
8
+ "Claude-Web",
9
+ "ClaudeBot",
10
+ "anthropic-ai",
11
+ "Google-Extended",
12
+ "PerplexityBot",
13
+ "Bytespider",
14
+ "CCBot",
15
+ "Cohere-ai",
16
+ "Amazonbot",
17
+ "YouBot"
18
+ ];
19
+ /** Major search engine crawlers */
20
+ const SEARCH_ENGINES = [
21
+ "Googlebot",
22
+ "Bingbot",
23
+ "DuckDuckBot",
24
+ "Slurp",
25
+ "Applebot",
26
+ "Baiduspider",
27
+ "YandexBot"
28
+ ];
29
+ const presetPolicies = {
30
+ allowAll: [{
31
+ userAgent: "*",
32
+ allow: "/"
33
+ }],
34
+ disallowAll: [{
35
+ userAgent: "*",
36
+ disallow: "/"
37
+ }],
38
+ blockAI: [{
39
+ userAgent: "*",
40
+ allow: "/",
41
+ comment: "Allow all crawlers by default"
42
+ }, {
43
+ userAgent: [...AI_BOTS],
44
+ disallow: "/",
45
+ comment: "Block AI/LLM training crawlers"
46
+ }],
47
+ searchOnly: [{
48
+ userAgent: "*",
49
+ disallow: "/",
50
+ comment: "Block all by default"
51
+ }, {
52
+ userAgent: [...SEARCH_ENGINES],
53
+ allow: "/",
54
+ comment: "Allow major search engines"
55
+ }]
56
+ };
57
+
58
+ //#endregion
59
+ //#region src/serialize.ts
60
+ /** Normalize `OneOrMany<T>` to `T[]` */
61
+ function toArray(value) {
62
+ if (value === void 0) return [];
63
+ return Array.isArray(value) ? value : [value];
64
+ }
65
+ /** Serialize a single policy rule group into robots.txt lines */
66
+ function serializePolicy(rule) {
67
+ const lines = [];
68
+ for (const c of toArray(rule.comment)) lines.push(`# ${c}`);
69
+ for (const ua of toArray(rule.userAgent)) lines.push(`User-agent: ${ua}`);
70
+ for (const path of toArray(rule.disallow)) lines.push(`Disallow: ${path}`);
71
+ for (const path of toArray(rule.allow)) lines.push(`Allow: ${path}`);
72
+ if (rule.crawlDelay !== void 0) lines.push(`Crawl-delay: ${rule.crawlDelay}`);
73
+ return lines.join("\n");
74
+ }
75
+ /** Build the full robots.txt content from resolved options */
76
+ function serialize(options) {
77
+ const sections = [];
78
+ if (options.header) sections.push(`# ${options.header}`);
79
+ const policies = [];
80
+ if (options.preset) {
81
+ const presetRules = presetPolicies[options.preset];
82
+ if (presetRules) policies.push(...presetRules);
83
+ }
84
+ for (const p of toArray(options.policies)) policies.push(p);
85
+ if (policies.length === 0) policies.push({
86
+ userAgent: "*",
87
+ allow: "/"
88
+ });
89
+ for (const policy of policies) sections.push(serializePolicy(policy));
90
+ if (options.host) sections.push(`Host: ${options.host}`);
91
+ if (options.sitemap && options.sitemap !== true) for (const url of toArray(options.sitemap)) sections.push(`Sitemap: ${url}`);
92
+ return `${sections.join("\n\n")}\n`;
93
+ }
94
+
95
+ //#endregion
96
+ //#region src/plugin.ts
97
+ const PLUGIN_NAME = "vite-robots-txt";
98
+ const DEV_ROBOTS = "User-agent: *\nDisallow: /\n";
99
+ function createMiddleware(fileName, content) {
100
+ return (req, res, next) => {
101
+ if (req.url !== `/${fileName}`) return next();
102
+ res.setHeader("Content-Type", "text/plain");
103
+ res.setHeader("Cache-Control", "no-cache");
104
+ res.end(content);
105
+ };
106
+ }
107
+ function robotsTxt(options = {}) {
108
+ const fileName = options.fileName ?? "robots.txt";
109
+ const devMode = options.devMode ?? "disallowAll";
110
+ let siteBase = "/";
111
+ const devContent = devMode === "disallowAll" ? DEV_ROBOTS : serialize(options);
112
+ return {
113
+ name: PLUGIN_NAME,
114
+ enforce: "post",
115
+ configResolved(config) {
116
+ siteBase = config.base ?? "/";
117
+ },
118
+ configureServer(server) {
119
+ if (devMode === false) return;
120
+ server.middlewares.use(createMiddleware(fileName, devContent));
121
+ },
122
+ configurePreviewServer(server) {
123
+ if (devMode === false) return;
124
+ server.middlewares.use(createMiddleware(fileName, devContent));
125
+ },
126
+ generateBundle() {
127
+ const resolved = { ...options };
128
+ if (resolved.sitemap === true) resolved.sitemap = `${siteBase}sitemap.xml`.replace(/\/+/g, "/");
129
+ this.emitFile({
130
+ type: "asset",
131
+ fileName,
132
+ source: serialize(resolved)
133
+ });
134
+ }
135
+ };
136
+ }
137
+
138
+ //#endregion
139
+ exports.AI_BOTS = AI_BOTS;
140
+ exports.SEARCH_ENGINES = SEARCH_ENGINES;
141
+ exports.default = robotsTxt;
142
+ exports.presetPolicies = presetPolicies;
143
+ exports.robotsTxt = robotsTxt;
144
+ exports.serialize = serialize;
package/dist/index.d.cts ADDED
@@ -0,0 +1,102 @@
1
+ import { Plugin } from "vite";
2
+
3
+ //#region src/types.d.ts
4
+ /**
5
+ * vite-robots-txt — Type definitions
6
+ *
7
+ * Robots.txt spec: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
8
+ * Non-standard extensions: Crawl-delay (Bing/Yandex), Host (Yandex), Clean-param (Yandex)
9
+ */
10
+ /** A single value or array of values — for ergonomic config */
11
+ type OneOrMany<T> = T | T[];
12
+ /** Known bot identifiers for type-safe presets */
13
+ type KnownBot = 'Googlebot' | 'Googlebot-Image' | 'Googlebot-News' | 'Googlebot-Video' | 'Bingbot' | 'Slurp' | 'DuckDuckBot' | 'Baiduspider' | 'YandexBot' | 'facebookexternalhit' | 'Twitterbot' | 'LinkedInBot' | 'Applebot' | 'GPTBot' | 'ChatGPT-User' | 'Claude-Web' | 'ClaudeBot' | 'Amazonbot' | 'anthropic-ai' | 'Bytespider' | 'CCBot' | 'Google-Extended' | 'PerplexityBot' | 'Cohere-ai' | 'YouBot';
14
+ /** User-agent string — known bots get autocomplete, but any string is valid */
15
+ type UserAgent = KnownBot | (string & {});
16
+ /** Rules for one or more user-agents */
17
+ interface PolicyRule {
18
+ /** Which user-agent(s) this rule applies to. `'*'` = all crawlers. */
19
+ userAgent: OneOrMany<UserAgent>;
20
+ /** Paths to allow crawling. Evaluated after disallow (more specific wins). */
21
+ allow?: OneOrMany<string>;
22
+ /** Paths to disallow crawling. */
23
+ disallow?: OneOrMany<string>;
24
+ /**
25
+ * Seconds between successive requests.
26
+ * Non-standard — supported by Bing, Yandex. Ignored by Google.
27
+ */
28
+ crawlDelay?: number;
29
+ /** Inline comments placed above this rule group */
30
+ comment?: OneOrMany<string>;
31
+ }
32
+ /** Built-in presets for common configurations */
33
+ type Preset = /** `User-agent: * \n Allow: /` */'allowAll' /** `User-agent: * \n Disallow: /` */ | 'disallowAll' /** Block known AI/LLM training crawlers while allowing search engines */ | 'blockAI' /** Allow only major search engines (Google, Bing, DuckDuckGo, Yahoo, Apple, Baidu, Yandex) */ | 'searchOnly';
34
+ interface RobotsTxtOptions {
35
+ /**
36
+ * Start from a preset, then override with `policies`.
37
+ * Preset rules come first; your policies are appended.
38
+ *
39
+ * @default undefined (no preset — you define everything)
40
+ */
41
+ preset?: Preset;
42
+ /**
43
+ * Custom policy rules. Merged after preset rules.
44
+ *
45
+ * Shorthand: pass a single `PolicyRule` instead of an array.
46
+ */
47
+ policies?: OneOrMany<PolicyRule>;
48
+ /**
49
+ * Sitemap URL(s) — absolute URLs written as global `Sitemap:` directives.
50
+ *
51
+ * Set to `false` to explicitly suppress sitemap output.
52
+ * Set to `true` to auto-detect from `sitemap.xml` at the site root.
53
+ *
54
+ * @default undefined (no sitemap directive)
55
+ */
56
+ sitemap?: OneOrMany<string> | boolean;
57
+ /**
58
+ * Preferred host (Yandex `Host:` directive).
59
+ * Non-standard — only used by Yandex.
60
+ *
61
+ * @default undefined
62
+ */
63
+ host?: string;
64
+ /**
65
+ * File name to write. Almost always `robots.txt`.
66
+ *
67
+ * @default 'robots.txt'
68
+ */
69
+ fileName?: string;
70
+ /**
71
+ * What to do in dev/serve mode.
72
+ *
73
+ * - `'disallowAll'` — serve a `Disallow: /` robots.txt (prevent dev indexing)
74
+ * - `'same'` — serve the same robots.txt as build
75
+ * - `false` — don't serve anything in dev mode
76
+ *
77
+ * @default 'disallowAll'
78
+ */
79
+ devMode?: 'disallowAll' | 'same' | false;
80
+ /**
81
+ * Header comment placed at the top of the file.
82
+ *
83
+ * @example 'Generated by vite-robots-txt'
84
+ */
85
+ header?: string;
86
+ }
87
+ //#endregion
88
+ //#region src/plugin.d.ts
89
+ declare function robotsTxt(options?: RobotsTxtOptions): Plugin;
90
+ //#endregion
91
+ //#region src/presets.d.ts
92
+ /** AI/LLM training crawlers to block */
93
+ declare const AI_BOTS: readonly ["GPTBot", "ChatGPT-User", "Claude-Web", "ClaudeBot", "anthropic-ai", "Google-Extended", "PerplexityBot", "Bytespider", "CCBot", "Cohere-ai", "Amazonbot", "YouBot"];
94
+ /** Major search engine crawlers */
95
+ declare const SEARCH_ENGINES: readonly ["Googlebot", "Bingbot", "DuckDuckBot", "Slurp", "Applebot", "Baiduspider", "YandexBot"];
96
+ declare const presetPolicies: Record<Preset, PolicyRule[]>;
97
+ //#endregion
98
+ //#region src/serialize.d.ts
99
+ /** Build the full robots.txt content from resolved options */
100
+ declare function serialize(options: RobotsTxtOptions): string;
101
+ //#endregion
102
+ export { AI_BOTS, type KnownBot, type OneOrMany, type PolicyRule, type Preset, type RobotsTxtOptions, SEARCH_ENGINES, type UserAgent, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/dist/index.d.mts ADDED
@@ -0,0 +1,102 @@
1
+ import { Plugin } from "vite";
2
+
3
+ //#region src/types.d.ts
4
+ /**
5
+ * vite-robots-txt — Type definitions
6
+ *
7
+ * Robots.txt spec: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
8
+ * Non-standard extensions: Crawl-delay (Bing/Yandex), Host (Yandex), Clean-param (Yandex)
9
+ */
10
+ /** A single value or array of values — for ergonomic config */
11
+ type OneOrMany<T> = T | T[];
12
+ /** Known bot identifiers for type-safe presets */
13
+ type KnownBot = 'Googlebot' | 'Googlebot-Image' | 'Googlebot-News' | 'Googlebot-Video' | 'Bingbot' | 'Slurp' | 'DuckDuckBot' | 'Baiduspider' | 'YandexBot' | 'facebookexternalhit' | 'Twitterbot' | 'LinkedInBot' | 'Applebot' | 'GPTBot' | 'ChatGPT-User' | 'Claude-Web' | 'ClaudeBot' | 'Amazonbot' | 'anthropic-ai' | 'Bytespider' | 'CCBot' | 'Google-Extended' | 'PerplexityBot' | 'Cohere-ai' | 'YouBot';
14
+ /** User-agent string — known bots get autocomplete, but any string is valid */
15
+ type UserAgent = KnownBot | (string & {});
16
+ /** Rules for one or more user-agents */
17
+ interface PolicyRule {
18
+ /** Which user-agent(s) this rule applies to. `'*'` = all crawlers. */
19
+ userAgent: OneOrMany<UserAgent>;
20
+ /** Paths to allow crawling. Evaluated after disallow (more specific wins). */
21
+ allow?: OneOrMany<string>;
22
+ /** Paths to disallow crawling. */
23
+ disallow?: OneOrMany<string>;
24
+ /**
25
+ * Seconds between successive requests.
26
+ * Non-standard — supported by Bing, Yandex. Ignored by Google.
27
+ */
28
+ crawlDelay?: number;
29
+ /** Inline comments placed above this rule group */
30
+ comment?: OneOrMany<string>;
31
+ }
32
+ /** Built-in presets for common configurations */
33
+ type Preset = /** `User-agent: * \n Allow: /` */'allowAll' /** `User-agent: * \n Disallow: /` */ | 'disallowAll' /** Block known AI/LLM training crawlers while allowing search engines */ | 'blockAI' /** Allow only major search engines (Google, Bing, DuckDuckGo, Yahoo, Apple, Baidu, Yandex) */ | 'searchOnly';
34
+ interface RobotsTxtOptions {
35
+ /**
36
+ * Start from a preset, then override with `policies`.
37
+ * Preset rules come first; your policies are appended.
38
+ *
39
+ * @default undefined (no preset — you define everything)
40
+ */
41
+ preset?: Preset;
42
+ /**
43
+ * Custom policy rules. Merged after preset rules.
44
+ *
45
+ * Shorthand: pass a single `PolicyRule` instead of an array.
46
+ */
47
+ policies?: OneOrMany<PolicyRule>;
48
+ /**
49
+ * Sitemap URL(s) — absolute URLs written as global `Sitemap:` directives.
50
+ *
51
+ * Set to `false` to explicitly suppress sitemap output.
52
+ * Set to `true` to auto-detect from `sitemap.xml` at the site root.
53
+ *
54
+ * @default undefined (no sitemap directive)
55
+ */
56
+ sitemap?: OneOrMany<string> | boolean;
57
+ /**
58
+ * Preferred host (Yandex `Host:` directive).
59
+ * Non-standard — only used by Yandex.
60
+ *
61
+ * @default undefined
62
+ */
63
+ host?: string;
64
+ /**
65
+ * File name to write. Almost always `robots.txt`.
66
+ *
67
+ * @default 'robots.txt'
68
+ */
69
+ fileName?: string;
70
+ /**
71
+ * What to do in dev/serve mode.
72
+ *
73
+ * - `'disallowAll'` — serve a `Disallow: /` robots.txt (prevent dev indexing)
74
+ * - `'same'` — serve the same robots.txt as build
75
+ * - `false` — don't serve anything in dev mode
76
+ *
77
+ * @default 'disallowAll'
78
+ */
79
+ devMode?: 'disallowAll' | 'same' | false;
80
+ /**
81
+ * Header comment placed at the top of the file.
82
+ *
83
+ * @example 'Generated by vite-robots-txt'
84
+ */
85
+ header?: string;
86
+ }
87
+ //#endregion
88
+ //#region src/plugin.d.ts
89
+ declare function robotsTxt(options?: RobotsTxtOptions): Plugin;
90
+ //#endregion
91
+ //#region src/presets.d.ts
92
+ /** AI/LLM training crawlers to block */
93
+ declare const AI_BOTS: readonly ["GPTBot", "ChatGPT-User", "Claude-Web", "ClaudeBot", "anthropic-ai", "Google-Extended", "PerplexityBot", "Bytespider", "CCBot", "Cohere-ai", "Amazonbot", "YouBot"];
94
+ /** Major search engine crawlers */
95
+ declare const SEARCH_ENGINES: readonly ["Googlebot", "Bingbot", "DuckDuckBot", "Slurp", "Applebot", "Baiduspider", "YandexBot"];
96
+ declare const presetPolicies: Record<Preset, PolicyRule[]>;
97
+ //#endregion
98
+ //#region src/serialize.d.ts
99
+ /** Build the full robots.txt content from resolved options */
100
+ declare function serialize(options: RobotsTxtOptions): string;
101
+ //#endregion
102
+ export { AI_BOTS, type KnownBot, type OneOrMany, type PolicyRule, type Preset, type RobotsTxtOptions, SEARCH_ENGINES, type UserAgent, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/dist/index.mjs ADDED
@@ -0,0 +1,137 @@
1
+ //#region src/presets.ts
2
+ /** AI/LLM training crawlers to block */
3
+ const AI_BOTS = [
4
+ "GPTBot",
5
+ "ChatGPT-User",
6
+ "Claude-Web",
7
+ "ClaudeBot",
8
+ "anthropic-ai",
9
+ "Google-Extended",
10
+ "PerplexityBot",
11
+ "Bytespider",
12
+ "CCBot",
13
+ "Cohere-ai",
14
+ "Amazonbot",
15
+ "YouBot"
16
+ ];
17
+ /** Major search engine crawlers */
18
+ const SEARCH_ENGINES = [
19
+ "Googlebot",
20
+ "Bingbot",
21
+ "DuckDuckBot",
22
+ "Slurp",
23
+ "Applebot",
24
+ "Baiduspider",
25
+ "YandexBot"
26
+ ];
27
+ const presetPolicies = {
28
+ allowAll: [{
29
+ userAgent: "*",
30
+ allow: "/"
31
+ }],
32
+ disallowAll: [{
33
+ userAgent: "*",
34
+ disallow: "/"
35
+ }],
36
+ blockAI: [{
37
+ userAgent: "*",
38
+ allow: "/",
39
+ comment: "Allow all crawlers by default"
40
+ }, {
41
+ userAgent: [...AI_BOTS],
42
+ disallow: "/",
43
+ comment: "Block AI/LLM training crawlers"
44
+ }],
45
+ searchOnly: [{
46
+ userAgent: "*",
47
+ disallow: "/",
48
+ comment: "Block all by default"
49
+ }, {
50
+ userAgent: [...SEARCH_ENGINES],
51
+ allow: "/",
52
+ comment: "Allow major search engines"
53
+ }]
54
+ };
55
+
56
+ //#endregion
57
+ //#region src/serialize.ts
58
+ /** Normalize `OneOrMany<T>` to `T[]` */
59
+ function toArray(value) {
60
+ if (value === void 0) return [];
61
+ return Array.isArray(value) ? value : [value];
62
+ }
63
+ /** Serialize a single policy rule group into robots.txt lines */
64
+ function serializePolicy(rule) {
65
+ const lines = [];
66
+ for (const c of toArray(rule.comment)) lines.push(`# ${c}`);
67
+ for (const ua of toArray(rule.userAgent)) lines.push(`User-agent: ${ua}`);
68
+ for (const path of toArray(rule.disallow)) lines.push(`Disallow: ${path}`);
69
+ for (const path of toArray(rule.allow)) lines.push(`Allow: ${path}`);
70
+ if (rule.crawlDelay !== void 0) lines.push(`Crawl-delay: ${rule.crawlDelay}`);
71
+ return lines.join("\n");
72
+ }
73
+ /** Build the full robots.txt content from resolved options */
74
+ function serialize(options) {
75
+ const sections = [];
76
+ if (options.header) sections.push(`# ${options.header}`);
77
+ const policies = [];
78
+ if (options.preset) {
79
+ const presetRules = presetPolicies[options.preset];
80
+ if (presetRules) policies.push(...presetRules);
81
+ }
82
+ for (const p of toArray(options.policies)) policies.push(p);
83
+ if (policies.length === 0) policies.push({
84
+ userAgent: "*",
85
+ allow: "/"
86
+ });
87
+ for (const policy of policies) sections.push(serializePolicy(policy));
88
+ if (options.host) sections.push(`Host: ${options.host}`);
89
+ if (options.sitemap && options.sitemap !== true) for (const url of toArray(options.sitemap)) sections.push(`Sitemap: ${url}`);
90
+ return `${sections.join("\n\n")}\n`;
91
+ }
92
+
93
+ //#endregion
94
+ //#region src/plugin.ts
95
+ const PLUGIN_NAME = "vite-robots-txt";
96
+ const DEV_ROBOTS = "User-agent: *\nDisallow: /\n";
97
+ function createMiddleware(fileName, content) {
98
+ return (req, res, next) => {
99
+ if (req.url !== `/${fileName}`) return next();
100
+ res.setHeader("Content-Type", "text/plain");
101
+ res.setHeader("Cache-Control", "no-cache");
102
+ res.end(content);
103
+ };
104
+ }
105
+ function robotsTxt(options = {}) {
106
+ const fileName = options.fileName ?? "robots.txt";
107
+ const devMode = options.devMode ?? "disallowAll";
108
+ let siteBase = "/";
109
+ const devContent = devMode === "disallowAll" ? DEV_ROBOTS : serialize(options);
110
+ return {
111
+ name: PLUGIN_NAME,
112
+ enforce: "post",
113
+ configResolved(config) {
114
+ siteBase = config.base ?? "/";
115
+ },
116
+ configureServer(server) {
117
+ if (devMode === false) return;
118
+ server.middlewares.use(createMiddleware(fileName, devContent));
119
+ },
120
+ configurePreviewServer(server) {
121
+ if (devMode === false) return;
122
+ server.middlewares.use(createMiddleware(fileName, devContent));
123
+ },
124
+ generateBundle() {
125
+ const resolved = { ...options };
126
+ if (resolved.sitemap === true) resolved.sitemap = `${siteBase}sitemap.xml`.replace(/\/+/g, "/");
127
+ this.emitFile({
128
+ type: "asset",
129
+ fileName,
130
+ source: serialize(resolved)
131
+ });
132
+ }
133
+ };
134
+ }
135
+
136
+ //#endregion
137
+ export { AI_BOTS, SEARCH_ENGINES, robotsTxt as default, robotsTxt, presetPolicies, serialize };
package/package.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "name": "vite-robots-txt",
3
+ "version": "0.1.0",
4
+ "description": "Vite plugin to generate robots.txt with presets, per-bot rules, and dev mode blocking",
5
+ "keywords": [
6
+ "vite",
7
+ "vite-plugin",
8
+ "robots.txt",
9
+ "robots",
10
+ "seo",
11
+ "crawlers",
12
+ "ai-crawlers"
13
+ ],
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/kjanat/vite-robots-txt"
17
+ },
18
+ "license": "MIT",
19
+ "author": "Kaj Kowalski",
20
+ "type": "module",
21
+ "exports": {
22
+ ".": {
23
+ "import": {
24
+ "types": "./dist/index.d.mts",
25
+ "default": "./dist/index.mjs"
26
+ },
27
+ "require": {
28
+ "types": "./dist/index.d.cts",
29
+ "default": "./dist/index.cjs"
30
+ }
31
+ }
32
+ },
33
+ "main": "dist/index.cjs",
34
+ "module": "dist/index.mjs",
35
+ "types": "dist/index.d.mts",
36
+ "files": [
37
+ "dist"
38
+ ],
39
+ "scripts": {
40
+ "build": "tsdown",
41
+ "build:pkg": "tsdown",
42
+ "dev": "tsdown --watch",
43
+ "fmt": "dprint fmt",
44
+ "lint": "biome check src/",
45
+ "tar": "TARBALL=$(bun pm pack --quiet | tr -d '\\n'); echo \"tar=${TARBALL}\" >> ${GITHUB_OUTPUT:-/dev/stdout}",
46
+ "test": "vitest run",
47
+ "test:watch": "vitest",
48
+ "typecheck": "tsc --noEmit"
49
+ },
50
+ "devDependencies": {
51
+ "@biomejs/biome": "^2.4.4",
52
+ "@types/node": "^25.3.3",
53
+ "dprint": "^0.52.0",
54
+ "tsdown": "^0.21.0-beta.2",
55
+ "typescript": "^5.9.3",
56
+ "vite": "^7.3.1",
57
+ "vitest": "^4.0.18"
58
+ },
59
+ "peerDependencies": {
60
+ "vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
61
+ }
62
+ }