@hkdigital/lib-core 0.5.59 → 0.5.60

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- /** @typedef {import('./typedef.js').RobotsConfig} RobotsConfig */
2
1
  /**
3
2
  * Check if hostname matches allowed hosts pattern
4
3
  *
@@ -1,5 +1,58 @@
1
1
  /** @typedef {import('./typedef.js').RobotsConfig} RobotsConfig */
2
2
 
3
+ /**
4
+ * AI training bots (scrape content for model training)
5
+ */
6
+ const AI_TRAINING_BOTS = [
7
+ 'GPTBot', // OpenAI training
8
+ 'Google-Extended', // Google AI training
9
+ 'CCBot', // Common Crawl
10
+ 'anthropic-ai', // Anthropic training
11
+ 'Omgilibot', // Omgili
12
+ 'FacebookBot', // Meta AI training
13
+ 'Diffbot', // Diffbot
14
+ 'Bytespider', // ByteDance
15
+ 'ImagesiftBot', // ImageSift
16
+ 'PerplexityBot', // Perplexity AI training
17
+ 'YouBot', // You.com training
18
+ 'Applebot-Extended' // Apple AI training
19
+ ];
20
+
21
+ /**
22
+ * AI reading bots (retrieve content for user queries)
23
+ */
24
+ const AI_READING_BOTS = [
25
+ 'ChatGPT-User', // ChatGPT browsing
26
+ 'Claude-Web', // Claude browsing
27
+ 'cohere-ai' // Cohere browsing
28
+ ];
29
+
30
+ /**
31
+ * Generate user-agent blocks for AI bots
32
+ *
33
+ * @param {boolean} allowAiTraining - Allow AI training bots
34
+ * @param {boolean} allowAiReading - Allow AI reading bots
35
+ *
36
+ * @returns {string} User-agent blocks
37
+ */
38
+ function generateAiBotBlocks(allowAiTraining, allowAiReading) {
39
+ let blocks = '';
40
+
41
+ if (allowAiTraining === false) {
42
+ blocks += AI_TRAINING_BOTS.map(bot =>
43
+ `\n\nUser-agent: ${bot}\nDisallow: /`
44
+ ).join('');
45
+ }
46
+
47
+ if (allowAiReading === false) {
48
+ blocks += AI_READING_BOTS.map(bot =>
49
+ `\n\nUser-agent: ${bot}\nDisallow: /`
50
+ ).join('');
51
+ }
52
+
53
+ return blocks;
54
+ }
55
+
3
56
  /**
4
57
  * Check if hostname matches allowed hosts pattern
5
58
  *
@@ -74,10 +127,15 @@ export function generateRobotsTxt(url, config = {}) {
74
127
  });
75
128
  }
76
129
 
77
- // Always add sitemap reference
130
+ // Add sitemap reference
78
131
  if (url.origin) {
79
132
  content += `\nSitemap: ${url.origin}/sitemap.xml`;
80
133
  }
81
134
 
135
+ // Add AI bot blocks (defaults to allowing both if not specified)
136
+ const allowAiTraining = config.allowAiTraining !== false;
137
+ const allowAiReading = config.allowAiReading !== false;
138
+ content += generateAiBotBlocks(allowAiTraining, allowAiReading);
139
+
82
140
  return content;
83
141
  }
@@ -10,4 +10,14 @@ export type RobotsConfig = {
10
10
  * Paths to block from indexing (e.g., '/admin', '/api/*')
11
11
  */
12
12
  disallowedPaths?: string[] | undefined;
13
+ /**
14
+ * Allow AI training bots to crawl content for model training.
15
+ * Set to false to block bots like GPTBot, Google-Extended, CCBot, etc.
16
+ */
17
+ allowAiTraining?: boolean | undefined;
18
+ /**
19
+ * Allow AI assistants/chatbots to read content for user responses.
20
+ * Set to false to block bots like ChatGPT-User, Claude-Web, etc.
21
+ */
22
+ allowAiReading?: boolean | undefined;
13
23
  };
@@ -5,6 +5,12 @@
5
5
  * Supports wildcards (e.g., '*.example.com')
6
6
  * @property {string[]} [disallowedPaths]
7
7
  * Paths to block from indexing (e.g., '/admin', '/api/*')
8
+ * @property {boolean} [allowAiTraining=true]
9
+ * Allow AI training bots to crawl content for model training.
10
+ * Set to false to block bots like GPTBot, Google-Extended, CCBot, etc.
11
+ * @property {boolean} [allowAiReading=true]
12
+ * Allow AI assistants/chatbots to read content for user responses.
13
+ * Set to false to block bots like ChatGPT-User, Claude-Web, etc.
8
14
  */
9
15
 
10
16
  export default {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hkdigital/lib-core",
3
- "version": "0.5.59",
3
+ "version": "0.5.60",
4
4
  "author": {
5
5
  "name": "HKdigital",
6
6
  "url": "https://hkdigital.nl"