@hkdigital/lib-core 0.5.59 → 0.5.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,5 +1,58 @@
|
|
|
1
1
|
/** @typedef {import('./typedef.js').RobotsConfig} RobotsConfig */
|
|
2
2
|
|
|
3
|
+
/**
 * AI training bots (scrape content for model training)
 *
 * User-agent tokens that receive a `Disallow: /` block when AI training
 * is not allowed. The list is frozen so that no consumer can accidentally
 * mutate the shared module-level array.
 *
 * @type {readonly string[]}
 */
const AI_TRAINING_BOTS = Object.freeze([
  'GPTBot', // OpenAI training
  'Google-Extended', // Google AI training
  'CCBot', // Common Crawl
  'anthropic-ai', // Anthropic training
  'ClaudeBot', // Anthropic crawler (token listed in Anthropic's crawler docs)
  'Omgilibot', // Omgili
  'FacebookBot', // Meta AI training
  'Diffbot', // Diffbot
  'Bytespider', // ByteDance
  'ImagesiftBot', // ImageSift
  'PerplexityBot', // Perplexity AI training
  'YouBot', // You.com training
  'Applebot-Extended' // Apple AI training
]);
|
|
20
|
+
|
|
21
|
+
/**
 * AI reading bots (retrieve content for user queries)
 *
 * User-agent tokens that receive a `Disallow: /` block when AI reading
 * is not allowed. The list is frozen so that no consumer can accidentally
 * mutate the shared module-level array.
 *
 * @type {readonly string[]}
 */
const AI_READING_BOTS = Object.freeze([
  'ChatGPT-User', // ChatGPT browsing
  'Claude-Web', // Claude browsing
  'Claude-User', // Claude user-initiated fetches (token from Anthropic's crawler docs)
  'cohere-ai' // Cohere browsing
]);
|
|
29
|
+
|
|
30
|
+
/**
 * Generate user-agent blocks for AI bots
 *
 * A bot group is blocked only when its flag is exactly `false`; any other
 * value (including `undefined`) leaves that group out of the output.
 * Training bots are emitted first, followed by reading bots.
 *
 * @param {boolean} allowAiTraining - Allow AI training bots
 * @param {boolean} allowAiReading - Allow AI reading bots
 *
 * @returns {string} User-agent blocks; every block starts with two
 *   newlines so it can be appended directly to existing content.
 *   Empty string when no group is blocked.
 */
function generateAiBotBlocks(allowAiTraining, allowAiReading) {
  // Single formatter shared by both groups, so the block layout cannot
  // drift between training and reading bots.
  const toDisallowBlocks = (bots) =>
    bots.map((bot) => `\n\nUser-agent: ${bot}\nDisallow: /`).join('');

  let blocks = '';

  if (allowAiTraining === false) {
    blocks += toDisallowBlocks(AI_TRAINING_BOTS);
  }

  if (allowAiReading === false) {
    blocks += toDisallowBlocks(AI_READING_BOTS);
  }

  return blocks;
}
|
|
55
|
+
|
|
3
56
|
/**
|
|
4
57
|
* Check if hostname matches allowed hosts pattern
|
|
5
58
|
*
|
|
@@ -74,10 +127,15 @@ export function generateRobotsTxt(url, config = {}) {
|
|
|
74
127
|
});
|
|
75
128
|
}
|
|
76
129
|
|
|
77
|
-
//
|
|
130
|
+
// Add sitemap reference
|
|
78
131
|
if (url.origin) {
|
|
79
132
|
content += `\nSitemap: ${url.origin}/sitemap.xml`;
|
|
80
133
|
}
|
|
81
134
|
|
|
135
|
+
// Add AI bot blocks (defaults to allowing both if not specified)
|
|
136
|
+
const allowAiTraining = config.allowAiTraining !== false;
|
|
137
|
+
const allowAiReading = config.allowAiReading !== false;
|
|
138
|
+
content += generateAiBotBlocks(allowAiTraining, allowAiReading);
|
|
139
|
+
|
|
82
140
|
return content;
|
|
83
141
|
}
|
|
@@ -10,4 +10,14 @@ export type RobotsConfig = {
|
|
|
10
10
|
* Paths to block from indexing (e.g., '/admin', '/api/*')
|
|
11
11
|
*/
|
|
12
12
|
disallowedPaths?: string[] | undefined;
|
|
13
|
+
/**
|
|
14
|
+
* Allow AI training bots to crawl content for model training.
|
|
15
|
+
* Set to false to block bots like GPTBot, Google-Extended, CCBot, etc.
|
|
16
|
+
*/
|
|
17
|
+
allowAiTraining?: boolean | undefined;
|
|
18
|
+
/**
|
|
19
|
+
* Allow AI assistants/chatbots to read content for user responses.
|
|
20
|
+
* Set to false to block bots like ChatGPT-User, Claude-Web, etc.
|
|
21
|
+
*/
|
|
22
|
+
allowAiReading?: boolean | undefined;
|
|
13
23
|
};
|
|
@@ -5,6 +5,12 @@
|
|
|
5
5
|
* Supports wildcards (e.g., '*.example.com')
|
|
6
6
|
* @property {string[]} [disallowedPaths]
|
|
7
7
|
* Paths to block from indexing (e.g., '/admin', '/api/*')
|
|
8
|
+
* @property {boolean} [allowAiTraining=true]
|
|
9
|
+
* Allow AI training bots to crawl content for model training.
|
|
10
|
+
* Set to false to block bots like GPTBot, Google-Extended, CCBot, etc.
|
|
11
|
+
* @property {boolean} [allowAiReading=true]
|
|
12
|
+
* Allow AI assistants/chatbots to read content for user responses.
|
|
13
|
+
* Set to false to block bots like ChatGPT-User, Claude-Web, etc.
|
|
8
14
|
*/
|
|
9
15
|
|
|
10
16
|
export default {};
|