s3db.js 18.0.11-next.1534f717 → 18.0.11-next.e8e71b5b

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/dist/clients/recker-http-handler.js +56 -8
  2. package/dist/clients/recker-http-handler.js.map +1 -1
  3. package/dist/concerns/high-performance-inserter.js +6 -34
  4. package/dist/concerns/high-performance-inserter.js.map +1 -1
  5. package/dist/concerns/id/alphabets.js +150 -0
  6. package/dist/concerns/id/alphabets.js.map +1 -0
  7. package/dist/concerns/id/entropy.js +243 -0
  8. package/dist/concerns/id/entropy.js.map +1 -0
  9. package/dist/concerns/id/generators/nanoid.js +74 -0
  10. package/dist/concerns/id/generators/nanoid.js.map +1 -0
  11. package/dist/concerns/id/generators/sid.js +73 -0
  12. package/dist/concerns/id/generators/sid.js.map +1 -0
  13. package/dist/concerns/id/generators/ulid.js +208 -0
  14. package/dist/concerns/id/generators/ulid.js.map +1 -0
  15. package/dist/concerns/id/generators/uuid-v7.js +150 -0
  16. package/dist/concerns/id/generators/uuid-v7.js.map +1 -0
  17. package/dist/concerns/id/index.js +74 -0
  18. package/dist/concerns/id/index.js.map +1 -0
  19. package/dist/concerns/plugin-storage.js +114 -0
  20. package/dist/concerns/plugin-storage.js.map +1 -1
  21. package/dist/concerns/s3-errors.js +72 -0
  22. package/dist/concerns/s3-errors.js.map +1 -0
  23. package/dist/concerns/s3-key.js +54 -0
  24. package/dist/concerns/s3-key.js.map +1 -0
  25. package/dist/concerns/safe-merge.js +47 -0
  26. package/dist/concerns/safe-merge.js.map +1 -0
  27. package/dist/core/resource-config-validator.js +12 -2
  28. package/dist/core/resource-config-validator.js.map +1 -1
  29. package/dist/core/resource-partitions.class.js +12 -1
  30. package/dist/core/resource-partitions.class.js.map +1 -1
  31. package/dist/core/resource-persistence.class.js +41 -12
  32. package/dist/core/resource-persistence.class.js.map +1 -1
  33. package/dist/core/resource-query.class.js +21 -47
  34. package/dist/core/resource-query.class.js.map +1 -1
  35. package/dist/database/database-connection.class.js +3 -6
  36. package/dist/database/database-connection.class.js.map +1 -1
  37. package/dist/database/database-plugins.class.js +7 -13
  38. package/dist/database/database-plugins.class.js.map +1 -1
  39. package/dist/plugins/concerns/s3-mutex.class.js +155 -0
  40. package/dist/plugins/concerns/s3-mutex.class.js.map +1 -0
  41. package/dist/plugins/eventual-consistency/consolidation.js +4 -7
  42. package/dist/plugins/eventual-consistency/consolidation.js.map +1 -1
  43. package/dist/plugins/eventual-consistency/garbage-collection.js +3 -6
  44. package/dist/plugins/eventual-consistency/garbage-collection.js.map +1 -1
  45. package/dist/plugins/queue-consumer.plugin.js +10 -16
  46. package/dist/plugins/queue-consumer.plugin.js.map +1 -1
  47. package/dist/plugins/recon/managers/scheduler-manager.js +3 -5
  48. package/dist/plugins/recon/managers/scheduler-manager.js.map +1 -1
  49. package/dist/plugins/recon/stages/recker-asn-stage.js +279 -0
  50. package/dist/plugins/recon/stages/recker-asn-stage.js.map +1 -0
  51. package/dist/plugins/recon/stages/recker-dns-stage.js +227 -0
  52. package/dist/plugins/recon/stages/recker-dns-stage.js.map +1 -0
  53. package/dist/plugins/recon/stages/recker-scrape-stage.js +369 -0
  54. package/dist/plugins/recon/stages/recker-scrape-stage.js.map +1 -0
  55. package/dist/plugins/replicator.plugin.js +13 -31
  56. package/dist/plugins/replicator.plugin.js.map +1 -1
  57. package/dist/plugins/replicators/base-replicator.class.js +10 -23
  58. package/dist/plugins/replicators/base-replicator.class.js.map +1 -1
  59. package/dist/plugins/spider/recker-link-discoverer.js +544 -0
  60. package/dist/plugins/spider/recker-link-discoverer.js.map +1 -0
  61. package/dist/plugins/spider/recker-llms-validator.js +334 -0
  62. package/dist/plugins/spider/recker-llms-validator.js.map +1 -0
  63. package/dist/plugins/spider/recker-robots-validator.js +336 -0
  64. package/dist/plugins/spider/recker-robots-validator.js.map +1 -0
  65. package/dist/plugins/spider/recker-security-adapter.js +325 -0
  66. package/dist/plugins/spider/recker-security-adapter.js.map +1 -0
  67. package/dist/plugins/spider/recker-seo-adapter.js +399 -0
  68. package/dist/plugins/spider/recker-seo-adapter.js.map +1 -0
  69. package/dist/plugins/spider/recker-sitemap-validator.js +406 -0
  70. package/dist/plugins/spider/recker-sitemap-validator.js.map +1 -0
  71. package/dist/resource.class.js +2 -0
  72. package/dist/resource.class.js.map +1 -1
  73. package/dist/s3db.cjs +444 -219
  74. package/dist/s3db.cjs.map +1 -1
  75. package/dist/s3db.es.js +445 -220
  76. package/dist/s3db.es.js.map +1 -1
  77. package/dist/stream/resource-reader.class.js +5 -7
  78. package/dist/stream/resource-reader.class.js.map +1 -1
  79. package/dist/stream/resource-writer.class.js +5 -7
  80. package/dist/stream/resource-writer.class.js.map +1 -1
  81. package/dist/tasks/tasks-pool.class.js +31 -0
  82. package/dist/tasks/tasks-pool.class.js.map +1 -1
  83. package/dist/types/clients/recker-http-handler.d.ts +1 -0
  84. package/dist/types/clients/recker-http-handler.d.ts.map +1 -1
  85. package/dist/types/clients/types.d.ts +14 -0
  86. package/dist/types/clients/types.d.ts.map +1 -1
  87. package/dist/types/concerns/high-performance-inserter.d.ts.map +1 -1
  88. package/dist/types/concerns/id/alphabets.d.ts +125 -0
  89. package/dist/types/concerns/id/alphabets.d.ts.map +1 -0
  90. package/dist/types/concerns/id/entropy.d.ts +84 -0
  91. package/dist/types/concerns/id/entropy.d.ts.map +1 -0
  92. package/dist/types/concerns/id/generators/nanoid.d.ts +46 -0
  93. package/dist/types/concerns/id/generators/nanoid.d.ts.map +1 -0
  94. package/dist/types/concerns/id/generators/sid.d.ts +45 -0
  95. package/dist/types/concerns/id/generators/sid.d.ts.map +1 -0
  96. package/dist/types/concerns/id/generators/ulid.d.ts +71 -0
  97. package/dist/types/concerns/id/generators/ulid.d.ts.map +1 -0
  98. package/dist/types/concerns/id/generators/uuid-v7.d.ts +60 -0
  99. package/dist/types/concerns/id/generators/uuid-v7.d.ts.map +1 -0
  100. package/dist/types/concerns/id/index.d.ts +51 -0
  101. package/dist/types/concerns/id/index.d.ts.map +1 -0
  102. package/dist/types/concerns/plugin-storage.d.ts +25 -0
  103. package/dist/types/concerns/plugin-storage.d.ts.map +1 -1
  104. package/dist/types/concerns/s3-errors.d.ts +20 -0
  105. package/dist/types/concerns/s3-errors.d.ts.map +1 -0
  106. package/dist/types/concerns/s3-key.d.ts +30 -0
  107. package/dist/types/concerns/s3-key.d.ts.map +1 -0
  108. package/dist/types/concerns/safe-merge.d.ts +22 -0
  109. package/dist/types/concerns/safe-merge.d.ts.map +1 -0
  110. package/dist/types/core/resource-config-validator.d.ts.map +1 -1
  111. package/dist/types/core/resource-partitions.class.d.ts.map +1 -1
  112. package/dist/types/core/resource-persistence.class.d.ts.map +1 -1
  113. package/dist/types/core/resource-query.class.d.ts.map +1 -1
  114. package/dist/types/database/database-connection.class.d.ts.map +1 -1
  115. package/dist/types/database/database-plugins.class.d.ts.map +1 -1
  116. package/dist/types/plugins/concerns/s3-mutex.class.d.ts +30 -0
  117. package/dist/types/plugins/concerns/s3-mutex.class.d.ts.map +1 -0
  118. package/dist/types/plugins/eventual-consistency/consolidation.d.ts.map +1 -1
  119. package/dist/types/plugins/eventual-consistency/garbage-collection.d.ts.map +1 -1
  120. package/dist/types/plugins/queue-consumer.plugin.d.ts.map +1 -1
  121. package/dist/types/plugins/recon/managers/scheduler-manager.d.ts.map +1 -1
  122. package/dist/types/plugins/recon/stages/recker-asn-stage.d.ts +90 -0
  123. package/dist/types/plugins/recon/stages/recker-asn-stage.d.ts.map +1 -0
  124. package/dist/types/plugins/recon/stages/recker-dns-stage.d.ts +125 -0
  125. package/dist/types/plugins/recon/stages/recker-dns-stage.d.ts.map +1 -0
  126. package/dist/types/plugins/recon/stages/recker-scrape-stage.d.ts +96 -0
  127. package/dist/types/plugins/recon/stages/recker-scrape-stage.d.ts.map +1 -0
  128. package/dist/types/plugins/replicator.plugin.d.ts.map +1 -1
  129. package/dist/types/plugins/replicators/base-replicator.class.d.ts.map +1 -1
  130. package/dist/types/plugins/spider/recker-link-discoverer.d.ts +54 -0
  131. package/dist/types/plugins/spider/recker-link-discoverer.d.ts.map +1 -0
  132. package/dist/types/plugins/spider/recker-llms-validator.d.ts +105 -0
  133. package/dist/types/plugins/spider/recker-llms-validator.d.ts.map +1 -0
  134. package/dist/types/plugins/spider/recker-robots-validator.d.ts +92 -0
  135. package/dist/types/plugins/spider/recker-robots-validator.d.ts.map +1 -0
  136. package/dist/types/plugins/spider/recker-security-adapter.d.ts +83 -0
  137. package/dist/types/plugins/spider/recker-security-adapter.d.ts.map +1 -0
  138. package/dist/types/plugins/spider/recker-seo-adapter.d.ts +187 -0
  139. package/dist/types/plugins/spider/recker-seo-adapter.d.ts.map +1 -0
  140. package/dist/types/plugins/spider/recker-sitemap-validator.d.ts +121 -0
  141. package/dist/types/plugins/spider/recker-sitemap-validator.d.ts.map +1 -0
  142. package/dist/types/resource.class.d.ts.map +1 -1
  143. package/dist/types/stream/resource-reader.class.d.ts.map +1 -1
  144. package/dist/types/stream/resource-writer.class.d.ts.map +1 -1
  145. package/dist/types/tasks/tasks-pool.class.d.ts +23 -0
  146. package/dist/types/tasks/tasks-pool.class.d.ts.map +1 -1
  147. package/mcp/prompts/index.ts +275 -0
  148. package/mcp/resources/index.ts +322 -0
  149. package/mcp/tools/plugins.ts +1137 -0
  150. package/mcp/tools/streams.ts +340 -0
  151. package/package.json +20 -22
  152. package/src/clients/recker-http-handler.ts +74 -8
  153. package/src/clients/types.ts +14 -0
  154. package/src/concerns/high-performance-inserter.ts +18 -57
  155. package/src/concerns/id/alphabets.ts +175 -0
  156. package/src/concerns/id/entropy.ts +286 -0
  157. package/src/concerns/id/generators/sid.ts +90 -0
  158. package/src/concerns/id/generators/ulid.ts +249 -0
  159. package/src/concerns/id/generators/uuid-v7.ts +179 -0
  160. package/src/concerns/id/index.ts +167 -0
  161. package/src/concerns/plugin-storage.ts +144 -0
  162. package/src/concerns/s3-errors.ts +97 -0
  163. package/src/concerns/s3-key.ts +62 -0
  164. package/src/concerns/safe-merge.ts +60 -0
  165. package/src/core/resource-config-validator.ts +9 -2
  166. package/src/core/resource-partitions.class.ts +14 -1
  167. package/src/core/resource-persistence.class.ts +47 -13
  168. package/src/core/resource-query.class.ts +21 -46
  169. package/src/database/database-connection.class.ts +7 -6
  170. package/src/database/database-plugins.class.ts +15 -13
  171. package/src/plugins/concerns/s3-mutex.class.ts +228 -0
  172. package/src/plugins/eventual-consistency/consolidation.ts +8 -7
  173. package/src/plugins/eventual-consistency/garbage-collection.ts +7 -6
  174. package/src/plugins/queue-consumer.plugin.ts +21 -19
  175. package/src/plugins/recon/managers/scheduler-manager.ts +7 -5
  176. package/src/plugins/recon/stages/recker-asn-stage.ts +385 -0
  177. package/src/plugins/recon/stages/recker-dns-stage.ts +360 -0
  178. package/src/plugins/recon/stages/recker-scrape-stage.ts +509 -0
  179. package/src/plugins/replicator.plugin.ts +41 -35
  180. package/src/plugins/replicators/base-replicator.class.ts +17 -23
  181. package/src/plugins/spider/recker-link-discoverer.ts +645 -0
  182. package/src/plugins/spider/recker-llms-validator.ts +500 -0
  183. package/src/plugins/spider/recker-robots-validator.ts +473 -0
  184. package/src/plugins/spider/recker-security-adapter.ts +489 -0
  185. package/src/plugins/spider/recker-seo-adapter.ts +605 -0
  186. package/src/plugins/spider/recker-sitemap-validator.ts +621 -0
  187. package/src/resource.class.ts +2 -0
  188. package/src/stream/resource-reader.class.ts +10 -8
  189. package/src/stream/resource-writer.class.ts +10 -8
  190. package/src/tasks/tasks-pool.class.ts +46 -0
@@ -0,0 +1,334 @@
1
+ import { createHttpClient } from '#src/concerns/http-client.js';
2
/**
 * Checks and validates a site's `llms.txt` file.
 *
 * The optional `recker/seo/validators/llms-txt` module is loaded lazily via a
 * dynamic import. When that import fails, `check()` falls back to a small
 * built-in parser and `generateTemplate()` to a built-in template writer.
 * Successful results of the recker-backed `check()` are cached per normalized
 * domain for `config.cacheTimeout` milliseconds.
 */
export class ReckerLlmsTxtValidator {
    config;
    _context;
    cache;
    _httpClient;
    // `null` = not probed yet, `true`/`false` = result of the dynamic import.
    reckerAvailable = null;
    parseLlmsTxt = null;
    validateLlmsTxt = null;
    fetchAndValidateLlmsTxt = null;
    generateLlmsTxtTemplate = null;

    /**
     * @param {object} [config]
     * @param {string} [config.userAgent] - User-Agent header used when no context is given (default 's3db-spider').
     * @param {number} [config.fetchTimeout] - Timeout handed to the HTTP client (default 10000).
     * @param {number} [config.cacheTimeout] - Cache lifetime in milliseconds (default 3600000).
     * @param {object|null} [config.context] - Optional crawl context providing
     *   `getHttpClientConfig(url)` and `processResponse(response, url)`.
     */
    constructor(config = {}) {
        this.config = {
            userAgent: config.userAgent || 's3db-spider',
            fetchTimeout: config.fetchTimeout || 10000,
            cacheTimeout: config.cacheTimeout || 3600000,
            context: config.context || null
        };
        this._context = this.config.context;
        this.cache = new Map();
        this._httpClient = null;
    }

    /**
     * Probes for the optional recker llms.txt module once and memoizes the outcome.
     * A failed import is treated as "not installed" on purpose (best-effort feature).
     *
     * @returns {Promise<boolean>} `true` when the module was loaded.
     */
    async _checkReckerAvailability() {
        if (this.reckerAvailable !== null) {
            return this.reckerAvailable;
        }
        try {
            const llmsModule = await import('recker/seo/validators/llms-txt');
            this.parseLlmsTxt = llmsModule.parseLlmsTxt;
            this.validateLlmsTxt = llmsModule.validateLlmsTxt;
            this.fetchAndValidateLlmsTxt = llmsModule.fetchAndValidateLlmsTxt;
            this.generateLlmsTxtTemplate = llmsModule.generateLlmsTxtTemplate;
            this.reckerAvailable = true;
            return true;
        }
        catch {
            this.reckerAvailable = false;
            return false;
        }
    }

    /**
     * Lazily creates the shared HTTP client and reuses it afterwards.
     *
     * @returns {Promise<object>} The client returned by `createHttpClient`.
     */
    async _getHttpClient() {
        if (!this._httpClient) {
            // NOTE(review): the URL passed to getHttpClientConfig looks like a
            // placeholder; confirm the context does not derive per-host settings from it.
            const baseConfig = this._context
                ? this._context.getHttpClientConfig('https://example.com')
                : {
                    headers: {
                        'User-Agent': this.config.userAgent
                    }
                };
            this._httpClient = await createHttpClient({
                ...baseConfig,
                timeout: this.config.fetchTimeout,
                retry: {
                    maxAttempts: 2,
                    delay: 500,
                    backoff: 'exponential',
                    retryAfter: true,
                    retryOn: [429, 500, 502, 503, 504]
                }
            });
        }
        return this._httpClient;
    }

    /**
     * Fetches `<domain>/llms.txt` and reports whether it exists and is valid.
     * Never rejects for fetch/validation failures: those are reported in the
     * returned object's `issues`/`errors`.
     *
     * @param {string} domain - Site origin; a single trailing slash is stripped.
     * @returns {Promise<object>} Result with `exists`, `valid`, `sections`,
     *   `links`, `issues`, `errors`, `warnings` and, when available, `status`,
     *   `siteName`, `siteDescription`, `fullVersionExists`, `size`.
     */
    async check(domain) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            return this._fallbackCheck(domain);
        }
        const normalizedDomain = domain.replace(/\/$/, '');
        const llmsUrl = `${normalizedDomain}/llms.txt`;
        const cached = this.cache.get(normalizedDomain);
        if (cached && Date.now() - cached.timestamp < this.config.cacheTimeout) {
            return cached.result;
        }
        try {
            const fetchResult = await this.fetchAndValidateLlmsTxt(llmsUrl, async (url) => {
                const client = await this._getHttpClient();
                const response = await client.get(url);
                if (this._context) {
                    this._context.processResponse(response, url);
                }
                return {
                    status: response.ok ? 200 : response.status,
                    text: response.ok ? await response.text() : ''
                };
            });
            // Guard against a missing parse result so that a lookup on `undefined`
            // is not misreported as a fetch failure, and keep `sections`/`links`
            // arrays as every other return path of this class does.
            const parsed = fetchResult.parseResult ?? {};
            const result = {
                exists: fetchResult.exists,
                valid: fetchResult.valid,
                status: fetchResult.status,
                fullVersionExists: fetchResult.fullVersionExists,
                siteName: parsed.siteName,
                siteDescription: parsed.siteDescription,
                sections: parsed.sections ?? [],
                links: parsed.links ?? [],
                issues: fetchResult.issues,
                errors: parsed.errors,
                warnings: parsed.warnings,
                size: parsed.size
            };
            this.cache.set(normalizedDomain, { result, timestamp: Date.now() });
            return result;
        }
        catch (error) {
            // Failures are intentionally not cached so the next call retries.
            return {
                exists: false,
                valid: false,
                sections: [],
                links: [],
                issues: [{
                        type: 'error',
                        code: 'FETCH_ERROR',
                        message: `Failed to fetch llms.txt: ${error.message}`
                    }],
                errors: [error.message],
                warnings: []
            };
        }
    }

    /**
     * `check()` implementation used when recker is not installed: fetches the
     * file directly and parses it with `_simpleParse`. Results are not cached.
     *
     * @param {string} domain - Site origin; a single trailing slash is stripped.
     * @returns {Promise<object>} Same result shape as `check()`.
     */
    async _fallbackCheck(domain) {
        const normalizedDomain = domain.replace(/\/$/, '');
        const llmsUrl = `${normalizedDomain}/llms.txt`;
        try {
            const client = await this._getHttpClient();
            const response = await client.get(llmsUrl);
            if (!response.ok) {
                return {
                    exists: false,
                    valid: false,
                    status: response.status,
                    sections: [],
                    links: [],
                    issues: [{
                            type: 'info',
                            code: 'NOT_FOUND',
                            message: 'llms.txt file not found',
                            recommendation: 'Consider adding an llms.txt file for AI SEO'
                        }],
                    errors: [],
                    warnings: []
                };
            }
            const content = await response.text();
            const parsed = this._simpleParse(content);
            return {
                exists: true,
                // Considered valid as soon as a title or at least one section was found.
                valid: parsed.sections.length > 0 || !!parsed.siteName,
                status: 200,
                siteName: parsed.siteName,
                siteDescription: parsed.siteDescription,
                sections: parsed.sections,
                links: parsed.links,
                issues: [],
                errors: [],
                warnings: ['Recker not available - using basic parsing'],
                size: content.length
            };
        }
        catch (error) {
            return {
                exists: false,
                valid: false,
                sections: [],
                links: [],
                issues: [],
                errors: [error.message],
                warnings: []
            };
        }
    }

    /**
     * Minimal llms.txt parser used by the fallback path.
     *
     * Recognizes the first `# ` heading as the site name, the first `>` line as
     * the description, `## ` headings as sections, and link lines written either
     * as `- [text](url): notes` (list item) or `[text](url) - notes`. Any other
     * non-empty line inside a section is appended to that section's `content`.
     *
     * @param {string} content - Raw file text.
     * @returns {{siteName: (string|undefined), siteDescription: (string|undefined), sections: Array<object>, links: Array<object>}}
     */
    _simpleParse(content) {
        const sections = [];
        const links = [];
        let siteName;
        let siteDescription;
        let currentSection = null;
        // Optional list marker, markdown link, then optional ": notes" or "- notes".
        const linkPattern = /^(?:[-*+]\s+)?\[([^\]]+)\]\(([^)]+)\)(?:\s*[:-]\s*(.+))?$/;
        for (const line of content.split(/\r?\n/)) {
            const trimmed = line.trim();
            // Only blank lines are skipped. Lines starting with '#' are headings and
            // must reach the checks below; discarding them made the title and every
            // section undetectable.
            if (!trimmed) {
                continue;
            }
            if (trimmed.startsWith('# ') && !siteName) {
                siteName = trimmed.slice(2).trim();
                continue;
            }
            if (trimmed.startsWith('>') && !siteDescription) {
                siteDescription = trimmed.slice(1).trim();
                continue;
            }
            if (trimmed.startsWith('## ')) {
                if (currentSection) {
                    sections.push(currentSection);
                }
                currentSection = {
                    title: trimmed.slice(3).trim(),
                    content: '',
                    links: []
                };
                continue;
            }
            const linkMatch = trimmed.match(linkPattern);
            if (linkMatch) {
                const link = {
                    text: linkMatch[1],
                    url: linkMatch[2],
                    description: linkMatch[3]?.trim(),
                    section: currentSection?.title
                };
                links.push(link);
                if (currentSection) {
                    currentSection.links.push(link);
                }
                continue;
            }
            if (currentSection) {
                currentSection.content += (currentSection.content ? '\n' : '') + trimmed;
            }
        }
        if (currentSection) {
            sections.push(currentSection);
        }
        return { siteName, siteDescription, sections, links };
    }

    /**
     * Runs recker's full validation against the live `llms.txt` of a domain.
     *
     * @param {string} domain - Site origin; a single trailing slash is stripped.
     * @returns {Promise<object|null>} The validator's result, or `null` when
     *   recker is unavailable, the file does not exist, or the fetch fails.
     */
    async validate(domain) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            return null;
        }
        const checkResult = await this.check(domain);
        if (!checkResult.exists) {
            return null;
        }
        const normalizedDomain = domain.replace(/\/$/, '');
        const llmsUrl = `${normalizedDomain}/llms.txt`;
        try {
            const client = await this._getHttpClient();
            const response = await client.get(llmsUrl);
            // `check()` may have answered from cache; do not validate an error page
            // body if the file has since become unavailable.
            if (!response.ok) {
                return null;
            }
            const content = await response.text();
            return this.validateLlmsTxt(content, normalizedDomain);
        }
        catch {
            return null;
        }
    }

    /**
     * Validates already-fetched content with recker.
     *
     * @param {string} content - Raw llms.txt text.
     * @param {string} [baseUrl] - Passed through to the recker validator.
     * @returns {object|null} Validation result, or `null` when recker has not
     *   been loaded (availability is only probed by the async methods).
     */
    validateContent(content, baseUrl) {
        if (!this.reckerAvailable || !this.validateLlmsTxt) {
            return null;
        }
        return this.validateLlmsTxt(content, baseUrl);
    }

    /**
     * Parses already-fetched content with recker.
     *
     * @param {string} content - Raw llms.txt text.
     * @returns {object|null} Parse result, or `null` when recker has not been loaded.
     */
    parseContent(content) {
        if (!this.reckerAvailable || !this.parseLlmsTxt) {
            return null;
        }
        return this.parseLlmsTxt(content);
    }

    /**
     * Generates llms.txt text, using recker when loaded and the built-in writer otherwise.
     *
     * @param {object} options - Template options (`siteName`, `siteDescription`, `sections`).
     * @returns {string} Generated file content.
     */
    generateTemplate(options) {
        if (!this.reckerAvailable || !this.generateLlmsTxtTemplate) {
            return this._fallbackGenerateTemplate(options);
        }
        return this.generateLlmsTxtTemplate(options);
    }

    /**
     * Built-in template writer: title, optional blockquote description, then one
     * `##` block per section listing its links.
     *
     * @param {object} options
     * @param {string} options.siteName
     * @param {string} [options.siteDescription] - Omitted from the output when absent.
     * @param {Array<{title: string, links?: Array<{text: string, url: string, description?: string}>}>} [options.sections]
     * @returns {string} Lines joined with '\n'.
     */
    _fallbackGenerateTemplate(options) {
        const lines = [`# ${options.siteName}`, ''];
        // Avoid emitting a literal "> undefined" when no description was supplied.
        if (options.siteDescription) {
            lines.push(`> ${options.siteDescription}`, '');
        }
        for (const section of options.sections ?? []) {
            lines.push(`## ${section.title}`, '');
            // A section without a `links` array is written as an empty section.
            for (const link of section.links ?? []) {
                lines.push(link.description
                    ? `[${link.text}](${link.url}) - ${link.description}`
                    : `[${link.text}](${link.url})`);
            }
            lines.push('');
        }
        return lines.join('\n');
    }

    /**
     * Checks whether `<domain>/llms-full.txt` exists.
     *
     * @param {string} domain - Site origin; a single trailing slash is stripped.
     * @returns {Promise<{exists: boolean, status?: number, size?: number}>}
     *   `size` is the length of the response text; `status` is absent when the
     *   request itself failed.
     */
    async checkFullVersion(domain) {
        const normalizedDomain = domain.replace(/\/$/, '');
        const llmsFullUrl = `${normalizedDomain}/llms-full.txt`;
        try {
            const client = await this._getHttpClient();
            const response = await client.get(llmsFullUrl);
            if (!response.ok) {
                return { exists: false, status: response.status };
            }
            const content = await response.text();
            return {
                exists: true,
                status: 200,
                size: content.length
            };
        }
        catch {
            return { exists: false };
        }
    }

    /**
     * @param {string} domain
     * @returns {Promise<Array<object>>} The `links` of `check(domain)`.
     */
    async getLinks(domain) {
        const result = await this.check(domain);
        return result.links;
    }

    /**
     * @param {string} domain
     * @returns {Promise<Array<object>>} The `sections` of `check(domain)`.
     */
    async getSections(domain) {
        const result = await this.check(domain);
        return result.sections;
    }

    /**
     * Drops the cached result for one domain, or the whole cache when called
     * without an argument.
     *
     * @param {string} [domain] - Normalized the same way as in `check()`.
     */
    clearCache(domain) {
        if (domain) {
            const normalizedDomain = domain.replace(/\/$/, '');
            this.cache.delete(normalizedDomain);
        }
        else {
            this.cache.clear();
        }
    }

    /**
     * @returns {{size: number, domains: string[]}} Number of cached entries and their keys.
     */
    getCacheStats() {
        return {
            size: this.cache.size,
            domains: [...this.cache.keys()]
        };
    }

    /**
     * @returns {boolean} `true` only after a successful probe of the recker module.
     */
    isReckerEnabled() {
        return this.reckerAvailable === true;
    }
}
333
+ export default ReckerLlmsTxtValidator;
334
+ //# sourceMappingURL=recker-llms-validator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"recker-llms-validator.js","sourceRoot":"","sources":["../../../src/plugins/spider/recker-llms-validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAqGhE,MAAM,OAAO,sBAAsB;IACzB,MAAM,CAEZ;IACM,QAAQ,CAAsB;IAC9B,KAAK,CAA0B;IAC/B,WAAW,CAAoB;IAE/B,eAAe,GAAmB,IAAI,CAAC;IACvC,YAAY,GAA8B,IAAI,CAAC;IAC/C,eAAe,GAAiC,IAAI,CAAC;IACrD,uBAAuB,GAAyC,IAAI,CAAC;IACrE,uBAAuB,GAAyC,IAAI,CAAC;IAE7E,YAAY,SAAiC,EAAE;QAC7C,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,aAAa;YAC5C,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,KAAK;YAC1C,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,OAAO;YAC5C,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,IAAI;SAChC,CAAC;QAEF,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC;QACpC,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAEO,KAAK,CAAC,wBAAwB;QACpC,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI,EAAE,CAAC;YAClC,OAAO,IAAI,CAAC,eAAe,CAAC;QAC9B,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,gCAAgC,CAAC,CAAC;YAClE,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC,YAAY,CAAC;YAC5C,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,eAAe,CAAC;YAClD,IAAI,CAAC,uBAAuB,GAAG,UAAU,CAAC,uBAAuB,CAAC;YAClE,IAAI,CAAC,uBAAuB,GAAG,UAAU,CAAC,uBAAuB,CAAC;YAClE,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,cAAc;QAC1B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ;gBAC9B,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,mBAAmB,CAAC,qBAAqB,CAAC;gBAC1D,CAAC,CAAC;oBACE,OAAO,EAAE;wBACP,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;qBACpC;iBACF,CAAC;YAEN,IAAI,CAAC,WAAW,GAAG,MAAM,gBAAgB,CAAC;gBACxC,GAAG,UAAU;gBACb,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;gBACjC,KAAK,EAAE;oBACL,WAAW,EAAE,CAAC;oBACd,KAAK,EAAE,GAAG;oBACV,OAAO,EAAE,aAAa;oBACtB,UAAU,EAAE,IAAI;oBAChB,OAAO,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;iBACnC;aACF,CAA0B,CAAC;QAC9B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,MAAc;QACxB,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,wBAAwB,E
AAE,CAAC;QAEhE,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;QACrC,CAAC;QAED,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,GAAG,gBAAgB,WAAW,CAAC;QAE/C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAChD,IAAI,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;YACvE,OAAO,MAAM,CAAC,MAAM,CAAC;QACvB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,uBAAwB,CAAC,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE;gBAC7E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;gBAC3C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAEvC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,IAAI,CAAC,QAAQ,CAAC,eAAe,CAC3B,QAA0E,EAC1E,GAAG,CACJ,CAAC;gBACJ,CAAC;gBAED,OAAO;oBACL,MAAM,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM;oBAC3C,IAAI,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE;iBAC/C,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAuB;gBACjC,MAAM,EAAE,WAAW,CAAC,MAAM;gBAC1B,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,MAAM,EAAE,WAAW,CAAC,MAAM;gBAC1B,iBAAiB,EAAE,WAAW,CAAC,iBAAiB;gBAChD,QAAQ,EAAE,WAAW,CAAC,WAAW,CAAC,QAAQ;gBAC1C,eAAe,EAAE,WAAW,CAAC,WAAW,CAAC,eAAe;gBACxD,QAAQ,EAAE,WAAW,CAAC,WAAW,CAAC,QAAQ;gBAC1C,KAAK,EAAE,WAAW,CAAC,WAAW,CAAC,KAAK;gBACpC,MAAM,EAAE,WAAW,CAAC,MAAM;gBAC1B,MAAM,EAAE,WAAW,CAAC,WAAW,CAAC,MAAM;gBACtC,QAAQ,EAAE,WAAW,CAAC,WAAW,CAAC,QAAQ;gBAC1C,IAAI,EAAE,WAAW,CAAC,WAAW,CAAC,IAAI;aACnC,CAAC;YAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,gBAAgB,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YAEpE,OAAO,MAAM,CAAC;QAEhB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,KAAK;gBACZ,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,MAAM,EAAE,CAAC;wBACP,IAAI,EAAE,OAAO;wBACb,IAAI,EAAE,aAAa;wBACnB,OAAO,EAAE,6BAA8B,KAAe,CAAC,OAAO,EAAE;qBACjE,CAAC;gBACF,MAAM,EAAE,CAAE,KAAe,CAAC,OAAO,CAAC;gBAClC,QAAQ,EAAE,EAAE;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,MAAc;QACzC,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EA
AE,EAAE,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,GAAG,gBAAgB,WAAW,CAAC;QAE/C,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YAE3C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,OAAO;oBACL,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,KAAK;oBACZ,MAAM,EAAE,QAAQ,CAAC,MAAM;oBACvB,QAAQ,EAAE,EAAE;oBACZ,KAAK,EAAE,EAAE;oBACT,MAAM,EAAE,CAAC;4BACP,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,WAAW;4BACjB,OAAO,EAAE,yBAAyB;4BAClC,cAAc,EAAE,6CAA6C;yBAC9D,CAAC;oBACF,MAAM,EAAE,EAAE;oBACV,QAAQ,EAAE,EAAE;iBACb,CAAC;YACJ,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;YAE1C,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ;gBACtD,MAAM,EAAE,GAAG;gBACX,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,eAAe,EAAE,MAAM,CAAC,eAAe;gBACvC,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,EAAE;gBACV,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,CAAC,4CAA4C,CAAC;gBACxD,IAAI,EAAE,OAAO,CAAC,MAAM;aACrB,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,KAAK;gBACZ,QAAQ,EAAE,EAAE;gBACZ,KAAK,EAAE,EAAE;gBACT,MAAM,EAAE,EAAE;gBACV,MAAM,EAAE,CAAE,KAAe,CAAC,OAAO,CAAC;gBAClC,QAAQ,EAAE,EAAE;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,YAAY,CAAC,OAAe;QAMlC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAqB,EAAE,CAAC;QACtC,MAAM,KAAK,GAAkB,EAAE,CAAC;QAChC,IAAI,QAA4B,CAAC;QACjC,IAAI,eAAmC,CAAC;QACxC,IAAI,cAAc,GAA0B,IAAI,CAAC;QAEjD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YAElD,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC1C,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnC,SAAS;YACX,CAAC;YAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;gBAChD,eAAe,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1C,SAAS;YACX,CAAC;YAED,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAA
C;gBAC9B,IAAI,cAAc,EAAE,CAAC;oBACnB,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAChC,CAAC;gBACD,cAAc,GAAG;oBACf,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;oBAC9B,OAAO,EAAE,EAAE;oBACX,KAAK,EAAE,EAAE;iBACV,CAAC;gBACF,SAAS;YACX,CAAC;YAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC7E,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,IAAI,GAAgB;oBACxB,IAAI,EAAE,SAAS,CAAC,CAAC,CAAE;oBACnB,GAAG,EAAE,SAAS,CAAC,CAAC,CAAE;oBAClB,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE;oBACjC,OAAO,EAAE,cAAc,EAAE,KAAK;iBAC/B,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,IAAI,cAAc,EAAE,CAAC;oBACnB,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAClC,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IAAI,cAAc,IAAI,OAAO,EAAE,CAAC;gBAC9B,cAAc,CAAC,OAAO,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC;YAC3E,CAAC;QACH,CAAC;QAED,IAAI,cAAc,EAAE,CAAC;YACnB,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;IACxD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc;QAC3B,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,wBAAwB,EAAE,CAAC;QAEhE,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAE7C,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,GAAG,gBAAgB,WAAW,CAAC;QAE/C,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEtC,OAAO,IAAI,CAAC,eAAgB,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAC1D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,eAAe,CAAC,OAAe,EAAE,OAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAChD,CAAC;IAED,YAAY,CAAC,OAAe;QAC1B,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YAChD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,IAAI,CAAC,Y
AAY,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,gBAAgB,CAAC,OAA+B;QAC9C,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,uBAAuB,EAAE,CAAC;YAC3D,OAAO,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,IAAI,CAAC,uBAAuB,CAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IAEO,yBAAyB,CAAC,OAA+B;QAC/D,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAClC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBACjC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;wBACrB,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,GAAG,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;oBAClE,CAAC;yBAAM,CAAC;wBACN,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;oBAC5C,CAAC;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjB,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,MAAc;QAKnC,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACnD,MAAM,WAAW,GAAG,GAAG,gBAAgB,gBAAgB,CAAC;QAExD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAE/C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;YACpD,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,GAAG;gBACX,IAAI,EAAE,OAAO,CAAC,MAAM;aACrB,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc;QAC3B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACxC,OAAO,MAAM,CAAC,KAAK,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,MAAc;QAC9B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACxC,OAAO,MAAM,CAAC,QAA
Q,CAAC;IACzB,CAAC;IAED,UAAU,CAAC,MAAe;QACxB,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACnD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAED,aAAa;QACX,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI;YACrB,OAAO,EAAE,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SAChC,CAAC;IACJ,CAAC;IAED,eAAe;QACb,OAAO,IAAI,CAAC,eAAe,KAAK,IAAI,CAAC;IACvC,CAAC;CACF;AAED,eAAe,sBAAsB,CAAC"}
@@ -0,0 +1,336 @@
1
+ import { createHttpClient } from '#src/concerns/http-client.js';
2
/**
 * robots.txt checker that prefers the optional `recker` robots module and
 * delegates to the bundled `RobotsParser` (./robots-parser.js) when `recker`
 * cannot be imported.
 *
 * Parsed results are cached per origin (`protocol//host`) for
 * `config.cacheTimeout` milliseconds. A cache entry has the shape
 * `{ parseResult, validationResult, timestamp }`; both results are `null`
 * when robots.txt could not be fetched.
 */
export class ReckerRobotsValidator {
    config;
    _context;
    cache;
    fetcher;
    _httpClient;
    // Tri-state: null = not probed yet, true/false = outcome of the dynamic import.
    reckerAvailable = null;
    parseRobotsTxt = null;
    validateRobotsTxt = null;
    isPathAllowed = null;
    fetchAndValidateRobotsTxt = null;
    fallbackParser = null;

    /**
     * @param {object} [config]
     * @param {string} [config.userAgent='s3db-spider'] Crawler name matched against robots.txt groups.
     * @param {boolean} [config.defaultAllow=true] Verdict used when robots.txt is missing or an error occurs.
     * @param {number} [config.cacheTimeout=3600000] Cache lifetime in milliseconds.
     * @param {number} [config.fetchTimeout=10000] Timeout handed to the HTTP client.
     * @param {object|null} [config.context] Optional crawl context providing
     *   `getHttpClientConfig()` and `processResponse()`.
     * @param {Function|null} [config.fetcher] Optional `async (url) => string` replacing the built-in HTTP fetch.
     */
    constructor(config = {}) {
        this.config = {
            userAgent: config.userAgent || 's3db-spider',
            defaultAllow: config.defaultAllow !== false,
            cacheTimeout: config.cacheTimeout || 3600000,
            fetchTimeout: config.fetchTimeout || 10000,
            context: config.context || null,
            fetcher: config.fetcher || null
        };
        this._context = this.config.context;
        this.cache = new Map();
        this.fetcher = this.config.fetcher;
        this._httpClient = null;
    }

    /**
     * Probes for the optional `recker` robots module once and memoizes the
     * outcome. On success the module's functions are stored on the instance.
     *
     * @returns {Promise<boolean>} `true` when the module was imported.
     */
    async _checkReckerAvailability() {
        if (this.reckerAvailable !== null) {
            return this.reckerAvailable;
        }
        try {
            const robotsModule = await import('recker/seo/validators/robots');
            this.parseRobotsTxt = robotsModule.parseRobotsTxt;
            this.validateRobotsTxt = robotsModule.validateRobotsTxt;
            this.isPathAllowed = robotsModule.isPathAllowed;
            this.fetchAndValidateRobotsTxt = robotsModule.fetchAndValidateRobotsTxt;
            this.reckerAvailable = true;
            return true;
        }
        catch {
            // Any import failure means "not installed"; callers switch to the fallback parser.
            this.reckerAvailable = false;
            return false;
        }
    }

    /**
     * Lazily creates the fallback `RobotsParser`, built from this instance's config.
     *
     * @returns {Promise<object>} The shared fallback parser instance.
     */
    async _getFallbackParser() {
        if (!this.fallbackParser) {
            const { RobotsParser } = await import('./robots-parser.js');
            this.fallbackParser = new RobotsParser(this.config);
        }
        return this.fallbackParser;
    }

    /**
     * Replaces the custom fetcher, also on the fallback parser if it already exists.
     *
     * @param {Function} fetcher `async (url) => string`
     */
    setFetcher(fetcher) {
        this.fetcher = fetcher;
        if (this.fallbackParser) {
            this.fallbackParser.setFetcher(fetcher);
        }
    }

    /**
     * Decides whether `url` may be crawled by the configured user agent.
     *
     * Never rejects on the recker path: failures (invalid URL, parser errors)
     * are reported as `{ allowed: defaultAllow, source: 'error', error }`.
     *
     * @param {string} url Absolute URL to check.
     * @returns {Promise<object>} `{ allowed, source, crawlDelay?, matchedRule?, error? }`,
     *   or whatever the fallback parser returns when recker is unavailable.
     */
    async isAllowed(url) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            const fallback = await this._getFallbackParser();
            return fallback.isAllowed(url);
        }
        try {
            const urlObj = new URL(url);
            const domain = `${urlObj.protocol}//${urlObj.host}`;
            const path = urlObj.pathname + urlObj.search;
            const cached = await this._getCachedOrFetch(domain);
            if (!cached.parseResult) {
                return { allowed: this.config.defaultAllow, source: 'no-robots-txt' };
            }
            const allowed = this.isPathAllowed(cached.parseResult, path, this.config.userAgent);
            const crawlDelay = this._getCrawlDelayFromParseResult(cached.parseResult);
            const matchedRule = this._findMatchedRule(cached.parseResult, path);
            return {
                allowed,
                crawlDelay,
                source: 'robots-txt',
                matchedRule
            };
        }
        catch (error) {
            return {
                allowed: this.config.defaultAllow,
                source: 'error',
                // Thrown values are not guaranteed to be Error instances.
                error: error instanceof Error ? error.message : String(error)
            };
        }
    }

    /**
     * Returns the cache entry for `domain`, fetching and parsing
     * `<domain>/robots.txt` when the entry is missing or expired.
     *
     * A failed fetch is cached as an entry with `null` results. When recker is
     * unavailable an uncached `null` entry is returned, because the parse and
     * validate functions do not exist in that mode.
     *
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {Promise<{parseResult: object|null, validationResult: object|null, timestamp: number}>}
     */
    async _getCachedOrFetch(domain) {
        const cached = this.cache.get(domain);
        if (cached && Date.now() - cached.timestamp < this.config.cacheTimeout) {
            return cached;
        }
        // Some public methods reach this point without probing for recker first;
        // without this guard `this.parseRobotsTxt` would still be null below.
        if (!(await this._checkReckerAvailability())) {
            return { parseResult: null, validationResult: null, timestamp: Date.now() };
        }
        const robotsUrl = `${domain}/robots.txt`;
        let content = null;
        try {
            if (this.fetcher) {
                content = await this.fetcher(robotsUrl);
            }
            else {
                content = await this._fetchRobotsTxt(robotsUrl);
            }
        }
        catch {
            // Remember the failure so an unreachable host is not re-fetched on every call.
            const entry = {
                parseResult: null,
                validationResult: null,
                timestamp: Date.now()
            };
            this.cache.set(domain, entry);
            return entry;
        }
        const parseResult = this.parseRobotsTxt(content);
        const validationResult = this.validateRobotsTxt(content, domain);
        const entry = {
            parseResult,
            validationResult,
            timestamp: Date.now()
        };
        this.cache.set(domain, entry);
        return entry;
    }

    /**
     * Lazily builds the HTTP client used for robots.txt downloads.
     *
     * The base config comes from the crawl context when one is set, otherwise
     * it only carries the configured User-Agent header. Timeout and retry
     * settings are always applied on top.
     *
     * @returns {Promise<object>} The memoized client.
     */
    async _getHttpClient() {
        if (!this._httpClient) {
            // NOTE(review): the context is asked for config using a fixed placeholder
            // URL, not the robots.txt origin — confirm this is intended.
            const baseConfig = this._context
                ? this._context.getHttpClientConfig('https://example.com')
                : {
                    headers: {
                        'User-Agent': this.config.userAgent
                    }
                };
            this._httpClient = await createHttpClient({
                ...baseConfig,
                timeout: this.config.fetchTimeout,
                retry: {
                    maxAttempts: 2,
                    delay: 500,
                    backoff: 'exponential',
                    retryAfter: true,
                    retryOn: [429, 500, 502, 503, 504]
                }
            });
        }
        return this._httpClient;
    }

    /**
     * Downloads a robots.txt file with the built-in HTTP client.
     *
     * @param {string} url Full robots.txt URL.
     * @returns {Promise<string>} Response body text.
     * @throws {Error} `HTTP <status>` when the response is not OK.
     */
    async _fetchRobotsTxt(url) {
        const client = await this._getHttpClient();
        const response = await client.get(url);
        if (this._context) {
            this._context.processResponse(response, url);
        }
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        return await response.text();
    }

    /**
     * Picks the user-agent block that applies to the configured crawler.
     *
     * Precedence: a block naming the crawler exactly, then a block whose agent
     * token is a substring of the crawler name (or vice versa), then the `*`
     * block. Comparison is case-insensitive. Empty agent tokens are skipped in
     * the substring step because every string "includes" the empty string.
     *
     * @param {object} parseResult Parsed robots.txt exposing `userAgentBlocks`.
     * @returns {object|null} The selected block, or `null` when none applies.
     */
    _findUserAgentBlock(parseResult) {
        const userAgentLower = this.config.userAgent.toLowerCase();
        const firstBlockWhere = (predicate) => {
            for (const block of parseResult.userAgentBlocks) {
                const agents = block.userAgents.map(a => a.toLowerCase());
                if (predicate(agents)) {
                    return block;
                }
            }
            return null;
        };
        const isPartialMatch = (agent) => agent !== '' && agent !== '*' &&
            (agent.includes(userAgentLower) || userAgentLower.includes(agent));
        return firstBlockWhere(agents => agents.includes(userAgentLower))
            ?? firstBlockWhere(agents => agents.some(isPartialMatch))
            ?? firstBlockWhere(agents => agents.includes('*'));
    }

    /**
     * Crawl delay of the applicable block, multiplied by 1000.
     *
     * @param {object} parseResult Parsed robots.txt.
     * @returns {number|null} Delay, or `null` when no block applies or the
     *   block has no (or a zero) crawl delay.
     */
    _getCrawlDelayFromParseResult(parseResult) {
        const block = this._findUserAgentBlock(parseResult);
        if (!block) {
            return null;
        }
        return block.crawlDelay ? block.crawlDelay * 1000 : null;
    }

    /**
     * Finds the most specific rule path of the applicable block that matches `path`.
     *
     * Rules are tried longest-first, where length ignores `*` wildcards.
     *
     * @param {object} parseResult Parsed robots.txt.
     * @param {string} path URL path plus query string.
     * @returns {string|undefined} The matching rule pattern, if any.
     */
    _findMatchedRule(parseResult, path) {
        const targetBlock = this._findUserAgentBlock(parseResult);
        if (!targetBlock) {
            return undefined;
        }
        const specificity = (rule) => rule.path.replace(/\*/g, '').length;
        const sortedRules = [...targetBlock.rules].sort((a, b) => specificity(b) - specificity(a));
        for (const rule of sortedRules) {
            if (this._pathMatches(path, rule.path)) {
                return rule.path;
            }
        }
        return undefined;
    }

    /**
     * Tests `path` against a robots.txt path pattern.
     *
     * `*` matches any run of characters. A trailing `$` anchors the pattern to
     * the end of the path; without it the pattern is a prefix match. A `$`
     * anywhere else is matched literally.
     *
     * @param {string} path URL path plus query string.
     * @param {string} pattern Rule path from robots.txt.
     * @returns {boolean}
     */
    _pathMatches(path, pattern) {
        const anchored = pattern.endsWith('$');
        const body = anchored ? pattern.slice(0, -1) : pattern;
        // Escape every regex metacharacter except `*`, which becomes the wildcard.
        const source = body
            .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
            .replace(/\*/g, '.*');
        // NOTE(review): matching is case-insensitive here; RFC 9309 describes
        // case-sensitive path matching — confirm against recker's isPathAllowed.
        const regex = new RegExp(`^${source}${anchored ? '' : '.*'}$`, 'i');
        return regex.test(path);
    }

    /**
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {Promise<Array>} Sitemaps from the parse result (empty when
     *   robots.txt is unavailable), or the fallback parser's answer.
     */
    async getSitemaps(domain) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            const fallback = await this._getFallbackParser();
            return fallback.getSitemaps(domain);
        }
        const cached = await this._getCachedOrFetch(domain);
        return cached.parseResult?.sitemaps || [];
    }

    /**
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {Promise<number|null>} Crawl delay for the configured user agent
     *   (see `_getCrawlDelayFromParseResult`), or the fallback parser's answer.
     */
    async getCrawlDelay(domain) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            const fallback = await this._getFallbackParser();
            return fallback.getCrawlDelay(domain);
        }
        const cached = await this._getCachedOrFetch(domain);
        if (!cached.parseResult) {
            return null;
        }
        return this._getCrawlDelayFromParseResult(cached.parseResult);
    }

    /**
     * Warms the cache for `domain`.
     *
     * When recker is unavailable the request is handed to the fallback parser,
     * provided it exposes a `preload` method.
     *
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {Promise<void>}
     */
    async preload(domain) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            const fallback = await this._getFallbackParser();
            if (typeof fallback.preload === 'function') {
                await fallback.preload(domain);
            }
            return;
        }
        await this._getCachedOrFetch(domain);
    }

    /**
     * Drops one cached origin, or the whole cache when `domain` is omitted.
     *
     * @param {string} [domain]
     */
    clearCache(domain) {
        if (domain) {
            this.cache.delete(domain);
        }
        else {
            this.cache.clear();
        }
    }

    /**
     * @returns {{size: number, domains: string[]}} Number of cached origins and their keys.
     */
    getCacheStats() {
        return {
            size: this.cache.size,
            domains: [...this.cache.keys()]
        };
    }

    /**
     * Summarizes recker's validation of the robots.txt that governs `url`.
     *
     * @param {string} url Absolute URL on the site to validate.
     * @returns {Promise<object|null>} `{ valid, issues, blocksAllRobots,
     *   blocksImportantPaths, host, size }`, or `null` when recker is
     *   unavailable, robots.txt could not be fetched, or any error occurs.
     */
    async validate(url) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable) {
            return null;
        }
        try {
            const urlObj = new URL(url);
            const domain = `${urlObj.protocol}//${urlObj.host}`;
            const cached = await this._getCachedOrFetch(domain);
            if (!cached.validationResult || !cached.parseResult) {
                return null;
            }
            return {
                valid: cached.validationResult.valid,
                issues: cached.validationResult.issues,
                blocksAllRobots: cached.parseResult.blocksAllRobots,
                blocksImportantPaths: cached.parseResult.blocksImportantPaths,
                host: cached.parseResult.host,
                size: cached.parseResult.size
            };
        }
        catch {
            // Best effort: validation problems are reported as "no result".
            return null;
        }
    }

    /**
     * Validates raw robots.txt text with recker.
     *
     * @param {string} content robots.txt body.
     * @param {string} baseUrl Passed through to recker's validator.
     * @returns {Promise<object|null>} Validator output, or `null` when recker is unavailable.
     */
    async validateContent(content, baseUrl) {
        const isReckerAvailable = await this._checkReckerAvailability();
        if (!isReckerAvailable || !this.validateRobotsTxt) {
            return null;
        }
        return this.validateRobotsTxt(content, baseUrl);
    }

    /**
     * Parses raw robots.txt text with recker.
     *
     * Synchronous, so it does not probe for recker itself: it returns `null`
     * until an async method on this instance has loaded the module.
     *
     * @param {string} content robots.txt body.
     * @returns {object|null}
     */
    parseContent(content) {
        if (!this.reckerAvailable || !this.parseRobotsTxt) {
            return null;
        }
        return this.parseRobotsTxt(content);
    }

    /**
     * Reads blocking flags from the cache only; never triggers a fetch.
     *
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {{blocksAllRobots: *, blocksImportantPaths: *}|null} `null` when nothing is cached.
     */
    getBlockingStatus(domain) {
        const cached = this.cache.get(domain);
        if (!cached?.parseResult) {
            return null;
        }
        return {
            blocksAllRobots: cached.parseResult.blocksAllRobots,
            blocksImportantPaths: cached.parseResult.blocksImportantPaths
        };
    }

    /**
     * Reads the parsed `host` value from the cache only; never triggers a fetch.
     *
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {string|null}
     */
    getHost(domain) {
        const cached = this.cache.get(domain);
        return cached?.parseResult?.host || null;
    }

    /**
     * @param {string} domain Origin such as `https://example.com`.
     * @returns {Promise<Array>} Validation issues, or an empty array when
     *   there is no validation result (including when recker is unavailable).
     */
    async getValidationIssues(domain) {
        const cached = await this._getCachedOrFetch(domain);
        return cached.validationResult?.issues || [];
    }

    /**
     * @returns {boolean} `true` only after recker has been probed and imported successfully.
     */
    isReckerEnabled() {
        return this.reckerAvailable === true;
    }
}
335
+ export default ReckerRobotsValidator;
336
+ //# sourceMappingURL=recker-robots-validator.js.map