s3db.js 18.0.11-next.1534f717 → 18.0.11-next.47047b5d
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clients/recker-http-handler.js +56 -8
- package/dist/clients/recker-http-handler.js.map +1 -1
- package/dist/concerns/id/alphabets.js +150 -0
- package/dist/concerns/id/alphabets.js.map +1 -0
- package/dist/concerns/id/entropy.js +243 -0
- package/dist/concerns/id/entropy.js.map +1 -0
- package/dist/concerns/id/generators/nanoid.js +74 -0
- package/dist/concerns/id/generators/nanoid.js.map +1 -0
- package/dist/concerns/id/generators/sid.js +73 -0
- package/dist/concerns/id/generators/sid.js.map +1 -0
- package/dist/concerns/id/generators/ulid.js +208 -0
- package/dist/concerns/id/generators/ulid.js.map +1 -0
- package/dist/concerns/id/generators/uuid-v7.js +150 -0
- package/dist/concerns/id/generators/uuid-v7.js.map +1 -0
- package/dist/concerns/id/index.js +74 -0
- package/dist/concerns/id/index.js.map +1 -0
- package/dist/concerns/plugin-storage.js +114 -0
- package/dist/concerns/plugin-storage.js.map +1 -1
- package/dist/concerns/s3-errors.js +72 -0
- package/dist/concerns/s3-errors.js.map +1 -0
- package/dist/concerns/s3-key.js +54 -0
- package/dist/concerns/s3-key.js.map +1 -0
- package/dist/concerns/safe-merge.js +47 -0
- package/dist/concerns/safe-merge.js.map +1 -0
- package/dist/core/resource-config-validator.js +12 -2
- package/dist/core/resource-config-validator.js.map +1 -1
- package/dist/core/resource-partitions.class.js +12 -1
- package/dist/core/resource-partitions.class.js.map +1 -1
- package/dist/core/resource-persistence.class.js +41 -12
- package/dist/core/resource-persistence.class.js.map +1 -1
- package/dist/core/resource-query.class.js +21 -47
- package/dist/core/resource-query.class.js.map +1 -1
- package/dist/plugins/concerns/s3-mutex.class.js +155 -0
- package/dist/plugins/concerns/s3-mutex.class.js.map +1 -0
- package/dist/plugins/recon/stages/recker-asn-stage.js +279 -0
- package/dist/plugins/recon/stages/recker-asn-stage.js.map +1 -0
- package/dist/plugins/recon/stages/recker-dns-stage.js +227 -0
- package/dist/plugins/recon/stages/recker-dns-stage.js.map +1 -0
- package/dist/plugins/recon/stages/recker-scrape-stage.js +369 -0
- package/dist/plugins/recon/stages/recker-scrape-stage.js.map +1 -0
- package/dist/plugins/spider/recker-link-discoverer.js +544 -0
- package/dist/plugins/spider/recker-link-discoverer.js.map +1 -0
- package/dist/plugins/spider/recker-llms-validator.js +334 -0
- package/dist/plugins/spider/recker-llms-validator.js.map +1 -0
- package/dist/plugins/spider/recker-robots-validator.js +336 -0
- package/dist/plugins/spider/recker-robots-validator.js.map +1 -0
- package/dist/plugins/spider/recker-security-adapter.js +325 -0
- package/dist/plugins/spider/recker-security-adapter.js.map +1 -0
- package/dist/plugins/spider/recker-seo-adapter.js +399 -0
- package/dist/plugins/spider/recker-seo-adapter.js.map +1 -0
- package/dist/plugins/spider/recker-sitemap-validator.js +406 -0
- package/dist/plugins/spider/recker-sitemap-validator.js.map +1 -0
- package/dist/resource.class.js +2 -0
- package/dist/resource.class.js.map +1 -1
- package/dist/s3db.cjs +353 -71
- package/dist/s3db.cjs.map +1 -1
- package/dist/s3db.es.js +354 -72
- package/dist/s3db.es.js.map +1 -1
- package/dist/types/clients/recker-http-handler.d.ts +1 -0
- package/dist/types/clients/recker-http-handler.d.ts.map +1 -1
- package/dist/types/clients/types.d.ts +14 -0
- package/dist/types/clients/types.d.ts.map +1 -1
- package/dist/types/concerns/id/alphabets.d.ts +125 -0
- package/dist/types/concerns/id/alphabets.d.ts.map +1 -0
- package/dist/types/concerns/id/entropy.d.ts +84 -0
- package/dist/types/concerns/id/entropy.d.ts.map +1 -0
- package/dist/types/concerns/id/generators/nanoid.d.ts +46 -0
- package/dist/types/concerns/id/generators/nanoid.d.ts.map +1 -0
- package/dist/types/concerns/id/generators/sid.d.ts +45 -0
- package/dist/types/concerns/id/generators/sid.d.ts.map +1 -0
- package/dist/types/concerns/id/generators/ulid.d.ts +71 -0
- package/dist/types/concerns/id/generators/ulid.d.ts.map +1 -0
- package/dist/types/concerns/id/generators/uuid-v7.d.ts +60 -0
- package/dist/types/concerns/id/generators/uuid-v7.d.ts.map +1 -0
- package/dist/types/concerns/id/index.d.ts +51 -0
- package/dist/types/concerns/id/index.d.ts.map +1 -0
- package/dist/types/concerns/plugin-storage.d.ts +25 -0
- package/dist/types/concerns/plugin-storage.d.ts.map +1 -1
- package/dist/types/concerns/s3-errors.d.ts +20 -0
- package/dist/types/concerns/s3-errors.d.ts.map +1 -0
- package/dist/types/concerns/s3-key.d.ts +30 -0
- package/dist/types/concerns/s3-key.d.ts.map +1 -0
- package/dist/types/concerns/safe-merge.d.ts +22 -0
- package/dist/types/concerns/safe-merge.d.ts.map +1 -0
- package/dist/types/core/resource-config-validator.d.ts.map +1 -1
- package/dist/types/core/resource-partitions.class.d.ts.map +1 -1
- package/dist/types/core/resource-persistence.class.d.ts.map +1 -1
- package/dist/types/core/resource-query.class.d.ts.map +1 -1
- package/dist/types/plugins/concerns/s3-mutex.class.d.ts +30 -0
- package/dist/types/plugins/concerns/s3-mutex.class.d.ts.map +1 -0
- package/dist/types/plugins/recon/stages/recker-asn-stage.d.ts +90 -0
- package/dist/types/plugins/recon/stages/recker-asn-stage.d.ts.map +1 -0
- package/dist/types/plugins/recon/stages/recker-dns-stage.d.ts +125 -0
- package/dist/types/plugins/recon/stages/recker-dns-stage.d.ts.map +1 -0
- package/dist/types/plugins/recon/stages/recker-scrape-stage.d.ts +96 -0
- package/dist/types/plugins/recon/stages/recker-scrape-stage.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-link-discoverer.d.ts +54 -0
- package/dist/types/plugins/spider/recker-link-discoverer.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-llms-validator.d.ts +105 -0
- package/dist/types/plugins/spider/recker-llms-validator.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-robots-validator.d.ts +92 -0
- package/dist/types/plugins/spider/recker-robots-validator.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-security-adapter.d.ts +83 -0
- package/dist/types/plugins/spider/recker-security-adapter.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-seo-adapter.d.ts +187 -0
- package/dist/types/plugins/spider/recker-seo-adapter.d.ts.map +1 -0
- package/dist/types/plugins/spider/recker-sitemap-validator.d.ts +121 -0
- package/dist/types/plugins/spider/recker-sitemap-validator.d.ts.map +1 -0
- package/dist/types/resource.class.d.ts.map +1 -1
- package/mcp/prompts/index.ts +275 -0
- package/mcp/resources/index.ts +322 -0
- package/mcp/tools/plugins.ts +1137 -0
- package/mcp/tools/streams.ts +340 -0
- package/package.json +20 -21
- package/src/clients/recker-http-handler.ts +74 -8
- package/src/clients/types.ts +14 -0
- package/src/concerns/id/alphabets.ts +175 -0
- package/src/concerns/id/entropy.ts +286 -0
- package/src/concerns/id/generators/sid.ts +90 -0
- package/src/concerns/id/generators/ulid.ts +249 -0
- package/src/concerns/id/generators/uuid-v7.ts +179 -0
- package/src/concerns/id/index.ts +167 -0
- package/src/concerns/plugin-storage.ts +144 -0
- package/src/concerns/s3-errors.ts +97 -0
- package/src/concerns/s3-key.ts +62 -0
- package/src/concerns/safe-merge.ts +60 -0
- package/src/core/resource-config-validator.ts +9 -2
- package/src/core/resource-partitions.class.ts +14 -1
- package/src/core/resource-persistence.class.ts +47 -13
- package/src/core/resource-query.class.ts +21 -46
- package/src/plugins/concerns/s3-mutex.class.ts +228 -0
- package/src/plugins/recon/stages/recker-asn-stage.ts +385 -0
- package/src/plugins/recon/stages/recker-dns-stage.ts +360 -0
- package/src/plugins/recon/stages/recker-scrape-stage.ts +509 -0
- package/src/plugins/spider/recker-link-discoverer.ts +645 -0
- package/src/plugins/spider/recker-llms-validator.ts +500 -0
- package/src/plugins/spider/recker-robots-validator.ts +473 -0
- package/src/plugins/spider/recker-security-adapter.ts +489 -0
- package/src/plugins/spider/recker-seo-adapter.ts +605 -0
- package/src/plugins/spider/recker-sitemap-validator.ts +621 -0
- package/src/resource.class.ts +2 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
import { createHttpClient } from '#src/concerns/http-client.js';
|
|
2
|
+
import type { CrawlContext } from './crawl-context.js';
|
|
3
|
+
|
|
4
|
+
/** A Markdown-style link (`[text](url)`) found in an llms.txt document. */
export interface LlmsTxtLink {
  /** Link label: the text between the square brackets. */
  text: string;
  /** Link target: the text between the parentheses. */
  url: string;
  /** Optional free text that follows the link on the same line. */
  description?: string;
  /** Title of the section the link belongs to, when it appears inside one. */
  section?: string;
}
|
|
10
|
+
|
|
11
|
+
/** One titled section of an llms.txt document. */
export interface LlmsTxtSection {
  /** Section heading text. */
  title: string;
  /** Non-link text collected for the section. */
  content: string;
  /** Links that appear inside the section. */
  links: LlmsTxtLink[];
}
|
|
16
|
+
|
|
17
|
+
/**
 * Outcome of parsing llms.txt content.
 *
 * NOTE(review): values of this shape are produced by the external parser, so
 * the exact meaning of each field is defined there — confirm against it.
 */
export interface LlmsTxtParseResult {
  /** Whether the parser accepted the content. */
  valid: boolean;
  /** Error messages reported while parsing. */
  errors: string[];
  /** Warning messages reported while parsing. */
  warnings: string[];
  /** Site name, when one was found. */
  siteName?: string;
  /** Site description, when one was found. */
  siteDescription?: string;
  /** Sections found in the document. */
  sections: LlmsTxtSection[];
  /** Links found in the document. */
  links: LlmsTxtLink[];
  /** Presumably whether a full version (llms-full.txt) is referenced — TODO confirm. */
  hasFullVersion: boolean;
  /** The content that was parsed. */
  rawContent: string;
  /** Size of the content (unit decided by the parser — TODO confirm). */
  size: number;
}
|
|
29
|
+
|
|
30
|
+
/** A single finding reported while checking or validating llms.txt. */
export interface LlmsTxtValidationIssue {
  /** Severity of the finding. */
  type: 'error' | 'warning' | 'info';
  /** Machine-readable identifier, e.g. `FETCH_ERROR` or `NOT_FOUND`. */
  code: string;
  /** Human-readable explanation of the finding. */
  message: string;
  /** Line the finding refers to, when known. */
  line?: number;
  /** Suggested way to resolve the finding, when available. */
  recommendation?: string;
}
|
|
37
|
+
|
|
38
|
+
/** Result of validating llms.txt content. */
export interface LlmsTxtValidationResult {
  /** Overall verdict of the validation. */
  valid: boolean;
  /** Findings collected during validation. */
  issues: LlmsTxtValidationIssue[];
  /** Parse result the validation was based on. */
  parseResult: LlmsTxtParseResult;
}
|
|
43
|
+
|
|
44
|
+
/** Flattened result of fetching and inspecting a site's `/llms.txt`. */
export interface LlmsTxtCheckResult {
  /** Whether the file could be retrieved. */
  exists: boolean;
  /** Whether the retrieved file was judged valid. */
  valid: boolean;
  /** HTTP status of the fetch, when a response was received. */
  status?: number;
  /** Whether a full version was detected, when that information is available. */
  fullVersionExists?: boolean;
  /** Site name found in the file, if any. */
  siteName?: string;
  /** Site description found in the file, if any. */
  siteDescription?: string;
  /** Sections found in the file (empty when the file is missing). */
  sections: LlmsTxtSection[];
  /** Links found in the file (empty when the file is missing). */
  links: LlmsTxtLink[];
  /** Structured findings about the file or the fetch. */
  issues: LlmsTxtValidationIssue[];
  /** Plain error messages. */
  errors: string[];
  /** Plain warning messages. */
  warnings: string[];
  /** Size of the retrieved content, when it was retrieved. */
  size?: number;
}
|
|
58
|
+
|
|
59
|
+
/** Input for generating an llms.txt template. */
export interface LlmsTxtTemplateOptions {
  /** Name written as the document's top-level heading. */
  siteName: string;
  /** Description written as the blockquote under the heading. */
  siteDescription: string;
  /** Optional sections, each rendered as a heading followed by its links. */
  sections?: {
    /** Section heading text. */
    title: string;
    /** Links listed under the section heading. */
    links: {
      text: string;
      url: string;
      description?: string;
    }[];
  }[];
}
|
|
71
|
+
|
|
72
|
+
/** Options accepted by the llms.txt validator. All fields are optional. */
export interface LlmsTxtValidatorConfig {
  /** `User-Agent` header value used when no crawl context is supplied. */
  userAgent?: string;
  /** Timeout passed to the HTTP client (presumably milliseconds — TODO confirm). */
  fetchTimeout?: number;
  /** Maximum age, in milliseconds, of a cached check result. */
  cacheTimeout?: number;
  /** Shared crawl context used to configure requests and observe responses. */
  context?: CrawlContext | null;
}
|
|
78
|
+
|
|
79
|
+
/** Shape of the `parseLlmsTxt` function loaded from the recker llms-txt module. */
type ReckerParseLlmsTxt = (content: string) => LlmsTxtParseResult;

/** Shape of the `validateLlmsTxt` function loaded from the recker llms-txt module. */
type ReckerValidateLlmsTxt = (
  content: string,
  baseUrl?: string
) => LlmsTxtValidationResult;

/**
 * Shape of the `fetchAndValidateLlmsTxt` function loaded from the recker
 * llms-txt module. The optional `fetcher` lets the caller perform the HTTP
 * request itself and hand back only the status and body text.
 */
type ReckerFetchAndValidateLlmsTxt = (
  url: string,
  fetcher?: (url: string) => Promise<{ status: number; text: string }>
) => Promise<
  LlmsTxtValidationResult & {
    exists: boolean;
    status?: number;
    fullVersionExists?: boolean;
  }
>;

/** Shape of the `generateLlmsTxtTemplate` function loaded from the recker llms-txt module. */
type ReckerGenerateLlmsTxtTemplate = (options: LlmsTxtTemplateOptions) => string;
|
|
86
|
+
|
|
87
|
+
/** A cached check result together with the time it was stored. */
interface CacheEntry {
  /** The check result that was cached. */
  result: LlmsTxtCheckResult;
  /** `Date.now()` value captured when the entry was stored. */
  timestamp: number;
}
|
|
91
|
+
|
|
92
|
+
/** Minimal view of the HTTP client: only the `get` call this file relies on. */
interface HttpClient {
  /** Issues a GET request for `url` and resolves with the response. */
  get(url: string): Promise<HttpResponse>;
}
|
|
95
|
+
|
|
96
|
+
/** Minimal view of an HTTP response: only the members this file reads. */
interface HttpResponse {
  /** Whether the request is considered successful. */
  ok: boolean;
  /** HTTP status code of the response. */
  status: number;
  /** Resolves with the response body as text. */
  text(): Promise<string>;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Checks, parses and validates a site's `/llms.txt` file.
 *
 * The heavy lifting is delegated to the optional `recker/seo/validators/llms-txt`
 * module, which is loaded lazily on first use. When that module cannot be
 * imported (or lacks the needed export) the class falls back to a small
 * built-in parser and template generator.
 */
export class ReckerLlmsTxtValidator {
  private config: Required<Omit<LlmsTxtValidatorConfig, 'context'>> & {
    context: CrawlContext | null;
  };
  private _context: CrawlContext | null;
  /** Check results keyed by the normalized domain (recker path only). */
  private cache: Map<string, CacheEntry>;
  /** Lazily created HTTP client, shared by all requests of this instance. */
  private _httpClient: HttpClient | null;

  /** `null` until the first availability probe, then `true` or `false`. */
  private reckerAvailable: boolean | null = null;
  private parseLlmsTxt: ReckerParseLlmsTxt | null = null;
  private validateLlmsTxt: ReckerValidateLlmsTxt | null = null;
  private fetchAndValidateLlmsTxt: ReckerFetchAndValidateLlmsTxt | null = null;
  private generateLlmsTxtTemplate: ReckerGenerateLlmsTxtTemplate | null = null;

  /**
   * @param config Optional settings. Defaults: user agent `s3db-spider`,
   *   fetch timeout `10000`, cache timeout `3600000` ms, no crawl context.
   */
  constructor(config: LlmsTxtValidatorConfig = {}) {
    this.config = {
      userAgent: config.userAgent || 's3db-spider',
      fetchTimeout: config.fetchTimeout || 10000,
      // `??` (not `||`) so that an explicit 0 is kept: with a timeout of 0 no
      // cache entry is ever "younger than" the limit, which disables caching.
      cacheTimeout: config.cacheTimeout ?? 3600000,
      context: config.context || null
    };

    this._context = this.config.context;
    this.cache = new Map();
    this._httpClient = null;
  }

  /** Strips a single trailing slash so URLs and cache keys are built consistently. */
  private _normalizeDomain(domain: string): string {
    return domain.replace(/\/$/, '');
  }

  /** Extracts a printable message from whatever value was thrown. */
  private _errorMessage(error: unknown): string {
    if (typeof error === 'object' && error !== null && 'message' in error) {
      const { message } = error as { message: unknown };
      if (typeof message === 'string') {
        return message;
      }
    }
    return String(error);
  }

  /**
   * Tries once to import the recker llms-txt module and remembers the outcome.
   *
   * @returns `true` when the module was imported, `false` when the import threw.
   */
  private async _checkReckerAvailability(): Promise<boolean> {
    if (this.reckerAvailable !== null) {
      return this.reckerAvailable;
    }

    try {
      const llmsModule = await import('recker/seo/validators/llms-txt');
      this.parseLlmsTxt = llmsModule.parseLlmsTxt;
      this.validateLlmsTxt = llmsModule.validateLlmsTxt;
      this.fetchAndValidateLlmsTxt = llmsModule.fetchAndValidateLlmsTxt;
      this.generateLlmsTxtTemplate = llmsModule.generateLlmsTxtTemplate;
      this.reckerAvailable = true;
      return true;
    } catch {
      // The dependency is optional: a failed import only disables the recker path.
      this.reckerAvailable = false;
      return false;
    }
  }

  /**
   * Returns the shared HTTP client, creating it on first use.
   *
   * The client uses the configured fetch timeout and retries up to 2 attempts
   * with exponential backoff (500 base delay) on 429/500/502/503/504.
   */
  private async _getHttpClient(): Promise<HttpClient> {
    if (!this._httpClient) {
      // NOTE(review): the crawl context is queried with a fixed placeholder URL;
      // presumably only URL-independent settings are needed here — confirm.
      const baseConfig = this._context
        ? this._context.getHttpClientConfig('https://example.com')
        : {
          headers: {
            'User-Agent': this.config.userAgent
          }
        };

      this._httpClient = await createHttpClient({
        ...baseConfig,
        timeout: this.config.fetchTimeout,
        retry: {
          maxAttempts: 2,
          delay: 500,
          backoff: 'exponential',
          retryAfter: true,
          retryOn: [429, 500, 502, 503, 504]
        }
      }) as unknown as HttpClient;
    }
    return this._httpClient;
  }

  /**
   * Fetches `<domain>/llms.txt` and reports whether it exists and is valid.
   *
   * Uses recker when available (successful results are cached per domain for
   * `cacheTimeout` ms); otherwise uses the built-in fallback check. Never
   * throws: fetch failures are reported in the returned `issues` / `errors`.
   *
   * @param domain Site origin, with or without a trailing slash.
   */
  async check(domain: string): Promise<LlmsTxtCheckResult> {
    const isReckerAvailable = await this._checkReckerAvailability();
    const fetchAndValidate = this.fetchAndValidateLlmsTxt;

    // Also fall back when the module loaded but does not expose the function.
    if (!isReckerAvailable || !fetchAndValidate) {
      return this._fallbackCheck(domain);
    }

    const normalizedDomain = this._normalizeDomain(domain);
    const llmsUrl = `${normalizedDomain}/llms.txt`;

    const cached = this.cache.get(normalizedDomain);
    if (cached && Date.now() - cached.timestamp < this.config.cacheTimeout) {
      return cached.result;
    }

    try {
      const fetchResult = await fetchAndValidate(llmsUrl, async (url) => {
        const client = await this._getHttpClient();
        const response = await client.get(url);

        if (this._context) {
          this._context.processResponse(
            response as unknown as Parameters<typeof this._context.processResponse>[0],
            url
          );
        }

        // Any successful response is reported to recker as a plain 200; the
        // body is only read for successful responses.
        return {
          status: response.ok ? 200 : response.status,
          text: response.ok ? await response.text() : ''
        };
      });

      const result: LlmsTxtCheckResult = {
        exists: fetchResult.exists,
        valid: fetchResult.valid,
        status: fetchResult.status,
        fullVersionExists: fetchResult.fullVersionExists,
        siteName: fetchResult.parseResult.siteName,
        siteDescription: fetchResult.parseResult.siteDescription,
        sections: fetchResult.parseResult.sections,
        links: fetchResult.parseResult.links,
        issues: fetchResult.issues,
        errors: fetchResult.parseResult.errors,
        warnings: fetchResult.parseResult.warnings,
        size: fetchResult.parseResult.size
      };

      this.cache.set(normalizedDomain, { result, timestamp: Date.now() });

      return result;

    } catch (error) {
      // Failures are not cached, so the next call retries the fetch.
      const message = this._errorMessage(error);
      return {
        exists: false,
        valid: false,
        sections: [],
        links: [],
        issues: [{
          type: 'error',
          code: 'FETCH_ERROR',
          message: `Failed to fetch llms.txt: ${message}`
        }],
        errors: [message],
        warnings: []
      };
    }
  }

  /**
   * Recker-free variant of {@link check}: fetches the file and inspects it with
   * the built-in parser. The file counts as valid when it yields a site name or
   * at least one section. Results of this path are not cached.
   */
  private async _fallbackCheck(domain: string): Promise<LlmsTxtCheckResult> {
    const normalizedDomain = this._normalizeDomain(domain);
    const llmsUrl = `${normalizedDomain}/llms.txt`;

    try {
      const client = await this._getHttpClient();
      const response = await client.get(llmsUrl);

      if (!response.ok) {
        return {
          exists: false,
          valid: false,
          status: response.status,
          sections: [],
          links: [],
          issues: [{
            type: 'info',
            code: 'NOT_FOUND',
            message: 'llms.txt file not found',
            recommendation: 'Consider adding an llms.txt file for AI SEO'
          }],
          errors: [],
          warnings: []
        };
      }

      const content = await response.text();
      const parsed = this._simpleParse(content);

      return {
        exists: true,
        valid: parsed.sections.length > 0 || !!parsed.siteName,
        status: 200,
        siteName: parsed.siteName,
        siteDescription: parsed.siteDescription,
        sections: parsed.sections,
        links: parsed.links,
        issues: [],
        errors: [],
        warnings: ['Recker not available - using basic parsing'],
        // String length (UTF-16 code units), not a byte count.
        size: content.length
      };

    } catch (error) {
      return {
        exists: false,
        valid: false,
        sections: [],
        links: [],
        issues: [],
        errors: [this._errorMessage(error)],
        warnings: []
      };
    }
  }

  /**
   * Minimal llms.txt parser used when recker is unavailable.
   *
   * Recognised lines (after trimming):
   * - `# Title`: site name (first occurrence only)
   * - `> text`: site description (first occurrence only)
   * - `## Title`: starts a new section
   * - `[text](url)`, optionally preceded by a list bullet (`-`, `*`, `+`) and
   *   optionally followed by `- description` or `: description`: a link
   * - anything else inside a section is appended to that section's content
   *
   * Blank lines are ignored.
   */
  private _simpleParse(content: string): {
    siteName?: string;
    siteDescription?: string;
    sections: LlmsTxtSection[];
    links: LlmsTxtLink[];
  } {
    // Hoisted so the pattern is built once, not once per line.
    const linkPattern = /^(?:[-*+]\s+)?\[([^\]]+)\]\(([^)]+)\)(?:\s*[-:]\s*(.+))?$/;

    const sections: LlmsTxtSection[] = [];
    const links: LlmsTxtLink[] = [];
    let siteName: string | undefined;
    let siteDescription: string | undefined;
    let currentSection: LlmsTxtSection | null = null;

    for (const line of content.split(/\r?\n/)) {
      const trimmed = line.trim();

      // Only blank lines are skipped. Lines starting with `#` are headings in
      // llms.txt (not comments) and must reach the checks below.
      if (!trimmed) continue;

      if (trimmed.startsWith('# ') && !siteName) {
        siteName = trimmed.slice(2).trim();
        continue;
      }

      if (trimmed.startsWith('>') && !siteDescription) {
        siteDescription = trimmed.slice(1).trim();
        continue;
      }

      if (trimmed.startsWith('## ')) {
        if (currentSection) {
          sections.push(currentSection);
        }
        currentSection = {
          title: trimmed.slice(3).trim(),
          content: '',
          links: []
        };
        continue;
      }

      const linkMatch = trimmed.match(linkPattern);
      if (linkMatch) {
        const link: LlmsTxtLink = {
          text: linkMatch[1]!,
          url: linkMatch[2]!,
          description: linkMatch[3]?.trim(),
          section: currentSection?.title
        };
        links.push(link);
        if (currentSection) {
          currentSection.links.push(link);
        }
        continue;
      }

      if (currentSection) {
        currentSection.content += (currentSection.content ? '\n' : '') + trimmed;
      }
    }

    // Flush the section that was still open at end of input.
    if (currentSection) {
      sections.push(currentSection);
    }

    return { siteName, siteDescription, sections, links };
  }

  /**
   * Runs recker's full validation against `<domain>/llms.txt`.
   *
   * @returns The validation result, or `null` when recker is unavailable, the
   *   file does not exist, the re-fetch is unsuccessful, or the request throws.
   */
  async validate(domain: string): Promise<LlmsTxtValidationResult | null> {
    const isReckerAvailable = await this._checkReckerAvailability();
    const validateLlmsTxt = this.validateLlmsTxt;

    if (!isReckerAvailable || !validateLlmsTxt) {
      return null;
    }

    const checkResult = await this.check(domain);

    if (!checkResult.exists) {
      return null;
    }

    const normalizedDomain = this._normalizeDomain(domain);
    const llmsUrl = `${normalizedDomain}/llms.txt`;

    try {
      const client = await this._getHttpClient();
      const response = await client.get(llmsUrl);

      // Never validate the body of an error response (e.g. an HTML 5xx page).
      if (!response.ok) {
        return null;
      }

      const content = await response.text();

      return validateLlmsTxt(content, normalizedDomain);
    } catch {
      return null;
    }
  }

  /**
   * Validates already-fetched content with recker.
   *
   * @returns `null` unless recker has been loaded by an earlier asynchronous
   *   call (for example `check` or `validate`); this method never loads it.
   */
  validateContent(content: string, baseUrl?: string): LlmsTxtValidationResult | null {
    if (!this.reckerAvailable || !this.validateLlmsTxt) {
      return null;
    }

    return this.validateLlmsTxt(content, baseUrl);
  }

  /**
   * Parses already-fetched content with recker.
   *
   * @returns `null` unless recker has been loaded by an earlier asynchronous
   *   call (for example `check` or `validate`); this method never loads it.
   */
  parseContent(content: string): LlmsTxtParseResult | null {
    if (!this.reckerAvailable || !this.parseLlmsTxt) {
      return null;
    }

    return this.parseLlmsTxt(content);
  }

  /**
   * Generates llms.txt text from the given options, using recker's generator
   * when it has been loaded and the built-in generator otherwise.
   */
  generateTemplate(options: LlmsTxtTemplateOptions): string | null {
    if (!this.reckerAvailable || !this.generateLlmsTxtTemplate) {
      return this._fallbackGenerateTemplate(options);
    }

    return this.generateLlmsTxtTemplate(options);
  }

  /**
   * Built-in template generator: heading, blockquote description, then one
   * `## ` block per section with one link per line, each block followed by a
   * blank line.
   */
  private _fallbackGenerateTemplate(options: LlmsTxtTemplateOptions): string {
    const lines: string[] = [];

    lines.push(`# ${options.siteName}`);
    lines.push('');
    lines.push(`> ${options.siteDescription}`);
    lines.push('');

    if (options.sections) {
      for (const section of options.sections) {
        lines.push(`## ${section.title}`);
        lines.push('');
        for (const link of section.links) {
          if (link.description) {
            lines.push(`[${link.text}](${link.url}) - ${link.description}`);
          } else {
            lines.push(`[${link.text}](${link.url})`);
          }
        }
        lines.push('');
      }
    }

    return lines.join('\n');
  }

  /**
   * Checks whether `<domain>/llms-full.txt` can be fetched.
   *
   * @returns `exists`, plus the status for any received response and the
   *   content length for a successful one. Request errors yield `{ exists: false }`.
   */
  async checkFullVersion(domain: string): Promise<{
    exists: boolean;
    status?: number;
    size?: number;
  }> {
    const normalizedDomain = this._normalizeDomain(domain);
    const llmsFullUrl = `${normalizedDomain}/llms-full.txt`;

    try {
      const client = await this._getHttpClient();
      const response = await client.get(llmsFullUrl);

      if (!response.ok) {
        return { exists: false, status: response.status };
      }

      const content = await response.text();
      return {
        exists: true,
        status: 200,
        size: content.length
      };
    } catch {
      return { exists: false };
    }
  }

  /** Convenience wrapper: the `links` of {@link check} for `domain`. */
  async getLinks(domain: string): Promise<LlmsTxtLink[]> {
    const result = await this.check(domain);
    return result.links;
  }

  /** Convenience wrapper: the `sections` of {@link check} for `domain`. */
  async getSections(domain: string): Promise<LlmsTxtSection[]> {
    const result = await this.check(domain);
    return result.sections;
  }

  /**
   * Drops cached check results.
   *
   * @param domain When given, only that domain's entry is removed; otherwise
   *   the whole cache is cleared.
   */
  clearCache(domain?: string): void {
    if (domain) {
      this.cache.delete(this._normalizeDomain(domain));
    } else {
      this.cache.clear();
    }
  }

  /** Reports how many domains are cached and which ones. */
  getCacheStats(): { size: number; domains: string[] } {
    return {
      size: this.cache.size,
      domains: [...this.cache.keys()]
    };
  }

  /** `true` only after recker has been probed and successfully imported. */
  isReckerEnabled(): boolean {
    return this.reckerAvailable === true;
  }
}
|
|
499
|
+
|
|
500
|
+
export default ReckerLlmsTxtValidator;
|