@houtini/seo-crawler-mcp 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/.github/workflows/ci.yml +59 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +8 -0
  4. package/README.md +694 -0
  5. package/build/analyzers/QueryLoader.d.ts +30 -0
  6. package/build/analyzers/QueryLoader.d.ts.map +1 -0
  7. package/build/analyzers/QueryLoader.js +126 -0
  8. package/build/analyzers/QueryLoader.js.map +1 -0
  9. package/build/cli.d.ts +3 -0
  10. package/build/cli.d.ts.map +1 -0
  11. package/build/cli.js +190 -0
  12. package/build/cli.js.map +1 -0
  13. package/build/core/ContentExtractor.d.ts +30 -0
  14. package/build/core/ContentExtractor.d.ts.map +1 -0
  15. package/build/core/ContentExtractor.js +362 -0
  16. package/build/core/ContentExtractor.js.map +1 -0
  17. package/build/core/CrawlDatabase.d.ts +25 -0
  18. package/build/core/CrawlDatabase.d.ts.map +1 -0
  19. package/build/core/CrawlDatabase.js +603 -0
  20. package/build/core/CrawlDatabase.js.map +1 -0
  21. package/build/core/CrawlOrchestrator.d.ts +27 -0
  22. package/build/core/CrawlOrchestrator.d.ts.map +1 -0
  23. package/build/core/CrawlOrchestrator.js +279 -0
  24. package/build/core/CrawlOrchestrator.js.map +1 -0
  25. package/build/core/CrawlStorage.d.ts +33 -0
  26. package/build/core/CrawlStorage.d.ts.map +1 -0
  27. package/build/core/CrawlStorage.js +94 -0
  28. package/build/core/CrawlStorage.js.map +1 -0
  29. package/build/core/LinkExtractor.d.ts +14 -0
  30. package/build/core/LinkExtractor.d.ts.map +1 -0
  31. package/build/core/LinkExtractor.js +91 -0
  32. package/build/core/LinkExtractor.js.map +1 -0
  33. package/build/core/UrlManager.d.ts +21 -0
  34. package/build/core/UrlManager.d.ts.map +1 -0
  35. package/build/core/UrlManager.js +87 -0
  36. package/build/core/UrlManager.js.map +1 -0
  37. package/build/formatters/structured-report-format.d.ts +48 -0
  38. package/build/formatters/structured-report-format.d.ts.map +1 -0
  39. package/build/formatters/structured-report-format.js +145 -0
  40. package/build/formatters/structured-report-format.js.map +1 -0
  41. package/build/index.d.ts +3 -0
  42. package/build/index.d.ts.map +1 -0
  43. package/build/index.js +214 -0
  44. package/build/index.js.map +1 -0
  45. package/build/schema/index.d.ts +627 -0
  46. package/build/schema/index.d.ts.map +1 -0
  47. package/build/schema/index.js +159 -0
  48. package/build/schema/index.js.map +1 -0
  49. package/build/tools/analyze-seo.d.ts +44 -0
  50. package/build/tools/analyze-seo.d.ts.map +1 -0
  51. package/build/tools/analyze-seo.js +110 -0
  52. package/build/tools/analyze-seo.js.map +1 -0
  53. package/build/tools/list-queries.d.ts +28 -0
  54. package/build/tools/list-queries.d.ts.map +1 -0
  55. package/build/tools/list-queries.js +30 -0
  56. package/build/tools/list-queries.js.map +1 -0
  57. package/build/tools/query-seo-data.d.ts +15 -0
  58. package/build/tools/query-seo-data.d.ts.map +1 -0
  59. package/build/tools/query-seo-data.js +43 -0
  60. package/build/tools/query-seo-data.js.map +1 -0
  61. package/build/tools/run-seo-audit.d.ts +3 -0
  62. package/build/tools/run-seo-audit.d.ts.map +1 -0
  63. package/build/tools/run-seo-audit.js +54 -0
  64. package/build/tools/run-seo-audit.js.map +1 -0
  65. package/build/types/index.d.ts +158 -0
  66. package/build/types/index.d.ts.map +1 -0
  67. package/build/types/index.js +2 -0
  68. package/build/types/index.js.map +1 -0
  69. package/build/utils/debug.d.ts +2 -0
  70. package/build/utils/debug.d.ts.map +1 -0
  71. package/build/utils/debug.js +7 -0
  72. package/build/utils/debug.js.map +1 -0
  73. package/package.json +49 -0
  74. package/server.json +31 -0
  75. package/src/analyzers/QueryLoader.ts +175 -0
  76. package/src/analyzers/queries/README.md +228 -0
  77. package/src/analyzers/queries/content/duplicate-h1.sql +18 -0
  78. package/src/analyzers/queries/content/duplicate-meta-descriptions.sql +18 -0
  79. package/src/analyzers/queries/content/duplicate-titles.sql +19 -0
  80. package/src/analyzers/queries/content/missing-h1.sql +18 -0
  81. package/src/analyzers/queries/content/missing-meta-descriptions.sql +19 -0
  82. package/src/analyzers/queries/content/multiple-h1.sql +17 -0
  83. package/src/analyzers/queries/content/thin-content.sql +18 -0
  84. package/src/analyzers/queries/critical/404-errors.sql +14 -0
  85. package/src/analyzers/queries/critical/broken-internal-links.sql +20 -0
  86. package/src/analyzers/queries/critical/missing-titles.sql +17 -0
  87. package/src/analyzers/queries/critical/server-errors.sql +15 -0
  88. package/src/analyzers/queries/opportunities/high-external-links.sql +18 -0
  89. package/src/analyzers/queries/opportunities/meta-description-length.sql +27 -0
  90. package/src/analyzers/queries/opportunities/missing-images.sql +18 -0
  91. package/src/analyzers/queries/opportunities/no-outbound-links.sql +18 -0
  92. package/src/analyzers/queries/opportunities/title-equals-h1.sql +21 -0
  93. package/src/analyzers/queries/opportunities/title-length.sql +27 -0
  94. package/src/analyzers/queries/security/missing-csp.sql +16 -0
  95. package/src/analyzers/queries/security/missing-hsts.sql +17 -0
  96. package/src/analyzers/queries/security/missing-referrer-policy.sql +16 -0
  97. package/src/analyzers/queries/security/missing-x-frame-options.sql +16 -0
  98. package/src/analyzers/queries/security/protocol-relative-links.sql +16 -0
  99. package/src/analyzers/queries/security/unsafe-external-links.sql +17 -0
  100. package/src/analyzers/queries/technical/canonical-issues.sql +20 -0
  101. package/src/analyzers/queries/technical/heading-hierarchy-issues.sql +19 -0
  102. package/src/analyzers/queries/technical/non-https.sql +16 -0
  103. package/src/analyzers/queries/technical/orphan-pages.sql +21 -0
  104. package/src/analyzers/queries/technical/redirects.sql +15 -0
  105. package/src/cli.ts +224 -0
  106. package/src/core/ContentExtractor.ts +480 -0
  107. package/src/core/CrawlDatabase.ts +736 -0
  108. package/src/core/CrawlOrchestrator.ts +346 -0
  109. package/src/core/CrawlStorage.ts +148 -0
  110. package/src/core/LinkExtractor.ts +123 -0
  111. package/src/core/UrlManager.ts +114 -0
  112. package/src/formatters/structured-report-format.ts +254 -0
  113. package/src/index.ts +259 -0
  114. package/src/schema/index.ts +176 -0
  115. package/src/tools/analyze-seo.ts +184 -0
  116. package/src/tools/list-queries.ts +70 -0
  117. package/src/tools/query-seo-data.ts +77 -0
  118. package/src/tools/run-seo-audit.ts +83 -0
  119. package/src/types/index.ts +179 -0
  120. package/src/utils/debug.ts +12 -0
  121. package/tsconfig.json +26 -0
@@ -0,0 +1,279 @@
1
+ import { HttpCrawler, RequestQueue, Configuration, log } from 'crawlee';
2
+ import { MemoryStorage } from '@crawlee/memory-storage';
3
+ import { load } from 'cheerio';
4
+ import { debug } from '../utils/debug.js';
5
/**
 * Drives a single crawl run.
 *
 * Builds a Crawlee `HttpCrawler` backed by its own `MemoryStorage`, seeds it
 * with `config.startUrl`, passes every fetched page to the injected content
 * and link extractors, persists the results through the injected storage and
 * keeps the in-memory crawl metadata (status, counters, errors) up to date.
 */
export class CrawlOrchestrator {
    config;
    urlManager;
    contentExtractor;
    linkExtractor;
    storage;
    crawler;
    metadata;
    // Links extracted from processed pages, written to storage in batches.
    linkBuffer = [];
    // Number of buffered links at which the buffer is flushed to storage.
    LINK_BUFFER_SIZE = 100;
    memoryStorage;

    /**
     * @param config Crawl configuration (crawlId, startUrl, maxPages, maxDepth,
     *   timeout, userAgent, include/exclude extensions and patterns, crawlExternal).
     * @param urlManager Tracks discovered/visited URLs and their depth.
     * @param contentExtractor Produces the page record from the fetched HTML.
     * @param linkExtractor Produces link records from the parsed document.
     * @param storage Persistence layer for pages, links and metadata.
     */
    constructor(config, urlManager, contentExtractor, linkExtractor, storage) {
        this.config = config;
        this.urlManager = urlManager;
        this.contentExtractor = contentExtractor;
        this.linkExtractor = linkExtractor;
        this.storage = storage;
        // Turn Crawlee's own logger off.
        log.setLevel(log.LEVELS.OFF);
        this.metadata = this.createInitialMetadata();
    }

    /**
     * Builds the metadata record for a crawl that has not started yet.
     * @returns Metadata with status `queued`, null timestamps and zeroed counters.
     */
    createInitialMetadata() {
        return {
            crawlId: this.config.crawlId,
            status: 'queued',
            startedAt: null,
            completedAt: null,
            duration: null,
            stats: {
                discovered: 0,
                crawled: 0,
                failed: 0,
                skipped: 0,
                depth: 0,
                speed: 0
            },
            errors: []
        };
    }

    /**
     * Creates the per-crawl storage client, request queue and `HttpCrawler`
     * and stores them on `this.memoryStorage` / `this.crawler`.
     */
    async initializeCrawler() {
        const storageDir = `./crawlee-storage-${this.config.crawlId}`;
        this.memoryStorage = new MemoryStorage({ localDataDirectory: storageDir });
        const configuration = new Configuration({
            storageClient: this.memoryStorage,
            persistStorage: false,
        });
        const requestQueue = await RequestQueue.open(undefined, { config: configuration });
        const userAgents = {
            chrome: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            googlebot: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
        };
        // Unknown or missing user-agent keys fall back to the Chrome string.
        const userAgent = userAgents[this.config.userAgent] || userAgents.chrome;
        const crawlerConfig = {
            maxRequestsPerCrawl: this.config.maxPages,
            maxConcurrency: 20,
            minConcurrency: 5,
            maxRequestRetries: 5,
            // config.timeout is divided by 1000 to obtain seconds.
            requestHandlerTimeoutSecs: this.config.timeout / 1000,
            navigationTimeoutSecs: 30,
            additionalMimeTypes: ['text/html', 'application/xhtml+xml'],
            requestQueue,
            preNavigationHooks: [
                async ({ request }) => {
                    // Replaces the request's headers wholesale with a browser-like set.
                    request.headers = {
                        'User-Agent': userAgent,
                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                        'Accept-Language': 'en-US,en;q=0.9',
                        'Accept-Encoding': 'gzip, deflate, br',
                        'Connection': 'keep-alive',
                        'Upgrade-Insecure-Requests': '1'
                    };
                }
            ],
            requestHandler: async (context) => {
                const { request, response, body, crawler } = context;
                if (!response || !body)
                    return;
                // Decode the body once and reuse it for parsing and extraction.
                const html = body.toString();
                await this.processPage(request.url, html, load(html), response, crawler);
            },
            failedRequestHandler: async (context, error) => {
                const { request } = context;
                await this.handleFailedRequest(request, error);
            }
        };
        this.crawler = new HttpCrawler(crawlerConfig);
    }

    /**
     * Runs the crawl from start to finish.
     *
     * Errors thrown while setting up or running the crawler are caught, logged
     * and recorded in the metadata (status `failed`). The crawler's
     * MemoryStorage is purged afterwards in all cases, and the final metadata
     * is saved before returning.
     *
     * @returns The final crawl metadata.
     */
    async run() {
        try {
            debug('[ORCH] Starting orchestrator.run()');
            debug('[ORCH] Config:', JSON.stringify(this.config, null, 2));
            await this.initializeCrawler();
            debug('[ORCH] Crawler initialized with isolated MemoryStorage');
            await this.storage.initialize();
            debug('[ORCH] Storage initialized');
            this.metadata.status = 'running';
            this.metadata.startedAt = new Date().toISOString();
            await this.storage.saveMetadata(this.metadata, this.config);
            debug('[ORCH] Metadata saved, status=running');
            this.urlManager.addDiscovered(this.config.startUrl, 0);
            debug('[ORCH] Start URL added to UrlManager');
            debug('[ORCH] About to call crawler.run()...');
            await this.crawler.run([this.config.startUrl]);
            debug('[ORCH] Crawler.run() completed');
            // Persist whatever is left below the batch threshold.
            await this.flushLinkBuffer();
            const endTime = Date.now();
            const startTime = new Date(this.metadata.startedAt).getTime();
            this.metadata.status = 'completed';
            this.metadata.completedAt = new Date().toISOString();
            this.metadata.duration = endTime - startTime;
            this.metadata.stats.depth = this.urlManager.getMaxDepth();
            // Pages per second; guarded against a zero-millisecond duration.
            this.metadata.stats.speed = this.metadata.duration > 0
                ? (this.metadata.stats.crawled / (this.metadata.duration / 1000))
                : 0;
        }
        catch (error) {
            // Anything can be thrown, so never assume `error` is an Error object.
            console.error('[ORCH ERROR] Fatal error in run():', error);
            console.error('[ORCH ERROR] Stack:', error?.stack);
            this.metadata.status = 'failed';
            this.metadata.errors.push({
                url: '',
                errorType: 'unknown',
                message: error?.message || 'Unknown error',
                timestamp: new Date().toISOString()
            });
        }
        finally {
            if (this.memoryStorage) {
                try {
                    await this.memoryStorage.purge();
                    debug('[ORCH] MemoryStorage cleaned up successfully');
                }
                catch (cleanupError) {
                    // Cleanup is best-effort; a failure here must not change the crawl result.
                    console.error('[ORCH WARN] Storage cleanup failed:', cleanupError?.message);
                }
            }
        }
        await this.storage.saveMetadata(this.metadata, this.config);
        return this.metadata;
    }

    /**
     * Writes the buffered links to storage and empties the buffer.
     *
     * The buffer is swapped for a fresh array before awaiting the write, so
     * links pushed by other request handlers while the write is pending land in
     * the new buffer instead of being discarded when the buffer is reset.
     */
    async flushLinkBuffer() {
        if (this.linkBuffer.length === 0)
            return;
        const batch = this.linkBuffer;
        this.linkBuffer = [];
        await this.storage.saveLinkData(batch);
    }

    /**
     * Reads the HTTP status code from a crawler response.
     *
     * Node/got-style responses expose the code as `statusCode`; `status` is
     * kept as a fallback for fetch-style objects. Defaults to 200 when neither
     * is present.
     *
     * @param response Response object handed to the request handler.
     * @returns The numeric status code.
     */
    resolveStatusCode(response) {
        return response?.statusCode || response?.status || 200;
    }

    /**
     * Handles one successfully fetched page: records it as visited, extracts
     * and saves its page data, queues its crawlable links and updates counters.
     *
     * @param url URL of the fetched page.
     * @param html Decoded response body.
     * @param $ Cheerio document loaded from `html`.
     * @param response Response object from the crawler.
     * @param crawler Crawler instance used to enqueue further requests.
     */
    async processPage(url, html, $, response, crawler) {
        this.urlManager.markVisited(url);
        const currentDepth = this.urlManager.getDepth(url);
        const pageData = this.contentExtractor.extract(url, html, $, {
            crawlId: this.config.crawlId,
            depth: currentDepth,
            statusCode: this.resolveStatusCode(response),
            contentType: response.headers?.['content-type'] || 'text/html',
            responseTime: 0,
            // Length of the decoded string, not the byte size of the response.
            size: html.length,
            isInternal: this.urlManager.isInternal(url),
            linkedFrom: this.urlManager.getSourcePages(url),
            redirects: []
        }, response);
        const links = this.linkExtractor.extract($, url, this.config.crawlId);
        const linksToAdd = [];
        for (const link of links) {
            // A link is skipped when it is filtered out or would exceed maxDepth.
            if (!this.shouldCrawlUrl(link.targetUrl) || currentDepth >= this.config.maxDepth) {
                this.metadata.stats.skipped++;
                continue;
            }
            if (!this.urlManager.isDiscovered(link.targetUrl)) {
                this.urlManager.addDiscovered(link.targetUrl, currentDepth + 1, url);
            }
            linksToAdd.push(link.targetUrl);
        }
        if (linksToAdd.length > 0) {
            await crawler.addRequests(linksToAdd);
        }
        await this.storage.savePageData(pageData);
        this.linkBuffer.push(...links);
        if (this.linkBuffer.length >= this.LINK_BUFFER_SIZE) {
            await this.flushLinkBuffer();
        }
        this.metadata.stats.crawled++;
        this.metadata.stats.discovered = this.urlManager.getTotalDiscovered();
        // Persist progress every tenth crawled page.
        if (this.metadata.stats.crawled % 10 === 0) {
            await this.storage.updateMetadata(this.metadata);
        }
    }

    /**
     * Records a request that failed: bumps the failure counter and appends a
     * categorized error entry to the metadata.
     *
     * @param request The failed request (`url`, `retryCount`, `errorMessages`).
     * @param error The error passed to the failed-request handler.
     */
    async handleFailedRequest(request, error) {
        this.metadata.stats.failed++;
        const errorType = this.categorizeError(error);
        const errorMessage = error?.message || 'Unknown error';
        const errorMessages = request.errorMessages || [];
        const fullErrorDetails = errorMessages.length > 0
            ? `${errorMessage} (Retry ${request.retryCount}: ${errorMessages.join(', ')})`
            : errorMessage;
        this.metadata.errors.push({
            url: request.url,
            errorType: errorType,
            message: fullErrorDetails,
            timestamp: new Date().toISOString()
        });
    }

    /**
     * Maps an error to a coarse category by looking for keywords in its
     * message. Checks run in a fixed order and the first match wins.
     *
     * @param error Error-like value; a missing message is treated as empty.
     * @returns One of `timeout`, `dns`, `connection`, `ssl`, `auth`,
     *   `not_found`, `rate_limit`, `server_error` or `network` (the default).
     */
    categorizeError(error) {
        const message = String(error?.message ?? '').toLowerCase();
        if (message.includes('timeout'))
            return 'timeout';
        if (message.includes('dns') || message.includes('getaddrinfo'))
            return 'dns';
        if (message.includes('connect') || message.includes('econnrefused'))
            return 'connection';
        if (message.includes('ssl') || message.includes('certificate'))
            return 'ssl';
        if (message.includes('401') || message.includes('403'))
            return 'auth';
        if (message.includes('404'))
            return 'not_found';
        if (message.includes('429') || message.includes('rate limit'))
            return 'rate_limit';
        if (message.includes('500') || message.includes('502') || message.includes('503'))
            return 'server_error';
        return 'network';
    }

    /**
     * Tests a URL against a list of regular-expression sources.
     *
     * @param patterns Regex source strings; entries that fail to compile are
     *   treated as non-matching.
     * @param url URL to test.
     * @returns True when at least one pattern matches.
     */
    matchesAnyPattern(patterns, url) {
        return patterns.some(pattern => {
            try {
                return new RegExp(pattern).test(url);
            }
            catch {
                return false;
            }
        });
    }

    /**
     * Decides whether a discovered URL passes the configured filters.
     *
     * Order of checks: excluded extension, include-extension list (applied only
     * to URLs that have an extension), exclude patterns, include patterns, then
     * the internal-only restriction when `crawlExternal` is off.
     *
     * @param url Absolute URL of the link target.
     * @returns True when the URL may be queued.
     */
    shouldCrawlUrl(url) {
        const ext = this.getFileExtension(url);
        if (ext && this.config.excludeExtensions.includes(ext)) {
            return false;
        }
        if (ext && this.config.includeExtensions.length > 0 &&
            !this.config.includeExtensions.includes(ext)) {
            return false;
        }
        if (this.config.excludePatterns.length > 0 &&
            this.matchesAnyPattern(this.config.excludePatterns, url)) {
            return false;
        }
        if (this.config.includePatterns.length > 0 &&
            !this.matchesAnyPattern(this.config.includePatterns, url)) {
            return false;
        }
        if (!this.config.crawlExternal && !this.urlManager.isInternal(url)) {
            return false;
        }
        return true;
    }

    /**
     * Extracts the lower-cased file extension from a URL's path.
     *
     * Only the last path segment is inspected, so dots in directory names
     * (e.g. `/v1.2/page`) are not mistaken for an extension.
     *
     * @param url URL string to inspect.
     * @returns The extension without the dot, or `''` when there is none or
     *   the URL cannot be parsed.
     */
    getFileExtension(url) {
        try {
            const pathname = new URL(url).pathname;
            const lastSegment = pathname.slice(pathname.lastIndexOf('/') + 1);
            const dotIndex = lastSegment.lastIndexOf('.');
            if (dotIndex !== -1) {
                return lastSegment.slice(dotIndex + 1).toLowerCase();
            }
        }
        catch {
            // Unparseable URLs are deliberately treated as having no extension.
        }
        return '';
    }
}
279
+ //# sourceMappingURL=CrawlOrchestrator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"CrawlOrchestrator.js","sourceRoot":"","sources":["../../src/core/CrawlOrchestrator.ts"],"names":[],"mappings":"AAcA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAsC,GAAG,EAAE,MAAM,SAAS,CAAC;AAC5G,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAM/B,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAE1C,MAAM,OAAO,iBAAiB;IAQlB;IACA;IACA;IACA;IACA;IAXF,OAAO,CAAM;IACb,QAAQ,CAAgB;IACxB,UAAU,GAAU,EAAE,CAAC;IACd,gBAAgB,GAAG,GAAG,CAAC;IAChC,aAAa,CAAiB;IAEtC,YACU,MAAmB,EACnB,UAAsB,EACtB,gBAAkC,EAClC,aAA4B,EAC5B,OAAqB;QAJrB,WAAM,GAAN,MAAM,CAAa;QACnB,eAAU,GAAV,UAAU,CAAY;QACtB,qBAAgB,GAAhB,gBAAgB,CAAkB;QAClC,kBAAa,GAAb,aAAa,CAAe;QAC5B,YAAO,GAAP,OAAO,CAAc;QAG7B,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAE7B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC;IAE/C,CAAC;IAEO,qBAAqB;QAC3B,OAAO;YACL,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,MAAM,EAAE,QAAQ;YAChB,SAAS,EAAE,IAAI;YACf,WAAW,EAAE,IAAI;YACjB,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE;gBACL,UAAU,EAAE,CAAC;gBACb,OAAO,EAAE,CAAC;gBACV,MAAM,EAAE,CAAC;gBACT,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,CAAC;gBACR,KAAK,EAAE,CAAC;aACT;YACD,MAAM,EAAE,EAAE;SACX,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,iBAAiB;QAI7B,MAAM,UAAU,GAAG,qBAAqB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC9D,IAAI,CAAC,aAAa,GAAG,IAAI,aAAa,CAAC,EAAE,kBAAkB,EAAE,UAAU,EAAE,CAAC,CAAC;QAG3E,MAAM,aAAa,GAAG,IAAI,aAAa,CAAC;YACtC,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,cAAc,EAAE,KAAK;SACtB,CAAC,CAAC;QAGH,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;QAGnF,MAAM,UAAU,GAAG;YACjB,MAAM,EAAE,iHAAiH;YACzH,SAAS,EAAE,0EAA0E;SACtF,CAAC;QAEF,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,SAAoC,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC;QAEpG,MAAM,aAAa,GAAG;YACpB,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ;YACzC,cAAc,EAAE,EAAE;YAClB,cAAc,EAAE,CAAC;YACjB,iBAAiB,EAAE,CAAC;YACpB,yBAAyB,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI;YACrD,qBAAqB,EAAE,EAAE;YACzB,mBAAmB,EAAE,CAAC,WAAW,EAAE,uBAAuB,CAAC;YAC3D,YAAY;YACZ,kBAAkB,EAAE;gBAClB,KAAK,EAAE,EAAE,OAAO,EAAO,EAAE,EAAE;oBACzB,OAAO,CAAC,
OAAO,GAAG;wBAChB,YAAY,EAAE,SAAS;wBACvB,QAAQ,EAAE,4EAA4E;wBACtF,iBAAiB,EAAE,gBAAgB;wBACnC,iBAAiB,EAAE,mBAAmB;wBACtC,YAAY,EAAE,YAAY;wBAC1B,2BAA2B,EAAE,GAAG;qBACjC,CAAC;gBACJ,CAAC;aACF;YACD,cAAc,EAAE,KAAK,EAAE,OAAwB,EAAE,EAAE;gBACjD,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,OAAc,CAAC;gBAC5D,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI;oBAAE,OAAO;gBAE/B,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAChC,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC7E,CAAC;YACD,oBAAoB,EAAE,KAAK,EAAE,OAAwB,EAAE,KAAY,EAAE,EAAE;gBACrE,MAAM,EAAE,OAAO,EAAE,GAAG,OAAc,CAAC;gBACnC,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACjD,CAAC;SACF,CAAC;QAEF,IAAI,CAAC,OAAO,GAAG,IAAI,WAAW,CAAC,aAAoB,CAAC,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,GAAG;QACP,IAAI,CAAC;YACH,KAAK,CAAC,oCAAoC,CAAC,CAAC;YAC5C,KAAK,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAG9D,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;YAC/B,KAAK,CAAC,wDAAwD,CAAC,CAAC;YAEhE,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;YAChC,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAEpC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACnD,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAC5D,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAE/C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;YACvD,KAAK,CAAC,sCAAsC,CAAC,CAAC;YAE9C,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAC/C,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC/C,KAAK,CAAC,gCAAgC,CAAC,CAAC;YAExC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACjD,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;YACvB,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAU,CAAC,CAAC,OAAO,EAAE,CAAC;YAE/D,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,WAAW,CAAC;YACnC,IAAI,CAAC,QAAQ,CAAC,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE
,CAAC;YACrD,IAAI,CAAC,QAAQ,CAAC,QAAQ,GAAG,OAAO,GAAG,SAAS,CAAC;YAC7C,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,GAAG,CAAC;gBACpD,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,GAAG,IAAI,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC,CAAC;QAER,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;YAC3D,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YAClD,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,QAAQ,CAAC;YAChC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC;gBACxB,GAAG,EAAE,EAAE;gBACP,SAAS,EAAE,SAAS;gBACpB,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,eAAe;gBACzC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC,CAAC;QACL,CAAC;gBAAS,CAAC;YAGT,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,IAAI,CAAC;oBACH,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC;oBACjC,KAAK,CAAC,8CAA8C,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,YAAiB,EAAE,CAAC;oBAC3B,OAAO,CAAC,KAAK,CAAC,qCAAqC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;gBAC7E,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAEO,KAAK,CAAC,WAAW,CACvB,GAAW,EACX,IAAY,EACZ,CAAM,EACN,QAAa,EACb,OAAY;QAEZ,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAEjC,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE;YAC3D,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC;YACpC,UAAU,EAAE,QAAQ,CAAC,MAAM,IAAI,GAAG;YAClC,WAAW,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC,cAAc,CAAC,IAAI,WAAW;YAC9D,YAAY,EAAE,CAAC;YACf,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC;YAC3C,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,GAAG,CAAC;YAC/C,SAAS,EAAE,EAAE;SACd,EAAE,QAAQ,CAAC,CAAC;QAEb,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,UAAU,GAAa,EAAE,CAAC;QAEhC,KAAK,MAAM,I
AAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBAExC,IAAI,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;oBAExC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;wBAClD,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;oBACvE,CAAC;oBACD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAClC,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;gBAChC,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YAChC,CAAC;QACH,CAAC;QAED,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,OAAO,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAE1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QAC/B,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACpD,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACjD,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;QACvB,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,EAAE,CAAC;QAEtE,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YAC3C,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,mBAAmB,CAAC,OAAgB,EAAE,KAAY;QAC9D,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,IAAI,eAAe,CAAC;QACtD,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE,CAAC;QAElD,MAAM,gBAAgB,GAAG,aAAa,CAAC,MAAM,GAAG,CAAC;YAC/C,CAAC,CAAC,GAAG,YAAY,WAAW,OAAO,CAAC,UAAU,KAAK,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YAC9E,CAAC,CAAC,YAAY,CAAC;QAEjB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC;YACxB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,SAAS;YACpB,OAAO,EAAE,gBAAgB;YACzB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,eAAe,CAAC,KAAY;QAClC,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAE5C,IAAI,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;YAAE,OAAO,SAAS,CAAC;QAClD,IAAI,OAAO
,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,KAAK,CAAC;QAC7E,IAAI,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC;YAAE,OAAO,YAAY,CAAC;QACzF,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,KAAK,CAAC;QAC7E,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QACtE,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,WAAW,CAAC;QAChD,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,YAAY,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,cAAc,CAAC;QAEzG,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,cAAc,CAAC,GAAW;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;QAGvC,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,KAAK,CAAC;QACf,CAAC;QAID,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC;YAC/C,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACjD,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3C,IAAI,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;gBAC7C,IAAI,CAAC;oBACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACvC,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC,CAAC,EAAE,CAAC;gBACH,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;gBAC9C,IAAI,CAAC;oBACH,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACvC,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC,CAAC,EAAE,CAAC;gBACH,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACnE,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,gBAAgB,CAAC,GAAW;QAClC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACv
C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAC/C,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QACV,OAAO,EAAE,CAAC;IACZ,CAAC;CACF"}
@@ -0,0 +1,33 @@
1
+ import type { CrawlConfig, CrawlMetadata, PageData, LinkData } from '../types/index.js';
2
+ import { CrawlDatabase } from './CrawlDatabase.js';
3
/**
 * Storage facade for one crawl output directory: a `config.json` file plus a
 * `CrawlDatabase` stored as `crawl-data.db` inside that directory.
 */
export declare class CrawlStorage {
    private baseDir;
    private db;
    private dbPath;
    private baseUrl?;

    /**
     * @param outputPath Directory that holds the crawl's files.
     * @param baseUrl Optional base URL passed along when metadata is saved.
     */
    constructor(outputPath: string, baseUrl?: string);

    /** Creates the output directory (recursively) and opens the database. */
    initialize(): Promise<void>;

    /** Validates the config against the schema and writes it to `config.json`. */
    saveConfig(config: CrawlConfig): Promise<void>;

    /** Reads `config.json` and returns it after schema validation. */
    loadConfig(): Promise<CrawlConfig>;

    /** Saves crawl metadata (with the base URL and optional config) to the database. */
    saveMetadata(metadata: CrawlMetadata, config?: CrawlConfig): Promise<void>;

    /** Loads the metadata for the crawl id found in `config.json`. */
    loadMetadata(): Promise<CrawlMetadata | null>;

    /** Applies a partial metadata update in the database. */
    updateMetadata(updates: Partial<CrawlMetadata>): Promise<void>;

    /** Saves a single page record. */
    savePageData(page: PageData): Promise<void>;

    /** Saves several page records in one call. */
    savePageDataBatch(pages: PageData[]): Promise<void>;

    /** Looks up one page record by URL. */
    loadPageData(url: string): Promise<PageData | null>;

    /** Returns every stored page record. */
    loadAllPageData(): Promise<PageData[]>;

    /** Saves link records. */
    saveLinkData(links: LinkData[]): Promise<void>;

    /** Returns every stored link record. */
    loadLinkData(): Promise<LinkData[]>;

    /** Saves additional link records; delegates to the same database call as `saveLinkData`. */
    appendLinkData(newLinks: LinkData[]): Promise<void>;

    /** Exports the database contents to `crawl-export.csv` in the output directory. */
    generateCsvExport(): Promise<void>;

    /** Resolves to true when the output directory is accessible. */
    exists(): Promise<boolean>;

    /** Page and link counts plus flags for the presence of metadata and `config.json`. */
    getStats(): Promise<{
        totalPages: number;
        totalLinks: number;
        hasMetadata: boolean;
        hasConfig: boolean;
    }>;

    /** Closes the underlying database. */
    close(): void;

    /** Returns the underlying database instance. */
    getDatabase(): CrawlDatabase;
}
33
+ //# sourceMappingURL=CrawlStorage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"CrawlStorage.d.ts","sourceRoot":"","sources":["../../src/core/CrawlStorage.ts"],"names":[],"mappings":"AAqBA,OAAO,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAExF,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,EAAE,CAAiB;IAC3B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAC,CAAS;gBAEb,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM;IAM1C,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B,UAAU,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAM9C,UAAU,IAAI,OAAO,CAAC,WAAW,CAAC;IAOlC,YAAY,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAI1E,YAAY,IAAI,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC;IAK7C,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAK9D,YAAY,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAI3C,iBAAiB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAInD,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC;IAInD,eAAe,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;IAKtC,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAI9C,YAAY,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;IAInC,cAAc,CAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAKnD,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC;IAMlC,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC;IAS1B,QAAQ,IAAI,OAAO,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,OAAO,CAAC;QACrB,SAAS,EAAE,OAAO,CAAC;KACpB,CAAC;IAkBF,KAAK,IAAI,IAAI;IAKb,WAAW,IAAI,aAAa;CAG7B"}
@@ -0,0 +1,94 @@
1
+ import { promises as fs } from 'fs';
2
+ import { join } from 'path';
3
+ import { CrawlConfigSchema } from '../schema/index.js';
4
+ import { CrawlDatabase } from './CrawlDatabase.js';
5
/**
 * Storage facade for one crawl output directory.
 *
 * The crawl configuration lives in `config.json`; pages, links and metadata
 * are delegated to a `CrawlDatabase` stored as `crawl-data.db` in the same
 * directory. `initialize()` must be called before any database-backed method.
 */
export class CrawlStorage {
    baseDir;
    db;
    dbPath;
    baseUrl;

    /**
     * @param outputPath Directory that holds the crawl's files.
     * @param baseUrl Optional base URL passed along when metadata is saved.
     */
    constructor(outputPath, baseUrl) {
        this.baseDir = outputPath;
        this.dbPath = join(outputPath, 'crawl-data.db');
        this.baseUrl = baseUrl;
    }

    /** Creates the output directory (recursively) and opens the database. */
    async initialize() {
        await fs.mkdir(this.baseDir, { recursive: true });
        this.db = new CrawlDatabase(this.dbPath);
    }

    /**
     * Validates the config against `CrawlConfigSchema` and writes it to
     * `config.json` as pretty-printed JSON.
     * @param config Crawl configuration to persist.
     */
    async saveConfig(config) {
        const validated = CrawlConfigSchema.parse(config);
        const filePath = join(this.baseDir, 'config.json');
        await fs.writeFile(filePath, JSON.stringify(validated, null, 2), 'utf-8');
    }

    /**
     * Reads `config.json` and validates it against `CrawlConfigSchema`.
     * Rejects when the file is missing, is not valid JSON, or fails validation.
     * @returns The parsed configuration.
     */
    async loadConfig() {
        const filePath = join(this.baseDir, 'config.json');
        const content = await fs.readFile(filePath, 'utf-8');
        return CrawlConfigSchema.parse(JSON.parse(content));
    }

    /**
     * Saves crawl metadata to the database together with the base URL.
     * @param metadata Metadata record to store.
     * @param config Optional crawl configuration stored alongside it.
     */
    async saveMetadata(metadata, config) {
        this.db.saveCrawlMetadata(metadata, this.baseUrl, config);
    }

    /**
     * Loads the metadata for the crawl id found in `config.json`.
     * Rejects when `config.json` cannot be loaded.
     * @returns The result of the database lookup for that crawl id.
     */
    async loadMetadata() {
        const config = await this.loadConfig();
        return this.db.getCrawlMetadata(config.crawlId);
    }

    /**
     * Applies a metadata update in the database.
     * @param updates Metadata fields to update.
     */
    async updateMetadata(updates) {
        this.db.updateCrawlMetadata(updates);
    }

    /** Saves a single page record. */
    async savePageData(page) {
        this.db.savePage(page);
    }

    /** Saves several page records in one call. */
    async savePageDataBatch(pages) {
        this.db.savePageBatch(pages);
    }

    /** Looks up one page record by URL. */
    async loadPageData(url) {
        return this.db.getPage(url);
    }

    /** Returns every stored page record. */
    async loadAllPageData() {
        return this.db.getAllPages();
    }

    /** Saves link records. */
    async saveLinkData(links) {
        this.db.saveLinks(links);
    }

    /** Returns every stored link record. */
    async loadLinkData() {
        return this.db.getAllLinks();
    }

    /** Saves additional link records; uses the same database call as `saveLinkData`. */
    async appendLinkData(newLinks) {
        this.db.saveLinks(newLinks);
    }

    /** Exports the database contents to `crawl-export.csv` in the output directory. */
    async generateCsvExport() {
        const csvPath = join(this.baseDir, 'crawl-export.csv');
        this.db.exportToCsv(csvPath);
    }

    /** Resolves to true when the output directory is accessible, false otherwise. */
    async exists() {
        try {
            await fs.access(this.baseDir);
            return true;
        }
        catch {
            return false;
        }
    }

    /**
     * Summarizes what is stored for this crawl.
     *
     * Metadata can only be looked up through the crawl id in `config.json`, so
     * when that file is absent the lookup is skipped and `hasMetadata` is
     * reported as false instead of the whole call rejecting.
     *
     * @returns Page and link counts plus `hasMetadata` / `hasConfig` flags.
     */
    async getStats() {
        const totalPages = this.db.getPageCount();
        const totalLinks = this.db.getLinkCount();
        const hasConfig = await fs.access(join(this.baseDir, 'config.json'))
            .then(() => true, () => false);
        const metadata = hasConfig ? await this.loadMetadata() : null;
        return {
            totalPages,
            totalLinks,
            hasMetadata: metadata !== null,
            hasConfig
        };
    }

    /** Closes the database; a no-op when `initialize()` was never called. */
    close() {
        this.db?.close();
    }

    /** Returns the underlying database instance (undefined before `initialize()`). */
    getDatabase() {
        return this.db;
    }
}
94
+ //# sourceMappingURL=CrawlStorage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"CrawlStorage.js","sourceRoot":"","sources":["../../src/core/CrawlStorage.ts"],"names":[],"mappings":"AAmBA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,OAAO,YAAY;IACf,OAAO,CAAS;IAChB,EAAE,CAAiB;IACnB,MAAM,CAAS;IACf,OAAO,CAAU;IAEzB,YAAY,UAAkB,EAAE,OAAgB;QAC9C,IAAI,CAAC,OAAO,GAAG,UAAU,CAAC;QAC1B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;QAChD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,UAAU;QACd,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAElD,IAAI,CAAC,EAAE,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;IAGD,KAAK,CAAC,UAAU,CAAC,MAAmB;QAClC,MAAM,SAAS,GAAG,iBAAiB,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QACnD,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC5E,CAAC;IAED,KAAK,CAAC,UAAU;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACrD,OAAO,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;IACtD,CAAC;IAGD,KAAK,CAAC,YAAY,CAAC,QAAuB,EAAE,MAAoB;QAC9D,IAAI,CAAC,EAAE,CAAC,iBAAiB,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACvC,OAAO,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAClD,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,OAA+B;QAClD,IAAI,CAAC,EAAE,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;IAGD,KAAK,CAAC,YAAY,CAAC,IAAc;QAC/B,IAAI,CAAC,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,KAAiB;QACvC,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,GAAW;QAC5B,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,eAAe;QACnB,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;IAC/B,CAAC
;IAGD,KAAK,CAAC,YAAY,CAAC,KAAiB;QAClC,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,QAAoB;QACvC,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC9B,CAAC;IAGD,KAAK,CAAC,iBAAiB;QACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;QACvD,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAGD,KAAK,CAAC,MAAM;QACV,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ;QAMZ,MAAM,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC1D,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC;SACjF,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAE3C,OAAO;YACL,UAAU,EAAE,SAAS;YACrB,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,QAAQ,KAAK,IAAI;YAC9B,SAAS;SACV,CAAC;IACJ,CAAC;IAGD,KAAK;QACH,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;IAGD,WAAW;QACT,OAAO,IAAI,CAAC,EAAE,CAAC;IACjB,CAAC;CACF"}
@@ -0,0 +1,14 @@
1
+ import type { CheerioAPI } from 'cheerio';
2
+ import type { LinkData } from '../types/index.js';
3
/**
 * Type declaration for the link extractor: collects the `<a href>` links
 * of a Cheerio-parsed page as `LinkData` records.
 */
+ export declare class LinkExtractor {
4
+ private baseDomain;
5
/** `baseDomain` is the site's own host (a bare host or a full URL), used to tell internal links from external ones. */
+ constructor(baseDomain: string);
6
/**
 * Returns one `LinkData` per extracted link on the page.
 * `$` is the loaded Cheerio document, `sourceUrl` the page URL that
 * relative hrefs are resolved against, `crawlId` the crawl the records
 * are tagged with.
 */
+ extract($: CheerioAPI, sourceUrl: string, crawlId: string): LinkData[];
7
+ private shouldSkipLink;
8
+ private extractAnchorText;
9
+ private cleanUrl;
10
+ private normalizeDomain;
11
+ private isInternal;
12
+ private detectPlacement;
13
+ }
14
+ //# sourceMappingURL=LinkExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LinkExtractor.d.ts","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAGlD,qBAAa,aAAa;IACxB,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,EAAE,MAAM;IAI9B,OAAO,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,EAAE;IAgCtE,OAAO,CAAC,cAAc;IAKtB,OAAO,CAAC,iBAAiB;IAKzB,OAAO,CAAC,QAAQ;IAchB,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,UAAU;IASlB,OAAO,CAAC,eAAe;CA0BxB"}
@@ -0,0 +1,91 @@
1
+ import { LinkDataSchema } from '../schema/index.js';
2
/**
 * Extracts the `<a href>` links of a Cheerio-parsed page and turns each
 * one into a link record (source, cleaned target, anchor text,
 * internal/external flag, placement on the page).
 */
export class LinkExtractor {
    /** Lower-case href prefixes that never lead to a crawlable page. */
    static SKIP_PREFIXES = ['#', 'mailto:', 'tel:', 'javascript:'];
    /** Substrings of a class/id that mark an ancestor as navigation. */
    static NAV_KEYWORDS = ['nav', 'menu', 'header'];
    /** Maximum anchor-text length, in UTF-16 code units. */
    static MAX_ANCHOR_LENGTH = 100;
    /** Host of the crawled site with any leading `www.` removed. */
    baseDomain;
    /**
     * @param baseDomain The site's own host, given either as a bare host
     *   (`example.com`) or as a full `http(s)://` URL.
     */
    constructor(baseDomain) {
        this.baseDomain = this.normalizeDomain(baseDomain);
    }
    /**
     * Collects every usable `a[href]` of the document.
     *
     * @param $ Loaded Cheerio document.
     * @param sourceUrl URL of the page; relative hrefs are resolved against it.
     * @param crawlId Identifier copied onto every record.
     * @returns The records that passed `LinkDataSchema.parse`, in document order.
     */
    extract($, sourceUrl, crawlId) {
        const links = [];
        $('a[href]').each((_, el) => {
            const $el = $(el);
            const href = $el.attr('href')?.trim();
            if (!href || this.shouldSkipLink(href)) {
                return;
            }
            try {
                const absolute = new URL(href, sourceUrl);
                const targetUrl = this.cleanUrl(absolute);
                const linkData = {
                    crawlId,
                    sourceUrl,
                    targetUrl,
                    anchorText: this.extractAnchorText($el),
                    isInternal: this.isInternal(targetUrl),
                    targetDomain: absolute.hostname,
                    targetStatus: null,
                    placement: this.detectPlacement($el),
                    discoveredAt: new Date().toISOString()
                };
                links.push(LinkDataSchema.parse(linkData));
            }
            catch {
                // Deliberate best-effort: an href that cannot be resolved to a
                // URL, or a record the schema rejects, is skipped so that one
                // bad link does not abort extraction for the whole page.
            }
        });
        return links;
    }
    /**
     * True for fragment-only links and for `mailto:`, `tel:` and
     * `javascript:` hrefs. URL schemes are case-insensitive, so the
     * comparison is done on the lower-cased href (`MAILTO:` is skipped too).
     */
    shouldSkipLink(href) {
        const lowered = href.toLowerCase();
        return LinkExtractor.SKIP_PREFIXES.some(prefix => lowered.startsWith(prefix));
    }
    /**
     * Returns the trimmed text of the element, capped at
     * `MAX_ANCHOR_LENGTH` UTF-16 code units, or `'(no text)'` when empty.
     */
    extractAnchorText($el) {
        let text = $el.text().trim().slice(0, LinkExtractor.MAX_ANCHOR_LENGTH);
        // If the cut landed between the two halves of a surrogate pair, drop
        // the dangling high surrogate rather than emit malformed UTF-16.
        const lastUnit = text.charCodeAt(text.length - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            text = text.slice(0, -1);
        }
        return text || '(no text)';
    }
    /**
     * Builds the stored form of a URL: protocol, hostname, pathname and
     * query string. The fragment is dropped, and because `hostname` (not
     * `host`) is used, so are any port and credentials. One trailing `/` is
     * then removed from the end of the resulting string.
     *
     * NOTE(review): the trailing-slash strip is applied after the query is
     * appended, so a query ending in `/` loses that character. Left as-is
     * because other URL normalisation in the project may need to agree.
     */
    cleanUrl(url) {
        let clean = `${url.protocol}//${url.hostname}${url.pathname}`;
        if (url.search) {
            clean += url.search;
        }
        if (clean.endsWith('/') && clean.length > clean.indexOf('://') + 4) {
            clean = clean.slice(0, -1);
        }
        return clean;
    }
    /**
     * Reduces a host or URL to its hostname without a leading `www.`.
     *
     * Only an explicit `http://` / `https://` prefix (any case) counts as
     * "already a URL". A bare host that merely starts with the letters
     * `http` (e.g. `httpbin.org/get`) gets `https://` prepended like any
     * other host. If the value still cannot be parsed, it is returned with
     * just the `www.` prefix removed.
     */
    normalizeDomain(domain) {
        try {
            const url = /^https?:\/\//i.test(domain) ? domain : `https://${domain}`;
            return new URL(url).hostname.replace(/^www\./, '');
        }
        catch {
            return domain.replace(/^www\./, '');
        }
    }
    /**
     * True when the URL's hostname, ignoring a leading `www.`, equals
     * `baseDomain` exactly. Subdomains other than `www` count as external.
     * Unparseable URLs are reported as not internal.
     */
    isInternal(url) {
        try {
            const urlDomain = new URL(url).hostname.replace(/^www\./, '');
            return urlDomain === this.baseDomain;
        }
        catch {
            return false;
        }
    }
    /**
     * Classifies where a link sits by walking its ancestors from the parent
     * upwards; the first ancestor that matches decides.
     *
     * - `'footer'`: a `<footer>` tag, or a class/id containing `footer`.
     * - `'navigation'`: a `<nav>` or `<header>` tag, or a class/id
     *   containing `nav`, `menu` or `header`.
     * - `'body'`: no ancestor matched.
     *
     * Class/id matching is by substring, so e.g. a class of `canvas`
     * matches `nav`.
     */
    detectPlacement($el) {
        let current = $el.parent();
        while (current && current.length > 0) {
            const tagName = current.prop('tagName')?.toLowerCase();
            const classes = current.attr('class')?.toLowerCase() ?? '';
            const id = current.attr('id')?.toLowerCase() ?? '';
            if (tagName === 'footer' || classes.includes('footer') || id.includes('footer')) {
                return 'footer';
            }
            if (tagName === 'nav' || tagName === 'header') {
                return 'navigation';
            }
            if (LinkExtractor.NAV_KEYWORDS.some(keyword => classes.includes(keyword) || id.includes(keyword))) {
                return 'navigation';
            }
            current = current.parent();
        }
        return 'body';
    }
}
91
+ //# sourceMappingURL=LinkExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LinkExtractor.js","sourceRoot":"","sources":["../../src/core/LinkExtractor.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpD,MAAM,OAAO,aAAa;IAChB,UAAU,CAAS;IAE3B,YAAY,UAAkB;QAC5B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,CAAC,CAAa,EAAE,SAAiB,EAAE,OAAe;QACvD,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvC,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBAE1C,MAAM,QAAQ,GAAa;oBACzB,OAAO;oBACP,SAAS;oBACT,SAAS;oBACT,UAAU,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACzC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;oBACtC,YAAY,EAAE,QAAQ,CAAC,QAAQ;oBAC/B,YAAY,EAAE,IAAI;oBAClB,SAAS,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACtC,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACvC,CAAC;gBAEF,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC7C,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;QAC7D,OAAO,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAC9D,CAAC;IAEO,iBAAiB,CAAC,GAAQ;QAChC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,WAAW,CAAC;IAC3C,CAAC;IAEO,QAAQ,CAAC,GAAQ;QACvB,IAAI,KAAK,GAAG,GAAG,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAE9D,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YACf,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC;QACtB,CAAC;QAED,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACnE,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO,KAAK,CAAC;IACf,
CAAC;IAEO,eAAe,CAAC,MAAc;QACpC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,CAAC;YACrE,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACrD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,UAAU,CAAC,GAAW;QAC5B,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAC9D,OAAO,SAAS,KAAK,IAAI,CAAC,UAAU,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,GAAQ;QAC9B,IAAI,OAAO,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QAE3B,OAAO,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,CAAC;YACvD,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAC3D,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAEnD,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAChF,OAAO,QAAQ,CAAC;YAClB,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC9C,OAAO,YAAY,CAAC;YACtB,CAAC;YAED,MAAM,WAAW,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;YAC9C,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;gBACnF,OAAO,YAAY,CAAC;YACtB,CAAC;YAED,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC7B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,21 @@
1
/**
 * Type declaration for `UrlManager`. Only the signatures are visible here;
 * the notes below are inferred from member names and types.
 * NOTE(review): confirm against the implementation in src/core/UrlManager.ts.
 */
+ export declare class UrlManager {
2
+ private baseDomain;
3
+ private discovered;
4
+ private visited;
5
+ private sourcePagesMap;
6
+ constructor(baseDomain: string);
7
/** Takes a URL string and returns a string; presumably the canonical form used as the key for the lookups below — TODO confirm. */
+ normalizeUrl(url: string): string;
8
+ private normalizeDomain;
9
+ isInternal(url: string): boolean;
10
/** Registers a URL with a numeric `depth` and an optional `sourceUrl`; presumably the page it was found on — TODO confirm. */
+ addDiscovered(url: string, depth: number, sourceUrl?: string): void;
11
+ markVisited(url: string): void;
12
+ isVisited(url: string): boolean;
13
+ isDiscovered(url: string): boolean;
14
/** Returns a list of strings for the given URL; presumably the `sourceUrl` values recorded by `addDiscovered` — TODO confirm. */
+ getSourcePages(url: string): string[];
15
+ getDepth(url: string): number;
16
+ getTotalDiscovered(): number;
17
+ getTotalVisited(): number;
18
+ getMaxDepth(): number;
19
+ getUnvisitedUrls(): string[];
20
+ }
21
+ //# sourceMappingURL=UrlManager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"UrlManager.d.ts","sourceRoot":"","sources":["../../src/core/UrlManager.ts"],"names":[],"mappings":"AAWA,qBAAa,UAAU;IACrB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,UAAU,CAAkC;IACpD,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,cAAc,CAAuC;gBAEjD,UAAU,EAAE,MAAM;IAI9B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAoBjC,OAAO,CAAC,eAAe;IASvB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAShC,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI;IAgBnE,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAK9B,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI/B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAIlC,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMrC,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAI7B,kBAAkB,IAAI,MAAM;IAI5B,eAAe,IAAI,MAAM;IAIzB,WAAW,IAAI,MAAM;IAIrB,gBAAgB,IAAI,MAAM,EAAE;CAG7B"}