@arabold/docs-mcp-server 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -3
- package/dist/{chunk-FAZDXJQN.js → chunk-A5FW7XVC.js} +346 -265
- package/dist/chunk-A5FW7XVC.js.map +1 -0
- package/dist/cli.js +38 -4
- package/dist/cli.js.map +1 -1
- package/dist/server.js +38 -13
- package/dist/server.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-FAZDXJQN.js.map +0 -1
|
@@ -100,6 +100,11 @@ var require_extend = __commonJS({
|
|
|
100
100
|
}
|
|
101
101
|
});
|
|
102
102
|
|
|
103
|
+
// src/config.ts
|
|
104
|
+
var DEFAULT_MAX_PAGES = 1e3;
|
|
105
|
+
var DEFAULT_MAX_DEPTH = 3;
|
|
106
|
+
var DEFAULT_MAX_CONCURRENCY = 3;
|
|
107
|
+
|
|
103
108
|
// src/utils/logger.ts
|
|
104
109
|
var currentLogLevel = 2 /* INFO */;
|
|
105
110
|
function setLogLevel(level) {
|
|
@@ -144,55 +149,8 @@ var logger = {
|
|
|
144
149
|
}
|
|
145
150
|
};
|
|
146
151
|
|
|
147
|
-
//
|
|
148
|
-
|
|
149
|
-
for (let i = 0; i < 256; ++i) {
|
|
150
|
-
byteToHex.push((i + 256).toString(16).slice(1));
|
|
151
|
-
}
|
|
152
|
-
function unsafeStringify(arr, offset = 0) {
|
|
153
|
-
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
// node_modules/uuid/dist/esm-node/rng.js
|
|
157
|
-
import crypto from "node:crypto";
|
|
158
|
-
var rnds8Pool = new Uint8Array(256);
|
|
159
|
-
var poolPtr = rnds8Pool.length;
|
|
160
|
-
function rng() {
|
|
161
|
-
if (poolPtr > rnds8Pool.length - 16) {
|
|
162
|
-
crypto.randomFillSync(rnds8Pool);
|
|
163
|
-
poolPtr = 0;
|
|
164
|
-
}
|
|
165
|
-
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
// node_modules/uuid/dist/esm-node/native.js
|
|
169
|
-
import crypto2 from "node:crypto";
|
|
170
|
-
var native_default = {
|
|
171
|
-
randomUUID: crypto2.randomUUID
|
|
172
|
-
};
|
|
173
|
-
|
|
174
|
-
// node_modules/uuid/dist/esm-node/v4.js
|
|
175
|
-
function v4(options, buf, offset) {
|
|
176
|
-
if (native_default.randomUUID && !buf && !options) {
|
|
177
|
-
return native_default.randomUUID();
|
|
178
|
-
}
|
|
179
|
-
options = options || {};
|
|
180
|
-
const rnds = options.random || (options.rng || rng)();
|
|
181
|
-
rnds[6] = rnds[6] & 15 | 64;
|
|
182
|
-
rnds[8] = rnds[8] & 63 | 128;
|
|
183
|
-
if (buf) {
|
|
184
|
-
offset = offset || 0;
|
|
185
|
-
for (let i = 0; i < 16; ++i) {
|
|
186
|
-
buf[offset + i] = rnds[i];
|
|
187
|
-
}
|
|
188
|
-
return buf;
|
|
189
|
-
}
|
|
190
|
-
return unsafeStringify(rnds);
|
|
191
|
-
}
|
|
192
|
-
var v4_default = v4;
|
|
193
|
-
|
|
194
|
-
// src/utils/url.ts
|
|
195
|
-
import psl from "psl";
|
|
152
|
+
// src/scraper/fetcher/HttpFetcher.ts
|
|
153
|
+
import axios from "axios";
|
|
196
154
|
|
|
197
155
|
// src/utils/errors.ts
|
|
198
156
|
var ScraperError = class extends Error {
|
|
@@ -224,67 +182,7 @@ var RedirectError = class extends ScraperError {
|
|
|
224
182
|
}
|
|
225
183
|
};
|
|
226
184
|
|
|
227
|
-
// src/utils/url.ts
|
|
228
|
-
var defaultNormalizerOptions = {
|
|
229
|
-
ignoreCase: true,
|
|
230
|
-
removeHash: true,
|
|
231
|
-
removeTrailingSlash: true,
|
|
232
|
-
removeQuery: false,
|
|
233
|
-
removeIndex: true
|
|
234
|
-
};
|
|
235
|
-
function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
236
|
-
try {
|
|
237
|
-
const parsedUrl = new URL(url);
|
|
238
|
-
const finalOptions = { ...defaultNormalizerOptions, ...options };
|
|
239
|
-
const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
|
|
240
|
-
if (finalOptions.removeIndex) {
|
|
241
|
-
normalized.pathname = normalized.pathname.replace(
|
|
242
|
-
/\/index\.(html|htm|asp|php|jsp)$/i,
|
|
243
|
-
"/"
|
|
244
|
-
);
|
|
245
|
-
}
|
|
246
|
-
if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
|
|
247
|
-
normalized.pathname = normalized.pathname.replace(/\/+$/, "");
|
|
248
|
-
}
|
|
249
|
-
const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
|
|
250
|
-
const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
|
|
251
|
-
let result = normalized.origin + normalized.pathname;
|
|
252
|
-
if (preservedSearch) {
|
|
253
|
-
result += preservedSearch;
|
|
254
|
-
}
|
|
255
|
-
if (preservedHash) {
|
|
256
|
-
result += preservedHash;
|
|
257
|
-
}
|
|
258
|
-
if (finalOptions.ignoreCase) {
|
|
259
|
-
result = result.toLowerCase();
|
|
260
|
-
}
|
|
261
|
-
return result;
|
|
262
|
-
} catch {
|
|
263
|
-
return url;
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
function validateUrl(url) {
|
|
267
|
-
try {
|
|
268
|
-
new URL(url);
|
|
269
|
-
} catch (error) {
|
|
270
|
-
throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
function hasSameHostname(urlA, urlB) {
|
|
274
|
-
return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
|
|
275
|
-
}
|
|
276
|
-
function hasSameDomain(urlA, urlB) {
|
|
277
|
-
const domainA = psl.get(urlA.hostname.toLowerCase());
|
|
278
|
-
const domainB = psl.get(urlB.hostname.toLowerCase());
|
|
279
|
-
return domainA !== null && domainA === domainB;
|
|
280
|
-
}
|
|
281
|
-
function isSubpath(baseUrl, targetUrl) {
|
|
282
|
-
const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
|
|
283
|
-
return targetUrl.pathname.startsWith(basePath);
|
|
284
|
-
}
|
|
285
|
-
|
|
286
185
|
// src/scraper/fetcher/HttpFetcher.ts
|
|
287
|
-
import axios from "axios";
|
|
288
186
|
var HttpFetcher = class {
|
|
289
187
|
MAX_RETRIES = 6;
|
|
290
188
|
BASE_DELAY = 1e3;
|
|
@@ -394,29 +292,6 @@ var FileFetcher = class {
|
|
|
394
292
|
}
|
|
395
293
|
};
|
|
396
294
|
|
|
397
|
-
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
398
|
-
import { URL as URL2 } from "node:url";
|
|
399
|
-
|
|
400
|
-
// src/pipeline/errors.ts
|
|
401
|
-
var PipelineError = class extends Error {
|
|
402
|
-
constructor(message, cause) {
|
|
403
|
-
super(message);
|
|
404
|
-
this.cause = cause;
|
|
405
|
-
this.name = this.constructor.name;
|
|
406
|
-
if (cause?.stack) {
|
|
407
|
-
this.stack = `${this.stack}
|
|
408
|
-
Caused by: ${cause.stack}`;
|
|
409
|
-
}
|
|
410
|
-
}
|
|
411
|
-
};
|
|
412
|
-
var PipelineStateError = class extends PipelineError {
|
|
413
|
-
};
|
|
414
|
-
var CancellationError = class extends PipelineError {
|
|
415
|
-
constructor(message = "Operation cancelled") {
|
|
416
|
-
super(message);
|
|
417
|
-
}
|
|
418
|
-
};
|
|
419
|
-
|
|
420
295
|
// src/scraper/processor/HtmlProcessor.ts
|
|
421
296
|
import createDOMPurify from "dompurify";
|
|
422
297
|
import { JSDOM } from "jsdom";
|
|
@@ -439,7 +314,7 @@ var HtmlProcessor = class {
|
|
|
439
314
|
"input",
|
|
440
315
|
"textarea",
|
|
441
316
|
"select",
|
|
442
|
-
"form",
|
|
317
|
+
// "form", // Known issue: Some pages use alerts for important content
|
|
443
318
|
".ads",
|
|
444
319
|
".advertisement",
|
|
445
320
|
".banner",
|
|
@@ -472,18 +347,16 @@ var HtmlProcessor = class {
|
|
|
472
347
|
".signup-form",
|
|
473
348
|
".tooltip",
|
|
474
349
|
".dropdown-menu",
|
|
475
|
-
".alert",
|
|
350
|
+
// ".alert", // Known issue: Some pages use alerts for important content
|
|
476
351
|
".breadcrumb",
|
|
477
352
|
".pagination",
|
|
478
|
-
'[role="alert"]',
|
|
353
|
+
// '[role="alert"]', // Known issue: Some pages use alerts for important content
|
|
479
354
|
'[role="banner"]',
|
|
480
355
|
'[role="dialog"]',
|
|
481
356
|
'[role="alertdialog"]',
|
|
482
357
|
'[role="region"][aria-label*="skip" i]',
|
|
483
358
|
'[aria-modal="true"]',
|
|
484
|
-
".noprint"
|
|
485
|
-
"figure",
|
|
486
|
-
"sup"
|
|
359
|
+
".noprint"
|
|
487
360
|
];
|
|
488
361
|
constructor(options) {
|
|
489
362
|
this.turndownService = new TurndownService({
|
|
@@ -514,9 +387,17 @@ var HtmlProcessor = class {
|
|
|
514
387
|
}
|
|
515
388
|
}
|
|
516
389
|
}
|
|
390
|
+
const text3 = (() => {
|
|
391
|
+
const clone = element.cloneNode(true);
|
|
392
|
+
const brElements = Array.from(clone.querySelectorAll("br"));
|
|
393
|
+
for (const br of brElements) {
|
|
394
|
+
br.replaceWith("\n");
|
|
395
|
+
}
|
|
396
|
+
return clone.textContent;
|
|
397
|
+
})();
|
|
517
398
|
return `
|
|
518
399
|
\`\`\`${language}
|
|
519
|
-
${
|
|
400
|
+
${text3}
|
|
520
401
|
\`\`\`
|
|
521
402
|
`;
|
|
522
403
|
}
|
|
@@ -545,9 +426,8 @@ ${cleanedContent}
|
|
|
545
426
|
);
|
|
546
427
|
}
|
|
547
428
|
const htmlContent = typeof content3.content === "string" ? content3.content : content3.content.toString(content3.encoding || "utf-8");
|
|
548
|
-
const
|
|
549
|
-
const title =
|
|
550
|
-
const window = new JSDOM(content3.content, { url: content3.source }).window;
|
|
429
|
+
const window = new JSDOM(htmlContent, { url: content3.source }).window;
|
|
430
|
+
const title = window.document.title || "Untitled";
|
|
551
431
|
const purify = createDOMPurify(window);
|
|
552
432
|
const purifiedContent = purify.sanitize(htmlContent, {
|
|
553
433
|
WHOLE_DOCUMENT: true,
|
|
@@ -622,9 +502,139 @@ var MarkdownProcessor = class {
|
|
|
622
502
|
}
|
|
623
503
|
};
|
|
624
504
|
|
|
505
|
+
// node_modules/uuid/dist/esm-node/stringify.js
|
|
506
|
+
var byteToHex = [];
|
|
507
|
+
for (let i = 0; i < 256; ++i) {
|
|
508
|
+
byteToHex.push((i + 256).toString(16).slice(1));
|
|
509
|
+
}
|
|
510
|
+
function unsafeStringify(arr, offset = 0) {
|
|
511
|
+
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// node_modules/uuid/dist/esm-node/rng.js
|
|
515
|
+
import crypto from "node:crypto";
|
|
516
|
+
var rnds8Pool = new Uint8Array(256);
|
|
517
|
+
var poolPtr = rnds8Pool.length;
|
|
518
|
+
function rng() {
|
|
519
|
+
if (poolPtr > rnds8Pool.length - 16) {
|
|
520
|
+
crypto.randomFillSync(rnds8Pool);
|
|
521
|
+
poolPtr = 0;
|
|
522
|
+
}
|
|
523
|
+
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
// node_modules/uuid/dist/esm-node/native.js
|
|
527
|
+
import crypto2 from "node:crypto";
|
|
528
|
+
var native_default = {
|
|
529
|
+
randomUUID: crypto2.randomUUID
|
|
530
|
+
};
|
|
531
|
+
|
|
532
|
+
// node_modules/uuid/dist/esm-node/v4.js
|
|
533
|
+
function v4(options, buf, offset) {
|
|
534
|
+
if (native_default.randomUUID && !buf && !options) {
|
|
535
|
+
return native_default.randomUUID();
|
|
536
|
+
}
|
|
537
|
+
options = options || {};
|
|
538
|
+
const rnds = options.random || (options.rng || rng)();
|
|
539
|
+
rnds[6] = rnds[6] & 15 | 64;
|
|
540
|
+
rnds[8] = rnds[8] & 63 | 128;
|
|
541
|
+
if (buf) {
|
|
542
|
+
offset = offset || 0;
|
|
543
|
+
for (let i = 0; i < 16; ++i) {
|
|
544
|
+
buf[offset + i] = rnds[i];
|
|
545
|
+
}
|
|
546
|
+
return buf;
|
|
547
|
+
}
|
|
548
|
+
return unsafeStringify(rnds);
|
|
549
|
+
}
|
|
550
|
+
var v4_default = v4;
|
|
551
|
+
|
|
552
|
+
// src/utils/url.ts
|
|
553
|
+
import psl from "psl";
|
|
554
|
+
var defaultNormalizerOptions = {
|
|
555
|
+
ignoreCase: true,
|
|
556
|
+
removeHash: true,
|
|
557
|
+
removeTrailingSlash: true,
|
|
558
|
+
removeQuery: false,
|
|
559
|
+
removeIndex: true
|
|
560
|
+
};
|
|
561
|
+
function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
562
|
+
try {
|
|
563
|
+
const parsedUrl = new URL(url);
|
|
564
|
+
const finalOptions = { ...defaultNormalizerOptions, ...options };
|
|
565
|
+
const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
|
|
566
|
+
if (finalOptions.removeIndex) {
|
|
567
|
+
normalized.pathname = normalized.pathname.replace(
|
|
568
|
+
/\/index\.(html|htm|asp|php|jsp)$/i,
|
|
569
|
+
"/"
|
|
570
|
+
);
|
|
571
|
+
}
|
|
572
|
+
if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
|
|
573
|
+
normalized.pathname = normalized.pathname.replace(/\/+$/, "");
|
|
574
|
+
}
|
|
575
|
+
const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
|
|
576
|
+
const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
|
|
577
|
+
let result = normalized.origin + normalized.pathname;
|
|
578
|
+
if (preservedSearch) {
|
|
579
|
+
result += preservedSearch;
|
|
580
|
+
}
|
|
581
|
+
if (preservedHash) {
|
|
582
|
+
result += preservedHash;
|
|
583
|
+
}
|
|
584
|
+
if (finalOptions.ignoreCase) {
|
|
585
|
+
result = result.toLowerCase();
|
|
586
|
+
}
|
|
587
|
+
return result;
|
|
588
|
+
} catch {
|
|
589
|
+
return url;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
function validateUrl(url) {
|
|
593
|
+
try {
|
|
594
|
+
new URL(url);
|
|
595
|
+
} catch (error) {
|
|
596
|
+
throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
function hasSameHostname(urlA, urlB) {
|
|
600
|
+
return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
|
|
601
|
+
}
|
|
602
|
+
function hasSameDomain(urlA, urlB) {
|
|
603
|
+
const domainA = psl.get(urlA.hostname.toLowerCase());
|
|
604
|
+
const domainB = psl.get(urlB.hostname.toLowerCase());
|
|
605
|
+
return domainA !== null && domainA === domainB;
|
|
606
|
+
}
|
|
607
|
+
function isSubpath(baseUrl, targetUrl) {
|
|
608
|
+
const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
|
|
609
|
+
return targetUrl.pathname.startsWith(basePath);
|
|
610
|
+
}
|
|
611
|
+
|
|
625
612
|
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
626
|
-
|
|
627
|
-
|
|
613
|
+
import { URL as URL2 } from "node:url";
|
|
614
|
+
|
|
615
|
+
// src/pipeline/errors.ts
|
|
616
|
+
var PipelineError = class extends Error {
|
|
617
|
+
constructor(message, cause) {
|
|
618
|
+
super(message);
|
|
619
|
+
this.cause = cause;
|
|
620
|
+
this.name = this.constructor.name;
|
|
621
|
+
if (cause?.stack) {
|
|
622
|
+
this.stack = `${this.stack}
|
|
623
|
+
Caused by: ${cause.stack}`;
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
};
|
|
627
|
+
var PipelineStateError = class extends PipelineError {
|
|
628
|
+
};
|
|
629
|
+
var CancellationError = class extends PipelineError {
|
|
630
|
+
constructor(message = "Operation cancelled") {
|
|
631
|
+
super(message);
|
|
632
|
+
}
|
|
633
|
+
};
|
|
634
|
+
|
|
635
|
+
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
636
|
+
var DEFAULT_MAX_PAGES2 = 100;
|
|
637
|
+
var DEFAULT_MAX_DEPTH2 = 3;
|
|
628
638
|
var DEFAULT_CONCURRENCY = 3;
|
|
629
639
|
var BaseScraperStrategy = class {
|
|
630
640
|
visited = /* @__PURE__ */ new Set();
|
|
@@ -645,7 +655,7 @@ var BaseScraperStrategy = class {
|
|
|
645
655
|
if (signal?.aborted) {
|
|
646
656
|
throw new CancellationError("Scraping cancelled during batch processing");
|
|
647
657
|
}
|
|
648
|
-
const maxDepth = options.maxDepth ??
|
|
658
|
+
const maxDepth = options.maxDepth ?? DEFAULT_MAX_DEPTH2;
|
|
649
659
|
if (item.depth > maxDepth) {
|
|
650
660
|
return [];
|
|
651
661
|
}
|
|
@@ -653,7 +663,7 @@ var BaseScraperStrategy = class {
|
|
|
653
663
|
const result = await this.processItem(item, options, void 0, signal);
|
|
654
664
|
if (result.document) {
|
|
655
665
|
this.pageCount++;
|
|
656
|
-
const maxPages = options.maxPages ??
|
|
666
|
+
const maxPages = options.maxPages ?? DEFAULT_MAX_PAGES2;
|
|
657
667
|
logger.info(
|
|
658
668
|
`\u{1F310} Scraping page ${this.pageCount}/${maxPages} (depth ${item.depth}/${maxDepth}): ${item.url}`
|
|
659
669
|
);
|
|
@@ -705,7 +715,7 @@ var BaseScraperStrategy = class {
|
|
|
705
715
|
const baseUrl = new URL2(options.url);
|
|
706
716
|
const queue = [{ url: options.url, depth: 0 }];
|
|
707
717
|
this.visited.add(normalizeUrl(options.url, this.options.urlNormalizerOptions));
|
|
708
|
-
const maxPages = options.maxPages ??
|
|
718
|
+
const maxPages = options.maxPages ?? DEFAULT_MAX_PAGES2;
|
|
709
719
|
const maxConcurrency = options.maxConcurrency ?? DEFAULT_CONCURRENCY;
|
|
710
720
|
while (queue.length > 0 && this.pageCount < maxPages) {
|
|
711
721
|
if (signal?.aborted) {
|
|
@@ -1315,57 +1325,54 @@ var LibraryNotFoundError = class extends ToolError {
|
|
|
1315
1325
|
}
|
|
1316
1326
|
};
|
|
1317
1327
|
|
|
1318
|
-
// src/tools/
|
|
1319
|
-
var
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
this.
|
|
1328
|
+
// src/tools/FetchUrlTool.ts
|
|
1329
|
+
var FetchUrlTool = class {
|
|
1330
|
+
constructor(httpFetcher, fileFetcher, processor) {
|
|
1331
|
+
this.processor = processor;
|
|
1332
|
+
this.fetchers = [httpFetcher, fileFetcher];
|
|
1323
1333
|
}
|
|
1334
|
+
/**
|
|
1335
|
+
* Collection of fetchers that will be tried in order for a given URL.
|
|
1336
|
+
*/
|
|
1337
|
+
fetchers;
|
|
1338
|
+
/**
|
|
1339
|
+
* Fetches content from a URL and converts it to Markdown.
|
|
1340
|
+
* Supports both HTTP/HTTPS URLs and local file URLs (file://).
|
|
1341
|
+
* @returns The processed Markdown content
|
|
1342
|
+
* @throws {ToolError} If fetching or processing fails
|
|
1343
|
+
*/
|
|
1324
1344
|
async execute(options) {
|
|
1325
|
-
const {
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
)
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
if (!exactMatch) {
|
|
1333
|
-
const versionResult = await this.docService.findBestVersion(library, version);
|
|
1334
|
-
versionToSearch = versionResult.bestMatch;
|
|
1335
|
-
}
|
|
1336
|
-
const results = await this.docService.searchStore(
|
|
1337
|
-
library,
|
|
1338
|
-
versionToSearch,
|
|
1339
|
-
query,
|
|
1340
|
-
limit
|
|
1345
|
+
const { url } = options;
|
|
1346
|
+
const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
|
|
1347
|
+
const fetcherIndex = canFetchResults.findIndex((result) => result === true);
|
|
1348
|
+
if (fetcherIndex === -1) {
|
|
1349
|
+
throw new ToolError(
|
|
1350
|
+
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
1351
|
+
this.constructor.name
|
|
1341
1352
|
);
|
|
1342
|
-
|
|
1343
|
-
|
|
1353
|
+
}
|
|
1354
|
+
const fetcher = this.fetchers[fetcherIndex];
|
|
1355
|
+
try {
|
|
1356
|
+
logger.info(`\u{1F4E1} Fetching ${url}...`);
|
|
1357
|
+
const rawContent = await fetcher.fetch(url, {
|
|
1358
|
+
followRedirects: options.followRedirects ?? true,
|
|
1359
|
+
maxRetries: 3
|
|
1360
|
+
});
|
|
1361
|
+
logger.info("\u{1F504} Converting to Markdown...");
|
|
1362
|
+
const processed = await this.processor.process(rawContent);
|
|
1363
|
+
logger.info(`\u2705 Successfully converted ${url} to Markdown`);
|
|
1364
|
+
return processed.content;
|
|
1344
1365
|
} catch (error) {
|
|
1345
|
-
if (error instanceof
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
message: error.message,
|
|
1351
|
-
suggestions: error.suggestions
|
|
1352
|
-
}
|
|
1353
|
-
};
|
|
1354
|
-
}
|
|
1355
|
-
if (error instanceof VersionNotFoundError) {
|
|
1356
|
-
logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
|
|
1357
|
-
return {
|
|
1358
|
-
results: [],
|
|
1359
|
-
error: {
|
|
1360
|
-
message: error.message,
|
|
1361
|
-
availableVersions: error.availableVersions
|
|
1362
|
-
}
|
|
1363
|
-
};
|
|
1366
|
+
if (error instanceof ScraperError) {
|
|
1367
|
+
throw new ToolError(
|
|
1368
|
+
`Failed to fetch or process URL: ${error.message}`,
|
|
1369
|
+
this.constructor.name
|
|
1370
|
+
);
|
|
1364
1371
|
}
|
|
1365
|
-
|
|
1366
|
-
|
|
1372
|
+
throw new ToolError(
|
|
1373
|
+
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
1374
|
+
this.constructor.name
|
|
1367
1375
|
);
|
|
1368
|
-
throw error;
|
|
1369
1376
|
}
|
|
1370
1377
|
}
|
|
1371
1378
|
};
|
|
@@ -1486,9 +1493,9 @@ var ScrapeTool = class {
|
|
|
1486
1493
|
version: internalVersion,
|
|
1487
1494
|
scope: scraperOptions?.scope ?? "subpages",
|
|
1488
1495
|
followRedirects: scraperOptions?.followRedirects ?? true,
|
|
1489
|
-
maxPages: scraperOptions?.maxPages ??
|
|
1490
|
-
maxDepth: scraperOptions?.maxDepth ??
|
|
1491
|
-
|
|
1496
|
+
maxPages: scraperOptions?.maxPages ?? DEFAULT_MAX_PAGES,
|
|
1497
|
+
maxDepth: scraperOptions?.maxDepth ?? DEFAULT_MAX_DEPTH,
|
|
1498
|
+
maxConcurrency: scraperOptions?.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
|
|
1492
1499
|
ignoreErrors: scraperOptions?.ignoreErrors ?? true
|
|
1493
1500
|
});
|
|
1494
1501
|
logger.info(`\u{1F680} Job ${jobId} enqueued for scraping.`);
|
|
@@ -1531,72 +1538,69 @@ var ScrapeTool = class {
|
|
|
1531
1538
|
}
|
|
1532
1539
|
};
|
|
1533
1540
|
|
|
1534
|
-
// src/tools/
|
|
1535
|
-
var
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
* Creates an instance of ListJobsTool.
|
|
1540
|
-
* @param manager The PipelineManager instance.
|
|
1541
|
-
*/
|
|
1542
|
-
constructor(manager) {
|
|
1543
|
-
this.manager = manager;
|
|
1541
|
+
// src/tools/SearchTool.ts
|
|
1542
|
+
var SearchTool = class {
|
|
1543
|
+
docService;
|
|
1544
|
+
constructor(docService) {
|
|
1545
|
+
this.docService = docService;
|
|
1544
1546
|
}
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1561
|
-
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1562
|
-
error: job.error?.message ?? null
|
|
1563
|
-
})
|
|
1547
|
+
async execute(options) {
|
|
1548
|
+
const { library, version, query, limit = 5, exactMatch = false } = options;
|
|
1549
|
+
if (exactMatch && (!version || version === "latest")) {
|
|
1550
|
+
await this.docService.validateLibraryExists(library);
|
|
1551
|
+
const versions = await this.docService.listVersions(library);
|
|
1552
|
+
throw new VersionNotFoundError(
|
|
1553
|
+
library,
|
|
1554
|
+
"latest",
|
|
1555
|
+
versions
|
|
1556
|
+
// versions already has the correct { version: string, indexed: boolean } format
|
|
1557
|
+
);
|
|
1558
|
+
}
|
|
1559
|
+
const resolvedVersion = version || "latest";
|
|
1560
|
+
logger.info(
|
|
1561
|
+
`\u{1F50D} Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
|
|
1564
1562
|
);
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1563
|
+
try {
|
|
1564
|
+
await this.docService.validateLibraryExists(library);
|
|
1565
|
+
let versionToSearch = resolvedVersion;
|
|
1566
|
+
if (!exactMatch) {
|
|
1567
|
+
const versionResult = await this.docService.findBestVersion(library, version);
|
|
1568
|
+
versionToSearch = versionResult.bestMatch;
|
|
1569
|
+
}
|
|
1570
|
+
const results = await this.docService.searchStore(
|
|
1571
|
+
library,
|
|
1572
|
+
versionToSearch,
|
|
1573
|
+
query,
|
|
1574
|
+
limit
|
|
1575
|
+
);
|
|
1576
|
+
logger.info(`\u2705 Found ${results.length} matching results`);
|
|
1577
|
+
return { results };
|
|
1578
|
+
} catch (error) {
|
|
1579
|
+
if (error instanceof LibraryNotFoundError) {
|
|
1580
|
+
logger.info(`\u2139\uFE0F Library not found: ${error.message}`);
|
|
1581
|
+
return {
|
|
1582
|
+
results: [],
|
|
1583
|
+
error: {
|
|
1584
|
+
message: error.message,
|
|
1585
|
+
suggestions: error.suggestions
|
|
1586
|
+
}
|
|
1587
|
+
};
|
|
1588
|
+
}
|
|
1589
|
+
if (error instanceof VersionNotFoundError) {
|
|
1590
|
+
logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
|
|
1591
|
+
return {
|
|
1592
|
+
results: [],
|
|
1593
|
+
error: {
|
|
1594
|
+
message: error.message,
|
|
1595
|
+
availableVersions: error.availableVersions
|
|
1596
|
+
}
|
|
1597
|
+
};
|
|
1598
|
+
}
|
|
1599
|
+
logger.error(
|
|
1600
|
+
`\u274C Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1601
|
+
);
|
|
1602
|
+
throw error;
|
|
1588
1603
|
}
|
|
1589
|
-
const jobInfo = {
|
|
1590
|
-
id: job.id,
|
|
1591
|
-
library: job.library,
|
|
1592
|
-
version: job.version,
|
|
1593
|
-
status: job.status,
|
|
1594
|
-
createdAt: job.createdAt.toISOString(),
|
|
1595
|
-
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1596
|
-
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1597
|
-
error: job.error?.message ?? null
|
|
1598
|
-
};
|
|
1599
|
-
return { job: jobInfo };
|
|
1600
1604
|
}
|
|
1601
1605
|
};
|
|
1602
1606
|
|
|
@@ -1657,6 +1661,75 @@ var CancelJobTool = class {
|
|
|
1657
1661
|
}
|
|
1658
1662
|
};
|
|
1659
1663
|
|
|
1664
|
+
// src/tools/GetJobInfoTool.ts
|
|
1665
|
+
var GetJobInfoTool = class {
|
|
1666
|
+
manager;
|
|
1667
|
+
/**
|
|
1668
|
+
* Creates an instance of GetJobInfoTool.
|
|
1669
|
+
* @param manager The PipelineManager instance.
|
|
1670
|
+
*/
|
|
1671
|
+
constructor(manager) {
|
|
1672
|
+
this.manager = manager;
|
|
1673
|
+
}
|
|
1674
|
+
/**
|
|
1675
|
+
* Executes the tool to retrieve simplified info for a specific job.
|
|
1676
|
+
* @param input - The input parameters, containing the jobId.
|
|
1677
|
+
* @returns A promise that resolves with the simplified job info or null if not found.
|
|
1678
|
+
*/
|
|
1679
|
+
async execute(input) {
|
|
1680
|
+
const job = await this.manager.getJob(input.jobId);
|
|
1681
|
+
if (!job) {
|
|
1682
|
+
return { job: null };
|
|
1683
|
+
}
|
|
1684
|
+
const jobInfo = {
|
|
1685
|
+
id: job.id,
|
|
1686
|
+
library: job.library,
|
|
1687
|
+
version: job.version,
|
|
1688
|
+
status: job.status,
|
|
1689
|
+
createdAt: job.createdAt.toISOString(),
|
|
1690
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1691
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1692
|
+
error: job.error?.message ?? null
|
|
1693
|
+
};
|
|
1694
|
+
return { job: jobInfo };
|
|
1695
|
+
}
|
|
1696
|
+
};
|
|
1697
|
+
|
|
1698
|
+
// src/tools/ListJobsTool.ts
|
|
1699
|
+
var ListJobsTool = class {
|
|
1700
|
+
manager;
|
|
1701
|
+
// Change property name and type
|
|
1702
|
+
/**
|
|
1703
|
+
* Creates an instance of ListJobsTool.
|
|
1704
|
+
* @param manager The PipelineManager instance.
|
|
1705
|
+
*/
|
|
1706
|
+
constructor(manager) {
|
|
1707
|
+
this.manager = manager;
|
|
1708
|
+
}
|
|
1709
|
+
/**
|
|
1710
|
+
* Executes the tool to retrieve a list of pipeline jobs.
|
|
1711
|
+
* @param input - The input parameters, optionally including a status filter.
|
|
1712
|
+
* @returns A promise that resolves with the list of simplified job objects.
|
|
1713
|
+
* @throws {PipelineStateError} If the pipeline manager is somehow unavailable.
|
|
1714
|
+
*/
|
|
1715
|
+
async execute(input) {
|
|
1716
|
+
const jobs = await this.manager.getJobs(input.status);
|
|
1717
|
+
const simplifiedJobs = jobs.map(
|
|
1718
|
+
(job) => ({
|
|
1719
|
+
id: job.id,
|
|
1720
|
+
library: job.library,
|
|
1721
|
+
version: job.version,
|
|
1722
|
+
status: job.status,
|
|
1723
|
+
createdAt: job.createdAt.toISOString(),
|
|
1724
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1725
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1726
|
+
error: job.error?.message ?? null
|
|
1727
|
+
})
|
|
1728
|
+
);
|
|
1729
|
+
return { jobs: simplifiedJobs };
|
|
1730
|
+
}
|
|
1731
|
+
};
|
|
1732
|
+
|
|
1660
1733
|
// src/tools/RemoveTool.ts
|
|
1661
1734
|
var RemoveToolInputSchema = {
|
|
1662
1735
|
type: "object",
|
|
@@ -11269,7 +11342,8 @@ var DocumentManagementService = class {
|
|
|
11269
11342
|
dbPath = path3.join(dbDir, "documents.db");
|
|
11270
11343
|
logger.debug(`\u{1F4BE} Using database directory from DOCS_MCP_STORE_PATH: ${dbDir}`);
|
|
11271
11344
|
} else {
|
|
11272
|
-
const
|
|
11345
|
+
const projectRoot = path3.resolve(import.meta.dirname, "..");
|
|
11346
|
+
const oldDbDir = path3.join(projectRoot, ".store");
|
|
11273
11347
|
const oldDbPath = path3.join(oldDbDir, "documents.db");
|
|
11274
11348
|
const oldDbExists = existsSync(oldDbPath);
|
|
11275
11349
|
if (oldDbExists) {
|
|
@@ -11496,19 +11570,26 @@ var DocumentManagementService = class {
|
|
|
11496
11570
|
};
|
|
11497
11571
|
|
|
11498
11572
|
export {
|
|
11573
|
+
DEFAULT_MAX_PAGES,
|
|
11574
|
+
DEFAULT_MAX_DEPTH,
|
|
11575
|
+
DEFAULT_MAX_CONCURRENCY,
|
|
11499
11576
|
setLogLevel,
|
|
11500
11577
|
logger,
|
|
11578
|
+
HttpFetcher,
|
|
11579
|
+
FileFetcher,
|
|
11580
|
+
HtmlProcessor,
|
|
11501
11581
|
PipelineJobStatus,
|
|
11502
11582
|
PipelineManager,
|
|
11583
|
+
CancelJobTool,
|
|
11503
11584
|
VersionNotFoundError,
|
|
11504
|
-
|
|
11585
|
+
FetchUrlTool,
|
|
11505
11586
|
FindVersionTool,
|
|
11506
|
-
ListLibrariesTool,
|
|
11507
|
-
ScrapeTool,
|
|
11508
|
-
ListJobsTool,
|
|
11509
11587
|
GetJobInfoTool,
|
|
11510
|
-
|
|
11588
|
+
ListJobsTool,
|
|
11589
|
+
ListLibrariesTool,
|
|
11511
11590
|
RemoveTool,
|
|
11591
|
+
ScrapeTool,
|
|
11592
|
+
SearchTool,
|
|
11512
11593
|
DocumentManagementService
|
|
11513
11594
|
};
|
|
11514
|
-
//# sourceMappingURL=chunk-
|
|
11595
|
+
//# sourceMappingURL=chunk-A5FW7XVC.js.map
|