@arabold/docs-mcp-server 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -16
- package/dist/EmbeddingFactory-6UEXNF44.js +1177 -0
- package/dist/EmbeddingFactory-6UEXNF44.js.map +1 -0
- package/dist/{chunk-S7C2LRQA.js → chunk-ADZQJG2M.js} +358 -391
- package/dist/chunk-ADZQJG2M.js.map +1 -0
- package/dist/chunk-YCXNASA6.js +124 -0
- package/dist/chunk-YCXNASA6.js.map +1 -0
- package/dist/cli.js +26 -3
- package/dist/cli.js.map +1 -1
- package/dist/server.js +35 -11
- package/dist/server.js.map +1 -1
- package/package.json +4 -1
- package/dist/chunk-S7C2LRQA.js.map +0 -1
|
@@ -1,32 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
for (var name in all2)
|
|
12
|
-
__defProp(target, name, { get: all2[name], enumerable: true });
|
|
13
|
-
};
|
|
14
|
-
var __copyProps = (to, from, except, desc) => {
|
|
15
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
16
|
-
for (let key of __getOwnPropNames(from))
|
|
17
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
18
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
19
|
-
}
|
|
20
|
-
return to;
|
|
21
|
-
};
|
|
22
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
23
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
24
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
25
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
26
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
27
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
28
|
-
mod
|
|
29
|
-
));
|
|
1
|
+
import {
|
|
2
|
+
ConnectionError,
|
|
3
|
+
DimensionError,
|
|
4
|
+
StoreError,
|
|
5
|
+
VECTOR_DIMENSION,
|
|
6
|
+
__commonJS,
|
|
7
|
+
__export,
|
|
8
|
+
__toESM,
|
|
9
|
+
createTablesSQL
|
|
10
|
+
} from "./chunk-YCXNASA6.js";
|
|
30
11
|
|
|
31
12
|
// node_modules/extend/index.js
|
|
32
13
|
var require_extend = __commonJS({
|
|
@@ -163,55 +144,8 @@ var logger = {
|
|
|
163
144
|
}
|
|
164
145
|
};
|
|
165
146
|
|
|
166
|
-
//
|
|
167
|
-
|
|
168
|
-
for (let i = 0; i < 256; ++i) {
|
|
169
|
-
byteToHex.push((i + 256).toString(16).slice(1));
|
|
170
|
-
}
|
|
171
|
-
function unsafeStringify(arr, offset = 0) {
|
|
172
|
-
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// node_modules/uuid/dist/esm-node/rng.js
|
|
176
|
-
import crypto from "node:crypto";
|
|
177
|
-
var rnds8Pool = new Uint8Array(256);
|
|
178
|
-
var poolPtr = rnds8Pool.length;
|
|
179
|
-
function rng() {
|
|
180
|
-
if (poolPtr > rnds8Pool.length - 16) {
|
|
181
|
-
crypto.randomFillSync(rnds8Pool);
|
|
182
|
-
poolPtr = 0;
|
|
183
|
-
}
|
|
184
|
-
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// node_modules/uuid/dist/esm-node/native.js
|
|
188
|
-
import crypto2 from "node:crypto";
|
|
189
|
-
var native_default = {
|
|
190
|
-
randomUUID: crypto2.randomUUID
|
|
191
|
-
};
|
|
192
|
-
|
|
193
|
-
// node_modules/uuid/dist/esm-node/v4.js
|
|
194
|
-
function v4(options, buf, offset) {
|
|
195
|
-
if (native_default.randomUUID && !buf && !options) {
|
|
196
|
-
return native_default.randomUUID();
|
|
197
|
-
}
|
|
198
|
-
options = options || {};
|
|
199
|
-
const rnds = options.random || (options.rng || rng)();
|
|
200
|
-
rnds[6] = rnds[6] & 15 | 64;
|
|
201
|
-
rnds[8] = rnds[8] & 63 | 128;
|
|
202
|
-
if (buf) {
|
|
203
|
-
offset = offset || 0;
|
|
204
|
-
for (let i = 0; i < 16; ++i) {
|
|
205
|
-
buf[offset + i] = rnds[i];
|
|
206
|
-
}
|
|
207
|
-
return buf;
|
|
208
|
-
}
|
|
209
|
-
return unsafeStringify(rnds);
|
|
210
|
-
}
|
|
211
|
-
var v4_default = v4;
|
|
212
|
-
|
|
213
|
-
// src/utils/url.ts
|
|
214
|
-
import psl from "psl";
|
|
147
|
+
// src/scraper/fetcher/HttpFetcher.ts
|
|
148
|
+
import axios from "axios";
|
|
215
149
|
|
|
216
150
|
// src/utils/errors.ts
|
|
217
151
|
var ScraperError = class extends Error {
|
|
@@ -243,67 +177,7 @@ var RedirectError = class extends ScraperError {
|
|
|
243
177
|
}
|
|
244
178
|
};
|
|
245
179
|
|
|
246
|
-
// src/utils/url.ts
|
|
247
|
-
var defaultNormalizerOptions = {
|
|
248
|
-
ignoreCase: true,
|
|
249
|
-
removeHash: true,
|
|
250
|
-
removeTrailingSlash: true,
|
|
251
|
-
removeQuery: false,
|
|
252
|
-
removeIndex: true
|
|
253
|
-
};
|
|
254
|
-
function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
255
|
-
try {
|
|
256
|
-
const parsedUrl = new URL(url);
|
|
257
|
-
const finalOptions = { ...defaultNormalizerOptions, ...options };
|
|
258
|
-
const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
|
|
259
|
-
if (finalOptions.removeIndex) {
|
|
260
|
-
normalized.pathname = normalized.pathname.replace(
|
|
261
|
-
/\/index\.(html|htm|asp|php|jsp)$/i,
|
|
262
|
-
"/"
|
|
263
|
-
);
|
|
264
|
-
}
|
|
265
|
-
if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
|
|
266
|
-
normalized.pathname = normalized.pathname.replace(/\/+$/, "");
|
|
267
|
-
}
|
|
268
|
-
const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
|
|
269
|
-
const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
|
|
270
|
-
let result = normalized.origin + normalized.pathname;
|
|
271
|
-
if (preservedSearch) {
|
|
272
|
-
result += preservedSearch;
|
|
273
|
-
}
|
|
274
|
-
if (preservedHash) {
|
|
275
|
-
result += preservedHash;
|
|
276
|
-
}
|
|
277
|
-
if (finalOptions.ignoreCase) {
|
|
278
|
-
result = result.toLowerCase();
|
|
279
|
-
}
|
|
280
|
-
return result;
|
|
281
|
-
} catch {
|
|
282
|
-
return url;
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
function validateUrl(url) {
|
|
286
|
-
try {
|
|
287
|
-
new URL(url);
|
|
288
|
-
} catch (error) {
|
|
289
|
-
throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
function hasSameHostname(urlA, urlB) {
|
|
293
|
-
return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
|
|
294
|
-
}
|
|
295
|
-
function hasSameDomain(urlA, urlB) {
|
|
296
|
-
const domainA = psl.get(urlA.hostname.toLowerCase());
|
|
297
|
-
const domainB = psl.get(urlB.hostname.toLowerCase());
|
|
298
|
-
return domainA !== null && domainA === domainB;
|
|
299
|
-
}
|
|
300
|
-
function isSubpath(baseUrl, targetUrl) {
|
|
301
|
-
const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
|
|
302
|
-
return targetUrl.pathname.startsWith(basePath);
|
|
303
|
-
}
|
|
304
|
-
|
|
305
180
|
// src/scraper/fetcher/HttpFetcher.ts
|
|
306
|
-
import axios from "axios";
|
|
307
181
|
var HttpFetcher = class {
|
|
308
182
|
MAX_RETRIES = 6;
|
|
309
183
|
BASE_DELAY = 1e3;
|
|
@@ -413,29 +287,6 @@ var FileFetcher = class {
|
|
|
413
287
|
}
|
|
414
288
|
};
|
|
415
289
|
|
|
416
|
-
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
417
|
-
import { URL as URL2 } from "node:url";
|
|
418
|
-
|
|
419
|
-
// src/pipeline/errors.ts
|
|
420
|
-
var PipelineError = class extends Error {
|
|
421
|
-
constructor(message, cause) {
|
|
422
|
-
super(message);
|
|
423
|
-
this.cause = cause;
|
|
424
|
-
this.name = this.constructor.name;
|
|
425
|
-
if (cause?.stack) {
|
|
426
|
-
this.stack = `${this.stack}
|
|
427
|
-
Caused by: ${cause.stack}`;
|
|
428
|
-
}
|
|
429
|
-
}
|
|
430
|
-
};
|
|
431
|
-
var PipelineStateError = class extends PipelineError {
|
|
432
|
-
};
|
|
433
|
-
var CancellationError = class extends PipelineError {
|
|
434
|
-
constructor(message = "Operation cancelled") {
|
|
435
|
-
super(message);
|
|
436
|
-
}
|
|
437
|
-
};
|
|
438
|
-
|
|
439
290
|
// src/scraper/processor/HtmlProcessor.ts
|
|
440
291
|
import createDOMPurify from "dompurify";
|
|
441
292
|
import { JSDOM } from "jsdom";
|
|
@@ -458,7 +309,7 @@ var HtmlProcessor = class {
|
|
|
458
309
|
"input",
|
|
459
310
|
"textarea",
|
|
460
311
|
"select",
|
|
461
|
-
"form",
|
|
312
|
+
// "form", // Known issue: Some pages use alerts for important content
|
|
462
313
|
".ads",
|
|
463
314
|
".advertisement",
|
|
464
315
|
".banner",
|
|
@@ -491,18 +342,16 @@ var HtmlProcessor = class {
|
|
|
491
342
|
".signup-form",
|
|
492
343
|
".tooltip",
|
|
493
344
|
".dropdown-menu",
|
|
494
|
-
".alert",
|
|
345
|
+
// ".alert", // Known issue: Some pages use alerts for important content
|
|
495
346
|
".breadcrumb",
|
|
496
347
|
".pagination",
|
|
497
|
-
'[role="alert"]',
|
|
348
|
+
// '[role="alert"]', // Known issue: Some pages use alerts for important content
|
|
498
349
|
'[role="banner"]',
|
|
499
350
|
'[role="dialog"]',
|
|
500
351
|
'[role="alertdialog"]',
|
|
501
352
|
'[role="region"][aria-label*="skip" i]',
|
|
502
353
|
'[aria-modal="true"]',
|
|
503
|
-
".noprint"
|
|
504
|
-
"figure",
|
|
505
|
-
"sup"
|
|
354
|
+
".noprint"
|
|
506
355
|
];
|
|
507
356
|
constructor(options) {
|
|
508
357
|
this.turndownService = new TurndownService({
|
|
@@ -533,9 +382,17 @@ var HtmlProcessor = class {
|
|
|
533
382
|
}
|
|
534
383
|
}
|
|
535
384
|
}
|
|
385
|
+
const text3 = (() => {
|
|
386
|
+
const clone = element.cloneNode(true);
|
|
387
|
+
const brElements = Array.from(clone.querySelectorAll("br"));
|
|
388
|
+
for (const br of brElements) {
|
|
389
|
+
br.replaceWith("\n");
|
|
390
|
+
}
|
|
391
|
+
return clone.textContent;
|
|
392
|
+
})();
|
|
536
393
|
return `
|
|
537
394
|
\`\`\`${language}
|
|
538
|
-
${
|
|
395
|
+
${text3}
|
|
539
396
|
\`\`\`
|
|
540
397
|
`;
|
|
541
398
|
}
|
|
@@ -641,6 +498,136 @@ var MarkdownProcessor = class {
|
|
|
641
498
|
}
|
|
642
499
|
};
|
|
643
500
|
|
|
501
|
+
// node_modules/uuid/dist/esm-node/stringify.js
|
|
502
|
+
var byteToHex = [];
|
|
503
|
+
for (let i = 0; i < 256; ++i) {
|
|
504
|
+
byteToHex.push((i + 256).toString(16).slice(1));
|
|
505
|
+
}
|
|
506
|
+
function unsafeStringify(arr, offset = 0) {
|
|
507
|
+
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// node_modules/uuid/dist/esm-node/rng.js
|
|
511
|
+
import crypto from "node:crypto";
|
|
512
|
+
var rnds8Pool = new Uint8Array(256);
|
|
513
|
+
var poolPtr = rnds8Pool.length;
|
|
514
|
+
function rng() {
|
|
515
|
+
if (poolPtr > rnds8Pool.length - 16) {
|
|
516
|
+
crypto.randomFillSync(rnds8Pool);
|
|
517
|
+
poolPtr = 0;
|
|
518
|
+
}
|
|
519
|
+
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
// node_modules/uuid/dist/esm-node/native.js
|
|
523
|
+
import crypto2 from "node:crypto";
|
|
524
|
+
var native_default = {
|
|
525
|
+
randomUUID: crypto2.randomUUID
|
|
526
|
+
};
|
|
527
|
+
|
|
528
|
+
// node_modules/uuid/dist/esm-node/v4.js
|
|
529
|
+
function v4(options, buf, offset) {
|
|
530
|
+
if (native_default.randomUUID && !buf && !options) {
|
|
531
|
+
return native_default.randomUUID();
|
|
532
|
+
}
|
|
533
|
+
options = options || {};
|
|
534
|
+
const rnds = options.random || (options.rng || rng)();
|
|
535
|
+
rnds[6] = rnds[6] & 15 | 64;
|
|
536
|
+
rnds[8] = rnds[8] & 63 | 128;
|
|
537
|
+
if (buf) {
|
|
538
|
+
offset = offset || 0;
|
|
539
|
+
for (let i = 0; i < 16; ++i) {
|
|
540
|
+
buf[offset + i] = rnds[i];
|
|
541
|
+
}
|
|
542
|
+
return buf;
|
|
543
|
+
}
|
|
544
|
+
return unsafeStringify(rnds);
|
|
545
|
+
}
|
|
546
|
+
var v4_default = v4;
|
|
547
|
+
|
|
548
|
+
// src/utils/url.ts
|
|
549
|
+
import psl from "psl";
|
|
550
|
+
var defaultNormalizerOptions = {
|
|
551
|
+
ignoreCase: true,
|
|
552
|
+
removeHash: true,
|
|
553
|
+
removeTrailingSlash: true,
|
|
554
|
+
removeQuery: false,
|
|
555
|
+
removeIndex: true
|
|
556
|
+
};
|
|
557
|
+
function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
558
|
+
try {
|
|
559
|
+
const parsedUrl = new URL(url);
|
|
560
|
+
const finalOptions = { ...defaultNormalizerOptions, ...options };
|
|
561
|
+
const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
|
|
562
|
+
if (finalOptions.removeIndex) {
|
|
563
|
+
normalized.pathname = normalized.pathname.replace(
|
|
564
|
+
/\/index\.(html|htm|asp|php|jsp)$/i,
|
|
565
|
+
"/"
|
|
566
|
+
);
|
|
567
|
+
}
|
|
568
|
+
if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
|
|
569
|
+
normalized.pathname = normalized.pathname.replace(/\/+$/, "");
|
|
570
|
+
}
|
|
571
|
+
const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
|
|
572
|
+
const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
|
|
573
|
+
let result = normalized.origin + normalized.pathname;
|
|
574
|
+
if (preservedSearch) {
|
|
575
|
+
result += preservedSearch;
|
|
576
|
+
}
|
|
577
|
+
if (preservedHash) {
|
|
578
|
+
result += preservedHash;
|
|
579
|
+
}
|
|
580
|
+
if (finalOptions.ignoreCase) {
|
|
581
|
+
result = result.toLowerCase();
|
|
582
|
+
}
|
|
583
|
+
return result;
|
|
584
|
+
} catch {
|
|
585
|
+
return url;
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
function validateUrl(url) {
|
|
589
|
+
try {
|
|
590
|
+
new URL(url);
|
|
591
|
+
} catch (error) {
|
|
592
|
+
throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
function hasSameHostname(urlA, urlB) {
|
|
596
|
+
return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
|
|
597
|
+
}
|
|
598
|
+
function hasSameDomain(urlA, urlB) {
|
|
599
|
+
const domainA = psl.get(urlA.hostname.toLowerCase());
|
|
600
|
+
const domainB = psl.get(urlB.hostname.toLowerCase());
|
|
601
|
+
return domainA !== null && domainA === domainB;
|
|
602
|
+
}
|
|
603
|
+
function isSubpath(baseUrl, targetUrl) {
|
|
604
|
+
const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
|
|
605
|
+
return targetUrl.pathname.startsWith(basePath);
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
609
|
+
import { URL as URL2 } from "node:url";
|
|
610
|
+
|
|
611
|
+
// src/pipeline/errors.ts
|
|
612
|
+
var PipelineError = class extends Error {
|
|
613
|
+
constructor(message, cause) {
|
|
614
|
+
super(message);
|
|
615
|
+
this.cause = cause;
|
|
616
|
+
this.name = this.constructor.name;
|
|
617
|
+
if (cause?.stack) {
|
|
618
|
+
this.stack = `${this.stack}
|
|
619
|
+
Caused by: ${cause.stack}`;
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
};
|
|
623
|
+
var PipelineStateError = class extends PipelineError {
|
|
624
|
+
};
|
|
625
|
+
var CancellationError = class extends PipelineError {
|
|
626
|
+
constructor(message = "Operation cancelled") {
|
|
627
|
+
super(message);
|
|
628
|
+
}
|
|
629
|
+
};
|
|
630
|
+
|
|
644
631
|
// src/scraper/strategies/BaseScraperStrategy.ts
|
|
645
632
|
var DEFAULT_MAX_PAGES = 100;
|
|
646
633
|
var DEFAULT_MAX_DEPTH = 3;
|
|
@@ -1330,61 +1317,58 @@ var LibraryNotFoundError = class extends ToolError {
|
|
|
1330
1317
|
}
|
|
1331
1318
|
super(message, "SearchTool");
|
|
1332
1319
|
this.requestedLibrary = requestedLibrary;
|
|
1333
|
-
this.suggestions = suggestions;
|
|
1334
|
-
}
|
|
1335
|
-
};
|
|
1336
|
-
|
|
1337
|
-
// src/tools/
|
|
1338
|
-
var
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
this.
|
|
1342
|
-
}
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1320
|
+
this.suggestions = suggestions;
|
|
1321
|
+
}
|
|
1322
|
+
};
|
|
1323
|
+
|
|
1324
|
+
// src/tools/FetchUrlTool.ts
|
|
1325
|
+
var FetchUrlTool = class {
|
|
1326
|
+
constructor(httpFetcher, fileFetcher, processor) {
|
|
1327
|
+
this.processor = processor;
|
|
1328
|
+
this.fetchers = [httpFetcher, fileFetcher];
|
|
1329
|
+
}
|
|
1330
|
+
/**
|
|
1331
|
+
* Collection of fetchers that will be tried in order for a given URL.
|
|
1332
|
+
*/
|
|
1333
|
+
fetchers;
|
|
1334
|
+
/**
|
|
1335
|
+
* Fetches content from a URL and converts it to Markdown.
|
|
1336
|
+
* Supports both HTTP/HTTPS URLs and local file URLs (file://).
|
|
1337
|
+
* @returns The processed Markdown content
|
|
1338
|
+
* @throws {ToolError} If fetching or processing fails
|
|
1339
|
+
*/
|
|
1340
|
+
async execute(options) {
|
|
1341
|
+
const { url } = options;
|
|
1342
|
+
const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
|
|
1343
|
+
const fetcherIndex = canFetchResults.findIndex((result) => result === true);
|
|
1344
|
+
if (fetcherIndex === -1) {
|
|
1345
|
+
throw new ToolError(
|
|
1346
|
+
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
1347
|
+
this.constructor.name
|
|
1360
1348
|
);
|
|
1361
|
-
|
|
1362
|
-
|
|
1349
|
+
}
|
|
1350
|
+
const fetcher = this.fetchers[fetcherIndex];
|
|
1351
|
+
try {
|
|
1352
|
+
logger.info(`\u{1F4E1} Fetching ${url}...`);
|
|
1353
|
+
const rawContent = await fetcher.fetch(url, {
|
|
1354
|
+
followRedirects: options.followRedirects ?? true,
|
|
1355
|
+
maxRetries: 3
|
|
1356
|
+
});
|
|
1357
|
+
logger.info("\u{1F504} Converting to Markdown...");
|
|
1358
|
+
const processed = await this.processor.process(rawContent);
|
|
1359
|
+
logger.info(`\u2705 Successfully converted ${url} to Markdown`);
|
|
1360
|
+
return processed.content;
|
|
1363
1361
|
} catch (error) {
|
|
1364
|
-
if (error instanceof
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
message: error.message,
|
|
1370
|
-
suggestions: error.suggestions
|
|
1371
|
-
}
|
|
1372
|
-
};
|
|
1373
|
-
}
|
|
1374
|
-
if (error instanceof VersionNotFoundError) {
|
|
1375
|
-
logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
|
|
1376
|
-
return {
|
|
1377
|
-
results: [],
|
|
1378
|
-
error: {
|
|
1379
|
-
message: error.message,
|
|
1380
|
-
availableVersions: error.availableVersions
|
|
1381
|
-
}
|
|
1382
|
-
};
|
|
1362
|
+
if (error instanceof ScraperError) {
|
|
1363
|
+
throw new ToolError(
|
|
1364
|
+
`Failed to fetch or process URL: ${error.message}`,
|
|
1365
|
+
this.constructor.name
|
|
1366
|
+
);
|
|
1383
1367
|
}
|
|
1384
|
-
|
|
1385
|
-
|
|
1368
|
+
throw new ToolError(
|
|
1369
|
+
`Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
|
|
1370
|
+
this.constructor.name
|
|
1386
1371
|
);
|
|
1387
|
-
throw error;
|
|
1388
1372
|
}
|
|
1389
1373
|
}
|
|
1390
1374
|
};
|
|
@@ -1550,72 +1534,69 @@ var ScrapeTool = class {
|
|
|
1550
1534
|
}
|
|
1551
1535
|
};
|
|
1552
1536
|
|
|
1553
|
-
// src/tools/
|
|
1554
|
-
var
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
* Creates an instance of ListJobsTool.
|
|
1559
|
-
* @param manager The PipelineManager instance.
|
|
1560
|
-
*/
|
|
1561
|
-
constructor(manager) {
|
|
1562
|
-
this.manager = manager;
|
|
1537
|
+
// src/tools/SearchTool.ts
|
|
1538
|
+
var SearchTool = class {
|
|
1539
|
+
docService;
|
|
1540
|
+
constructor(docService) {
|
|
1541
|
+
this.docService = docService;
|
|
1563
1542
|
}
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1580
|
-
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1581
|
-
error: job.error?.message ?? null
|
|
1582
|
-
})
|
|
1543
|
+
async execute(options) {
|
|
1544
|
+
const { library, version, query, limit = 5, exactMatch = false } = options;
|
|
1545
|
+
if (exactMatch && (!version || version === "latest")) {
|
|
1546
|
+
await this.docService.validateLibraryExists(library);
|
|
1547
|
+
const versions = await this.docService.listVersions(library);
|
|
1548
|
+
throw new VersionNotFoundError(
|
|
1549
|
+
library,
|
|
1550
|
+
"latest",
|
|
1551
|
+
versions
|
|
1552
|
+
// versions already has the correct { version: string, indexed: boolean } format
|
|
1553
|
+
);
|
|
1554
|
+
}
|
|
1555
|
+
const resolvedVersion = version || "latest";
|
|
1556
|
+
logger.info(
|
|
1557
|
+
`\u{1F50D} Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
|
|
1583
1558
|
);
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1559
|
+
try {
|
|
1560
|
+
await this.docService.validateLibraryExists(library);
|
|
1561
|
+
let versionToSearch = resolvedVersion;
|
|
1562
|
+
if (!exactMatch) {
|
|
1563
|
+
const versionResult = await this.docService.findBestVersion(library, version);
|
|
1564
|
+
versionToSearch = versionResult.bestMatch;
|
|
1565
|
+
}
|
|
1566
|
+
const results = await this.docService.searchStore(
|
|
1567
|
+
library,
|
|
1568
|
+
versionToSearch,
|
|
1569
|
+
query,
|
|
1570
|
+
limit
|
|
1571
|
+
);
|
|
1572
|
+
logger.info(`\u2705 Found ${results.length} matching results`);
|
|
1573
|
+
return { results };
|
|
1574
|
+
} catch (error) {
|
|
1575
|
+
if (error instanceof LibraryNotFoundError) {
|
|
1576
|
+
logger.info(`\u2139\uFE0F Library not found: ${error.message}`);
|
|
1577
|
+
return {
|
|
1578
|
+
results: [],
|
|
1579
|
+
error: {
|
|
1580
|
+
message: error.message,
|
|
1581
|
+
suggestions: error.suggestions
|
|
1582
|
+
}
|
|
1583
|
+
};
|
|
1584
|
+
}
|
|
1585
|
+
if (error instanceof VersionNotFoundError) {
|
|
1586
|
+
logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
|
|
1587
|
+
return {
|
|
1588
|
+
results: [],
|
|
1589
|
+
error: {
|
|
1590
|
+
message: error.message,
|
|
1591
|
+
availableVersions: error.availableVersions
|
|
1592
|
+
}
|
|
1593
|
+
};
|
|
1594
|
+
}
|
|
1595
|
+
logger.error(
|
|
1596
|
+
`\u274C Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1597
|
+
);
|
|
1598
|
+
throw error;
|
|
1607
1599
|
}
|
|
1608
|
-
const jobInfo = {
|
|
1609
|
-
id: job.id,
|
|
1610
|
-
library: job.library,
|
|
1611
|
-
version: job.version,
|
|
1612
|
-
status: job.status,
|
|
1613
|
-
createdAt: job.createdAt.toISOString(),
|
|
1614
|
-
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1615
|
-
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1616
|
-
error: job.error?.message ?? null
|
|
1617
|
-
};
|
|
1618
|
-
return { job: jobInfo };
|
|
1619
1600
|
}
|
|
1620
1601
|
};
|
|
1621
1602
|
|
|
@@ -1676,6 +1657,75 @@ var CancelJobTool = class {
|
|
|
1676
1657
|
}
|
|
1677
1658
|
};
|
|
1678
1659
|
|
|
1660
|
+
// src/tools/GetJobInfoTool.ts
|
|
1661
|
+
var GetJobInfoTool = class {
|
|
1662
|
+
manager;
|
|
1663
|
+
/**
|
|
1664
|
+
* Creates an instance of GetJobInfoTool.
|
|
1665
|
+
* @param manager The PipelineManager instance.
|
|
1666
|
+
*/
|
|
1667
|
+
constructor(manager) {
|
|
1668
|
+
this.manager = manager;
|
|
1669
|
+
}
|
|
1670
|
+
/**
|
|
1671
|
+
* Executes the tool to retrieve simplified info for a specific job.
|
|
1672
|
+
* @param input - The input parameters, containing the jobId.
|
|
1673
|
+
* @returns A promise that resolves with the simplified job info or null if not found.
|
|
1674
|
+
*/
|
|
1675
|
+
async execute(input) {
|
|
1676
|
+
const job = await this.manager.getJob(input.jobId);
|
|
1677
|
+
if (!job) {
|
|
1678
|
+
return { job: null };
|
|
1679
|
+
}
|
|
1680
|
+
const jobInfo = {
|
|
1681
|
+
id: job.id,
|
|
1682
|
+
library: job.library,
|
|
1683
|
+
version: job.version,
|
|
1684
|
+
status: job.status,
|
|
1685
|
+
createdAt: job.createdAt.toISOString(),
|
|
1686
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1687
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1688
|
+
error: job.error?.message ?? null
|
|
1689
|
+
};
|
|
1690
|
+
return { job: jobInfo };
|
|
1691
|
+
}
|
|
1692
|
+
};
|
|
1693
|
+
|
|
1694
|
+
// src/tools/ListJobsTool.ts
|
|
1695
|
+
var ListJobsTool = class {
|
|
1696
|
+
manager;
|
|
1697
|
+
// Change property name and type
|
|
1698
|
+
/**
|
|
1699
|
+
* Creates an instance of ListJobsTool.
|
|
1700
|
+
* @param manager The PipelineManager instance.
|
|
1701
|
+
*/
|
|
1702
|
+
constructor(manager) {
|
|
1703
|
+
this.manager = manager;
|
|
1704
|
+
}
|
|
1705
|
+
/**
|
|
1706
|
+
* Executes the tool to retrieve a list of pipeline jobs.
|
|
1707
|
+
* @param input - The input parameters, optionally including a status filter.
|
|
1708
|
+
* @returns A promise that resolves with the list of simplified job objects.
|
|
1709
|
+
* @throws {PipelineStateError} If the pipeline manager is somehow unavailable.
|
|
1710
|
+
*/
|
|
1711
|
+
async execute(input) {
|
|
1712
|
+
const jobs = await this.manager.getJobs(input.status);
|
|
1713
|
+
const simplifiedJobs = jobs.map(
|
|
1714
|
+
(job) => ({
|
|
1715
|
+
id: job.id,
|
|
1716
|
+
library: job.library,
|
|
1717
|
+
version: job.version,
|
|
1718
|
+
status: job.status,
|
|
1719
|
+
createdAt: job.createdAt.toISOString(),
|
|
1720
|
+
startedAt: job.startedAt?.toISOString() ?? null,
|
|
1721
|
+
finishedAt: job.finishedAt?.toISOString() ?? null,
|
|
1722
|
+
error: job.error?.message ?? null
|
|
1723
|
+
})
|
|
1724
|
+
);
|
|
1725
|
+
return { jobs: simplifiedJobs };
|
|
1726
|
+
}
|
|
1727
|
+
};
|
|
1728
|
+
|
|
1679
1729
|
// src/tools/RemoveTool.ts
|
|
1680
1730
|
var RemoveToolInputSchema = {
|
|
1681
1731
|
type: "object",
|
|
@@ -10774,92 +10824,9 @@ ${subsequentSiblings.map((d) => d.pageContent).join("\n\n")}`;
|
|
|
10774
10824
|
};
|
|
10775
10825
|
|
|
10776
10826
|
// src/store/DocumentStore.ts
|
|
10777
|
-
import { OpenAIEmbeddings } from "@langchain/openai";
|
|
10778
10827
|
import Database from "better-sqlite3";
|
|
10779
10828
|
import * as sqliteVec from "sqlite-vec";
|
|
10780
10829
|
|
|
10781
|
-
// src/store/errors.ts
|
|
10782
|
-
var StoreError = class extends Error {
|
|
10783
|
-
constructor(message, cause) {
|
|
10784
|
-
super(cause ? `${message} caused by ${cause}` : message);
|
|
10785
|
-
this.cause = cause;
|
|
10786
|
-
this.name = this.constructor.name;
|
|
10787
|
-
const causeError = cause instanceof Error ? cause : cause ? new Error(String(cause)) : void 0;
|
|
10788
|
-
if (causeError?.stack) {
|
|
10789
|
-
this.stack = causeError.stack;
|
|
10790
|
-
}
|
|
10791
|
-
}
|
|
10792
|
-
};
|
|
10793
|
-
var DimensionError = class extends StoreError {
|
|
10794
|
-
constructor(modelName, modelDimension, dbDimension) {
|
|
10795
|
-
super(
|
|
10796
|
-
`Model "${modelName}" produces ${modelDimension}-dimensional vectors, which exceeds the database's fixed dimension of ${dbDimension}. Please use a model with dimension \u2264 ${dbDimension}.`
|
|
10797
|
-
);
|
|
10798
|
-
this.modelName = modelName;
|
|
10799
|
-
this.modelDimension = modelDimension;
|
|
10800
|
-
this.dbDimension = dbDimension;
|
|
10801
|
-
}
|
|
10802
|
-
};
|
|
10803
|
-
var ConnectionError = class extends StoreError {
|
|
10804
|
-
};
|
|
10805
|
-
|
|
10806
|
-
// src/store/schema.ts
|
|
10807
|
-
var createTablesSQL = `
|
|
10808
|
-
-- Documents table
|
|
10809
|
-
CREATE TABLE IF NOT EXISTS documents(
|
|
10810
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
10811
|
-
library TEXT NOT NULL,
|
|
10812
|
-
version TEXT NOT NULL DEFAULT '',
|
|
10813
|
-
url TEXT NOT NULL,
|
|
10814
|
-
content TEXT,
|
|
10815
|
-
metadata JSON,
|
|
10816
|
-
sort_order INTEGER NOT NULL,
|
|
10817
|
-
UNIQUE(url, library, version, sort_order)
|
|
10818
|
-
);
|
|
10819
|
-
|
|
10820
|
-
-- Indexes
|
|
10821
|
-
CREATE INDEX IF NOT EXISTS idx_documents_library_lower ON documents(lower(library));
|
|
10822
|
-
CREATE INDEX IF NOT EXISTS idx_documents_version_lower ON documents(lower(library), lower(version));
|
|
10823
|
-
|
|
10824
|
-
-- Create Embeddings virtual table
|
|
10825
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS documents_vec USING vec0(
|
|
10826
|
-
library TEXT NOT NULL,
|
|
10827
|
-
version TEXT NOT NULL,
|
|
10828
|
-
embedding FLOAT[1536]
|
|
10829
|
-
);
|
|
10830
|
-
|
|
10831
|
-
-- Create FTS5 virtual table
|
|
10832
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
10833
|
-
content,
|
|
10834
|
-
title,
|
|
10835
|
-
url,
|
|
10836
|
-
path,
|
|
10837
|
-
tokenize='porter unicode61',
|
|
10838
|
-
content='documents',
|
|
10839
|
-
content_rowid='id'
|
|
10840
|
-
);
|
|
10841
|
-
|
|
10842
|
-
-- Delete trigger to maintain FTS index
|
|
10843
|
-
CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN
|
|
10844
|
-
INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
|
|
10845
|
-
VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
|
|
10846
|
-
END;
|
|
10847
|
-
|
|
10848
|
-
-- Update trigger to maintain FTS index
|
|
10849
|
-
CREATE TRIGGER IF NOT EXISTS documents_fts_after_update AFTER UPDATE ON documents BEGIN
|
|
10850
|
-
INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
|
|
10851
|
-
VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
|
|
10852
|
-
INSERT INTO documents_fts(rowid, content, title, url, path)
|
|
10853
|
-
VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
|
|
10854
|
-
END;
|
|
10855
|
-
|
|
10856
|
-
-- Insert trigger to maintain FTS index
|
|
10857
|
-
CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON documents BEGIN
|
|
10858
|
-
INSERT INTO documents_fts(rowid, content, title, url, path)
|
|
10859
|
-
VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
|
|
10860
|
-
END;
|
|
10861
|
-
`;
|
|
10862
|
-
|
|
10863
10830
|
// src/store/types.ts
|
|
10864
10831
|
function mapDbDocumentToDocument(doc) {
|
|
10865
10832
|
return {
|
|
@@ -10873,8 +10840,7 @@ function mapDbDocumentToDocument(doc) {
|
|
|
10873
10840
|
var DocumentStore = class {
|
|
10874
10841
|
db;
|
|
10875
10842
|
embeddings;
|
|
10876
|
-
dbDimension =
|
|
10877
|
-
// Fixed dimension from schema.ts
|
|
10843
|
+
dbDimension = VECTOR_DIMENSION;
|
|
10878
10844
|
modelDimension;
|
|
10879
10845
|
statements;
|
|
10880
10846
|
/**
|
|
@@ -11001,28 +10967,24 @@ var DocumentStore = class {
|
|
|
11001
10967
|
/**
|
|
11002
10968
|
* Initializes embeddings client using environment variables for configuration.
|
|
11003
10969
|
*
|
|
11004
|
-
*
|
|
11005
|
-
*
|
|
11006
|
-
*
|
|
11007
|
-
*
|
|
11008
|
-
*
|
|
10970
|
+
* The embedding model is configured using DOCS_MCP_EMBEDDING_MODEL environment variable.
|
|
10971
|
+
* Format: "provider:model_name" (e.g., "google:text-embedding-004") or just "model_name"
|
|
10972
|
+
* for OpenAI (default).
|
|
10973
|
+
*
|
|
10974
|
+
* Supported providers and their required environment variables:
|
|
10975
|
+
* - openai: OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_ORG_ID)
|
|
10976
|
+
* - google: GOOGLE_APPLICATION_CREDENTIALS (path to service account JSON)
|
|
10977
|
+
* - aws: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION (or BEDROCK_AWS_REGION)
|
|
10978
|
+
* - microsoft: Azure OpenAI credentials (AZURE_OPENAI_API_*)
|
|
11009
10979
|
*/
|
|
11010
10980
|
async initializeEmbeddings() {
|
|
11011
|
-
const
|
|
11012
|
-
const
|
|
11013
|
-
|
|
11014
|
-
stripNewLines: true,
|
|
11015
|
-
batchSize: 512,
|
|
11016
|
-
modelName
|
|
11017
|
-
};
|
|
11018
|
-
if (baseURL) {
|
|
11019
|
-
config.configuration = { baseURL };
|
|
11020
|
-
}
|
|
11021
|
-
this.embeddings = new OpenAIEmbeddings(config);
|
|
10981
|
+
const modelSpec = process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
|
|
10982
|
+
const { createEmbeddingModel } = await import("./EmbeddingFactory-6UEXNF44.js");
|
|
10983
|
+
this.embeddings = createEmbeddingModel(modelSpec);
|
|
11022
10984
|
const testVector = await this.embeddings.embedQuery("test");
|
|
11023
10985
|
this.modelDimension = testVector.length;
|
|
11024
10986
|
if (this.modelDimension > this.dbDimension) {
|
|
11025
|
-
throw new DimensionError(
|
|
10987
|
+
throw new DimensionError(modelSpec, this.modelDimension, this.dbDimension);
|
|
11026
10988
|
}
|
|
11027
10989
|
}
|
|
11028
10990
|
/**
|
|
@@ -11376,7 +11338,8 @@ var DocumentManagementService = class {
|
|
|
11376
11338
|
dbPath = path3.join(dbDir, "documents.db");
|
|
11377
11339
|
logger.debug(`\u{1F4BE} Using database directory from DOCS_MCP_STORE_PATH: ${dbDir}`);
|
|
11378
11340
|
} else {
|
|
11379
|
-
const
|
|
11341
|
+
const projectRoot = path3.resolve(import.meta.dirname, "..");
|
|
11342
|
+
const oldDbDir = path3.join(projectRoot, ".store");
|
|
11380
11343
|
const oldDbPath = path3.join(oldDbDir, "documents.db");
|
|
11381
11344
|
const oldDbExists = existsSync(oldDbPath);
|
|
11382
11345
|
if (oldDbExists) {
|
|
@@ -11605,17 +11568,21 @@ var DocumentManagementService = class {
|
|
|
11605
11568
|
export {
|
|
11606
11569
|
setLogLevel,
|
|
11607
11570
|
logger,
|
|
11571
|
+
HttpFetcher,
|
|
11572
|
+
FileFetcher,
|
|
11573
|
+
HtmlProcessor,
|
|
11608
11574
|
PipelineJobStatus,
|
|
11609
11575
|
PipelineManager,
|
|
11576
|
+
CancelJobTool,
|
|
11610
11577
|
VersionNotFoundError,
|
|
11611
|
-
|
|
11578
|
+
FetchUrlTool,
|
|
11612
11579
|
FindVersionTool,
|
|
11613
|
-
ListLibrariesTool,
|
|
11614
|
-
ScrapeTool,
|
|
11615
|
-
ListJobsTool,
|
|
11616
11580
|
GetJobInfoTool,
|
|
11617
|
-
|
|
11581
|
+
ListJobsTool,
|
|
11582
|
+
ListLibrariesTool,
|
|
11618
11583
|
RemoveTool,
|
|
11584
|
+
ScrapeTool,
|
|
11585
|
+
SearchTool,
|
|
11619
11586
|
DocumentManagementService
|
|
11620
11587
|
};
|
|
11621
|
-
//# sourceMappingURL=chunk-
|
|
11588
|
+
//# sourceMappingURL=chunk-ADZQJG2M.js.map
|