@arabold/docs-mcp-server 1.6.0 → 1.8.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,32 +1,13 @@
1
- var __create = Object.create;
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __getProtoOf = Object.getPrototypeOf;
6
- var __hasOwnProp = Object.prototype.hasOwnProperty;
7
- var __commonJS = (cb, mod) => function __require() {
8
- return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
9
- };
10
- var __export = (target, all2) => {
11
- for (var name in all2)
12
- __defProp(target, name, { get: all2[name], enumerable: true });
13
- };
14
- var __copyProps = (to, from, except, desc) => {
15
- if (from && typeof from === "object" || typeof from === "function") {
16
- for (let key of __getOwnPropNames(from))
17
- if (!__hasOwnProp.call(to, key) && key !== except)
18
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
19
- }
20
- return to;
21
- };
22
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
23
- // If the importer is in node compatibility mode or this is not an ESM
24
- // file that has been converted to a CommonJS file using a Babel-
25
- // compatible transform (i.e. "__esModule" has not been set), then set
26
- // "default" to the CommonJS "module.exports" for node compatibility.
27
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
28
- mod
29
- ));
1
+ import {
2
+ ConnectionError,
3
+ DimensionError,
4
+ StoreError,
5
+ VECTOR_DIMENSION,
6
+ __commonJS,
7
+ __export,
8
+ __toESM,
9
+ createTablesSQL
10
+ } from "./chunk-YCXNASA6.js";
30
11
 
31
12
  // node_modules/extend/index.js
32
13
  var require_extend = __commonJS({
@@ -163,55 +144,8 @@ var logger = {
163
144
  }
164
145
  };
165
146
 
166
- // node_modules/uuid/dist/esm-node/stringify.js
167
- var byteToHex = [];
168
- for (let i = 0; i < 256; ++i) {
169
- byteToHex.push((i + 256).toString(16).slice(1));
170
- }
171
- function unsafeStringify(arr, offset = 0) {
172
- return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
173
- }
174
-
175
- // node_modules/uuid/dist/esm-node/rng.js
176
- import crypto from "node:crypto";
177
- var rnds8Pool = new Uint8Array(256);
178
- var poolPtr = rnds8Pool.length;
179
- function rng() {
180
- if (poolPtr > rnds8Pool.length - 16) {
181
- crypto.randomFillSync(rnds8Pool);
182
- poolPtr = 0;
183
- }
184
- return rnds8Pool.slice(poolPtr, poolPtr += 16);
185
- }
186
-
187
- // node_modules/uuid/dist/esm-node/native.js
188
- import crypto2 from "node:crypto";
189
- var native_default = {
190
- randomUUID: crypto2.randomUUID
191
- };
192
-
193
- // node_modules/uuid/dist/esm-node/v4.js
194
- function v4(options, buf, offset) {
195
- if (native_default.randomUUID && !buf && !options) {
196
- return native_default.randomUUID();
197
- }
198
- options = options || {};
199
- const rnds = options.random || (options.rng || rng)();
200
- rnds[6] = rnds[6] & 15 | 64;
201
- rnds[8] = rnds[8] & 63 | 128;
202
- if (buf) {
203
- offset = offset || 0;
204
- for (let i = 0; i < 16; ++i) {
205
- buf[offset + i] = rnds[i];
206
- }
207
- return buf;
208
- }
209
- return unsafeStringify(rnds);
210
- }
211
- var v4_default = v4;
212
-
213
- // src/utils/url.ts
214
- import psl from "psl";
147
+ // src/scraper/fetcher/HttpFetcher.ts
148
+ import axios from "axios";
215
149
 
216
150
  // src/utils/errors.ts
217
151
  var ScraperError = class extends Error {
@@ -243,67 +177,7 @@ var RedirectError = class extends ScraperError {
243
177
  }
244
178
  };
245
179
 
246
- // src/utils/url.ts
247
- var defaultNormalizerOptions = {
248
- ignoreCase: true,
249
- removeHash: true,
250
- removeTrailingSlash: true,
251
- removeQuery: false,
252
- removeIndex: true
253
- };
254
- function normalizeUrl(url, options = defaultNormalizerOptions) {
255
- try {
256
- const parsedUrl = new URL(url);
257
- const finalOptions = { ...defaultNormalizerOptions, ...options };
258
- const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
259
- if (finalOptions.removeIndex) {
260
- normalized.pathname = normalized.pathname.replace(
261
- /\/index\.(html|htm|asp|php|jsp)$/i,
262
- "/"
263
- );
264
- }
265
- if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
266
- normalized.pathname = normalized.pathname.replace(/\/+$/, "");
267
- }
268
- const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
269
- const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
270
- let result = normalized.origin + normalized.pathname;
271
- if (preservedSearch) {
272
- result += preservedSearch;
273
- }
274
- if (preservedHash) {
275
- result += preservedHash;
276
- }
277
- if (finalOptions.ignoreCase) {
278
- result = result.toLowerCase();
279
- }
280
- return result;
281
- } catch {
282
- return url;
283
- }
284
- }
285
- function validateUrl(url) {
286
- try {
287
- new URL(url);
288
- } catch (error) {
289
- throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
290
- }
291
- }
292
- function hasSameHostname(urlA, urlB) {
293
- return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
294
- }
295
- function hasSameDomain(urlA, urlB) {
296
- const domainA = psl.get(urlA.hostname.toLowerCase());
297
- const domainB = psl.get(urlB.hostname.toLowerCase());
298
- return domainA !== null && domainA === domainB;
299
- }
300
- function isSubpath(baseUrl, targetUrl) {
301
- const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
302
- return targetUrl.pathname.startsWith(basePath);
303
- }
304
-
305
180
  // src/scraper/fetcher/HttpFetcher.ts
306
- import axios from "axios";
307
181
  var HttpFetcher = class {
308
182
  MAX_RETRIES = 6;
309
183
  BASE_DELAY = 1e3;
@@ -413,29 +287,6 @@ var FileFetcher = class {
413
287
  }
414
288
  };
415
289
 
416
- // src/scraper/strategies/BaseScraperStrategy.ts
417
- import { URL as URL2 } from "node:url";
418
-
419
- // src/pipeline/errors.ts
420
- var PipelineError = class extends Error {
421
- constructor(message, cause) {
422
- super(message);
423
- this.cause = cause;
424
- this.name = this.constructor.name;
425
- if (cause?.stack) {
426
- this.stack = `${this.stack}
427
- Caused by: ${cause.stack}`;
428
- }
429
- }
430
- };
431
- var PipelineStateError = class extends PipelineError {
432
- };
433
- var CancellationError = class extends PipelineError {
434
- constructor(message = "Operation cancelled") {
435
- super(message);
436
- }
437
- };
438
-
439
290
  // src/scraper/processor/HtmlProcessor.ts
440
291
  import createDOMPurify from "dompurify";
441
292
  import { JSDOM } from "jsdom";
@@ -458,7 +309,7 @@ var HtmlProcessor = class {
458
309
  "input",
459
310
  "textarea",
460
311
  "select",
461
- "form",
312
+ // "form", // Known issue: Some pages use alerts for important content
462
313
  ".ads",
463
314
  ".advertisement",
464
315
  ".banner",
@@ -491,18 +342,16 @@ var HtmlProcessor = class {
491
342
  ".signup-form",
492
343
  ".tooltip",
493
344
  ".dropdown-menu",
494
- ".alert",
345
+ // ".alert", // Known issue: Some pages use alerts for important content
495
346
  ".breadcrumb",
496
347
  ".pagination",
497
- '[role="alert"]',
348
+ // '[role="alert"]', // Known issue: Some pages use alerts for important content
498
349
  '[role="banner"]',
499
350
  '[role="dialog"]',
500
351
  '[role="alertdialog"]',
501
352
  '[role="region"][aria-label*="skip" i]',
502
353
  '[aria-modal="true"]',
503
- ".noprint",
504
- "figure",
505
- "sup"
354
+ ".noprint"
506
355
  ];
507
356
  constructor(options) {
508
357
  this.turndownService = new TurndownService({
@@ -533,9 +382,17 @@ var HtmlProcessor = class {
533
382
  }
534
383
  }
535
384
  }
385
+ const text3 = (() => {
386
+ const clone = element.cloneNode(true);
387
+ const brElements = Array.from(clone.querySelectorAll("br"));
388
+ for (const br of brElements) {
389
+ br.replaceWith("\n");
390
+ }
391
+ return clone.textContent;
392
+ })();
536
393
  return `
537
394
  \`\`\`${language}
538
- ${node2.textContent}
395
+ ${text3}
539
396
  \`\`\`
540
397
  `;
541
398
  }
@@ -641,6 +498,136 @@ var MarkdownProcessor = class {
641
498
  }
642
499
  };
643
500
 
501
+ // node_modules/uuid/dist/esm-node/stringify.js
502
+ var byteToHex = [];
503
+ for (let i = 0; i < 256; ++i) {
504
+ byteToHex.push((i + 256).toString(16).slice(1));
505
+ }
506
+ function unsafeStringify(arr, offset = 0) {
507
+ return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
508
+ }
509
+
510
+ // node_modules/uuid/dist/esm-node/rng.js
511
+ import crypto from "node:crypto";
512
+ var rnds8Pool = new Uint8Array(256);
513
+ var poolPtr = rnds8Pool.length;
514
+ function rng() {
515
+ if (poolPtr > rnds8Pool.length - 16) {
516
+ crypto.randomFillSync(rnds8Pool);
517
+ poolPtr = 0;
518
+ }
519
+ return rnds8Pool.slice(poolPtr, poolPtr += 16);
520
+ }
521
+
522
+ // node_modules/uuid/dist/esm-node/native.js
523
+ import crypto2 from "node:crypto";
524
+ var native_default = {
525
+ randomUUID: crypto2.randomUUID
526
+ };
527
+
528
+ // node_modules/uuid/dist/esm-node/v4.js
529
+ function v4(options, buf, offset) {
530
+ if (native_default.randomUUID && !buf && !options) {
531
+ return native_default.randomUUID();
532
+ }
533
+ options = options || {};
534
+ const rnds = options.random || (options.rng || rng)();
535
+ rnds[6] = rnds[6] & 15 | 64;
536
+ rnds[8] = rnds[8] & 63 | 128;
537
+ if (buf) {
538
+ offset = offset || 0;
539
+ for (let i = 0; i < 16; ++i) {
540
+ buf[offset + i] = rnds[i];
541
+ }
542
+ return buf;
543
+ }
544
+ return unsafeStringify(rnds);
545
+ }
546
+ var v4_default = v4;
547
+
548
+ // src/utils/url.ts
549
+ import psl from "psl";
550
+ var defaultNormalizerOptions = {
551
+ ignoreCase: true,
552
+ removeHash: true,
553
+ removeTrailingSlash: true,
554
+ removeQuery: false,
555
+ removeIndex: true
556
+ };
557
+ function normalizeUrl(url, options = defaultNormalizerOptions) {
558
+ try {
559
+ const parsedUrl = new URL(url);
560
+ const finalOptions = { ...defaultNormalizerOptions, ...options };
561
+ const normalized = new URL(parsedUrl.origin + parsedUrl.pathname);
562
+ if (finalOptions.removeIndex) {
563
+ normalized.pathname = normalized.pathname.replace(
564
+ /\/index\.(html|htm|asp|php|jsp)$/i,
565
+ "/"
566
+ );
567
+ }
568
+ if (finalOptions.removeTrailingSlash && normalized.pathname.length > 1) {
569
+ normalized.pathname = normalized.pathname.replace(/\/+$/, "");
570
+ }
571
+ const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
572
+ const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
573
+ let result = normalized.origin + normalized.pathname;
574
+ if (preservedSearch) {
575
+ result += preservedSearch;
576
+ }
577
+ if (preservedHash) {
578
+ result += preservedHash;
579
+ }
580
+ if (finalOptions.ignoreCase) {
581
+ result = result.toLowerCase();
582
+ }
583
+ return result;
584
+ } catch {
585
+ return url;
586
+ }
587
+ }
588
+ function validateUrl(url) {
589
+ try {
590
+ new URL(url);
591
+ } catch (error) {
592
+ throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
593
+ }
594
+ }
595
+ function hasSameHostname(urlA, urlB) {
596
+ return urlA.hostname.toLowerCase() === urlB.hostname.toLowerCase();
597
+ }
598
+ function hasSameDomain(urlA, urlB) {
599
+ const domainA = psl.get(urlA.hostname.toLowerCase());
600
+ const domainB = psl.get(urlB.hostname.toLowerCase());
601
+ return domainA !== null && domainA === domainB;
602
+ }
603
+ function isSubpath(baseUrl, targetUrl) {
604
+ const basePath = baseUrl.pathname.endsWith("/") ? baseUrl.pathname : `${baseUrl.pathname}/`;
605
+ return targetUrl.pathname.startsWith(basePath);
606
+ }
607
+
608
+ // src/scraper/strategies/BaseScraperStrategy.ts
609
+ import { URL as URL2 } from "node:url";
610
+
611
+ // src/pipeline/errors.ts
612
+ var PipelineError = class extends Error {
613
+ constructor(message, cause) {
614
+ super(message);
615
+ this.cause = cause;
616
+ this.name = this.constructor.name;
617
+ if (cause?.stack) {
618
+ this.stack = `${this.stack}
619
+ Caused by: ${cause.stack}`;
620
+ }
621
+ }
622
+ };
623
+ var PipelineStateError = class extends PipelineError {
624
+ };
625
+ var CancellationError = class extends PipelineError {
626
+ constructor(message = "Operation cancelled") {
627
+ super(message);
628
+ }
629
+ };
630
+
644
631
  // src/scraper/strategies/BaseScraperStrategy.ts
645
632
  var DEFAULT_MAX_PAGES = 100;
646
633
  var DEFAULT_MAX_DEPTH = 3;
@@ -1330,61 +1317,58 @@ var LibraryNotFoundError = class extends ToolError {
1330
1317
  }
1331
1318
  super(message, "SearchTool");
1332
1319
  this.requestedLibrary = requestedLibrary;
1333
- this.suggestions = suggestions;
1334
- }
1335
- };
1336
-
1337
- // src/tools/SearchTool.ts
1338
- var SearchTool = class {
1339
- docService;
1340
- constructor(docService) {
1341
- this.docService = docService;
1342
- }
1343
- async execute(options) {
1344
- const { library, version = "latest", query, limit = 5, exactMatch = false } = options;
1345
- logger.info(
1346
- `\u{1F50D} Searching ${library}@${version} for: ${query}${exactMatch ? " (exact match)" : ""}`
1347
- );
1348
- try {
1349
- await this.docService.validateLibraryExists(library);
1350
- let versionToSearch = version;
1351
- if (!exactMatch) {
1352
- const versionResult = await this.docService.findBestVersion(library, version);
1353
- versionToSearch = versionResult.bestMatch;
1354
- }
1355
- const results = await this.docService.searchStore(
1356
- library,
1357
- versionToSearch,
1358
- query,
1359
- limit
1320
+ this.suggestions = suggestions;
1321
+ }
1322
+ };
1323
+
1324
+ // src/tools/FetchUrlTool.ts
1325
+ var FetchUrlTool = class {
1326
+ constructor(httpFetcher, fileFetcher, processor) {
1327
+ this.processor = processor;
1328
+ this.fetchers = [httpFetcher, fileFetcher];
1329
+ }
1330
+ /**
1331
+ * Collection of fetchers that will be tried in order for a given URL.
1332
+ */
1333
+ fetchers;
1334
+ /**
1335
+ * Fetches content from a URL and converts it to Markdown.
1336
+ * Supports both HTTP/HTTPS URLs and local file URLs (file://).
1337
+ * @returns The processed Markdown content
1338
+ * @throws {ToolError} If fetching or processing fails
1339
+ */
1340
+ async execute(options) {
1341
+ const { url } = options;
1342
+ const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
1343
+ const fetcherIndex = canFetchResults.findIndex((result) => result === true);
1344
+ if (fetcherIndex === -1) {
1345
+ throw new ToolError(
1346
+ `Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
1347
+ this.constructor.name
1360
1348
  );
1361
- logger.info(`\u2705 Found ${results.length} matching results`);
1362
- return { results };
1349
+ }
1350
+ const fetcher = this.fetchers[fetcherIndex];
1351
+ try {
1352
+ logger.info(`\u{1F4E1} Fetching ${url}...`);
1353
+ const rawContent = await fetcher.fetch(url, {
1354
+ followRedirects: options.followRedirects ?? true,
1355
+ maxRetries: 3
1356
+ });
1357
+ logger.info("\u{1F504} Converting to Markdown...");
1358
+ const processed = await this.processor.process(rawContent);
1359
+ logger.info(`\u2705 Successfully converted ${url} to Markdown`);
1360
+ return processed.content;
1363
1361
  } catch (error) {
1364
- if (error instanceof LibraryNotFoundError) {
1365
- logger.info(`\u2139\uFE0F Library not found: ${error.message}`);
1366
- return {
1367
- results: [],
1368
- error: {
1369
- message: error.message,
1370
- suggestions: error.suggestions
1371
- }
1372
- };
1373
- }
1374
- if (error instanceof VersionNotFoundError) {
1375
- logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
1376
- return {
1377
- results: [],
1378
- error: {
1379
- message: error.message,
1380
- availableVersions: error.availableVersions
1381
- }
1382
- };
1362
+ if (error instanceof ScraperError) {
1363
+ throw new ToolError(
1364
+ `Failed to fetch or process URL: ${error.message}`,
1365
+ this.constructor.name
1366
+ );
1383
1367
  }
1384
- logger.error(
1385
- `\u274C Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
1368
+ throw new ToolError(
1369
+ `Failed to fetch or process URL: ${error instanceof Error ? error.message : String(error)}`,
1370
+ this.constructor.name
1386
1371
  );
1387
- throw error;
1388
1372
  }
1389
1373
  }
1390
1374
  };
@@ -1550,72 +1534,69 @@ var ScrapeTool = class {
1550
1534
  }
1551
1535
  };
1552
1536
 
1553
- // src/tools/ListJobsTool.ts
1554
- var ListJobsTool = class {
1555
- manager;
1556
- // Change property name and type
1557
- /**
1558
- * Creates an instance of ListJobsTool.
1559
- * @param manager The PipelineManager instance.
1560
- */
1561
- constructor(manager) {
1562
- this.manager = manager;
1537
+ // src/tools/SearchTool.ts
1538
+ var SearchTool = class {
1539
+ docService;
1540
+ constructor(docService) {
1541
+ this.docService = docService;
1563
1542
  }
1564
- /**
1565
- * Executes the tool to retrieve a list of pipeline jobs.
1566
- * @param input - The input parameters, optionally including a status filter.
1567
- * @returns A promise that resolves with the list of simplified job objects.
1568
- * @throws {PipelineStateError} If the pipeline manager is somehow unavailable.
1569
- */
1570
- async execute(input) {
1571
- const jobs = await this.manager.getJobs(input.status);
1572
- const simplifiedJobs = jobs.map(
1573
- (job) => ({
1574
- id: job.id,
1575
- library: job.library,
1576
- version: job.version,
1577
- status: job.status,
1578
- createdAt: job.createdAt.toISOString(),
1579
- startedAt: job.startedAt?.toISOString() ?? null,
1580
- finishedAt: job.finishedAt?.toISOString() ?? null,
1581
- error: job.error?.message ?? null
1582
- })
1543
+ async execute(options) {
1544
+ const { library, version, query, limit = 5, exactMatch = false } = options;
1545
+ if (exactMatch && (!version || version === "latest")) {
1546
+ await this.docService.validateLibraryExists(library);
1547
+ const versions = await this.docService.listVersions(library);
1548
+ throw new VersionNotFoundError(
1549
+ library,
1550
+ "latest",
1551
+ versions
1552
+ // versions already has the correct { version: string, indexed: boolean } format
1553
+ );
1554
+ }
1555
+ const resolvedVersion = version || "latest";
1556
+ logger.info(
1557
+ `\u{1F50D} Searching ${library}@${resolvedVersion} for: ${query}${exactMatch ? " (exact match)" : ""}`
1583
1558
  );
1584
- return { jobs: simplifiedJobs };
1585
- }
1586
- };
1587
-
1588
- // src/tools/GetJobInfoTool.ts
1589
- var GetJobInfoTool = class {
1590
- manager;
1591
- /**
1592
- * Creates an instance of GetJobInfoTool.
1593
- * @param manager The PipelineManager instance.
1594
- */
1595
- constructor(manager) {
1596
- this.manager = manager;
1597
- }
1598
- /**
1599
- * Executes the tool to retrieve simplified info for a specific job.
1600
- * @param input - The input parameters, containing the jobId.
1601
- * @returns A promise that resolves with the simplified job info or null if not found.
1602
- */
1603
- async execute(input) {
1604
- const job = await this.manager.getJob(input.jobId);
1605
- if (!job) {
1606
- return { job: null };
1559
+ try {
1560
+ await this.docService.validateLibraryExists(library);
1561
+ let versionToSearch = resolvedVersion;
1562
+ if (!exactMatch) {
1563
+ const versionResult = await this.docService.findBestVersion(library, version);
1564
+ versionToSearch = versionResult.bestMatch;
1565
+ }
1566
+ const results = await this.docService.searchStore(
1567
+ library,
1568
+ versionToSearch,
1569
+ query,
1570
+ limit
1571
+ );
1572
+ logger.info(`\u2705 Found ${results.length} matching results`);
1573
+ return { results };
1574
+ } catch (error) {
1575
+ if (error instanceof LibraryNotFoundError) {
1576
+ logger.info(`\u2139\uFE0F Library not found: ${error.message}`);
1577
+ return {
1578
+ results: [],
1579
+ error: {
1580
+ message: error.message,
1581
+ suggestions: error.suggestions
1582
+ }
1583
+ };
1584
+ }
1585
+ if (error instanceof VersionNotFoundError) {
1586
+ logger.info(`\u2139\uFE0F Version not found: ${error.message}`);
1587
+ return {
1588
+ results: [],
1589
+ error: {
1590
+ message: error.message,
1591
+ availableVersions: error.availableVersions
1592
+ }
1593
+ };
1594
+ }
1595
+ logger.error(
1596
+ `\u274C Search failed: ${error instanceof Error ? error.message : "Unknown error"}`
1597
+ );
1598
+ throw error;
1607
1599
  }
1608
- const jobInfo = {
1609
- id: job.id,
1610
- library: job.library,
1611
- version: job.version,
1612
- status: job.status,
1613
- createdAt: job.createdAt.toISOString(),
1614
- startedAt: job.startedAt?.toISOString() ?? null,
1615
- finishedAt: job.finishedAt?.toISOString() ?? null,
1616
- error: job.error?.message ?? null
1617
- };
1618
- return { job: jobInfo };
1619
1600
  }
1620
1601
  };
1621
1602
 
@@ -1676,6 +1657,75 @@ var CancelJobTool = class {
1676
1657
  }
1677
1658
  };
1678
1659
 
1660
+ // src/tools/GetJobInfoTool.ts
1661
+ var GetJobInfoTool = class {
1662
+ manager;
1663
+ /**
1664
+ * Creates an instance of GetJobInfoTool.
1665
+ * @param manager The PipelineManager instance.
1666
+ */
1667
+ constructor(manager) {
1668
+ this.manager = manager;
1669
+ }
1670
+ /**
1671
+ * Executes the tool to retrieve simplified info for a specific job.
1672
+ * @param input - The input parameters, containing the jobId.
1673
+ * @returns A promise that resolves with the simplified job info or null if not found.
1674
+ */
1675
+ async execute(input) {
1676
+ const job = await this.manager.getJob(input.jobId);
1677
+ if (!job) {
1678
+ return { job: null };
1679
+ }
1680
+ const jobInfo = {
1681
+ id: job.id,
1682
+ library: job.library,
1683
+ version: job.version,
1684
+ status: job.status,
1685
+ createdAt: job.createdAt.toISOString(),
1686
+ startedAt: job.startedAt?.toISOString() ?? null,
1687
+ finishedAt: job.finishedAt?.toISOString() ?? null,
1688
+ error: job.error?.message ?? null
1689
+ };
1690
+ return { job: jobInfo };
1691
+ }
1692
+ };
1693
+
1694
+ // src/tools/ListJobsTool.ts
1695
+ var ListJobsTool = class {
1696
+ manager;
1697
+ // Change property name and type
1698
+ /**
1699
+ * Creates an instance of ListJobsTool.
1700
+ * @param manager The PipelineManager instance.
1701
+ */
1702
+ constructor(manager) {
1703
+ this.manager = manager;
1704
+ }
1705
+ /**
1706
+ * Executes the tool to retrieve a list of pipeline jobs.
1707
+ * @param input - The input parameters, optionally including a status filter.
1708
+ * @returns A promise that resolves with the list of simplified job objects.
1709
+ * @throws {PipelineStateError} If the pipeline manager is somehow unavailable.
1710
+ */
1711
+ async execute(input) {
1712
+ const jobs = await this.manager.getJobs(input.status);
1713
+ const simplifiedJobs = jobs.map(
1714
+ (job) => ({
1715
+ id: job.id,
1716
+ library: job.library,
1717
+ version: job.version,
1718
+ status: job.status,
1719
+ createdAt: job.createdAt.toISOString(),
1720
+ startedAt: job.startedAt?.toISOString() ?? null,
1721
+ finishedAt: job.finishedAt?.toISOString() ?? null,
1722
+ error: job.error?.message ?? null
1723
+ })
1724
+ );
1725
+ return { jobs: simplifiedJobs };
1726
+ }
1727
+ };
1728
+
1679
1729
  // src/tools/RemoveTool.ts
1680
1730
  var RemoveToolInputSchema = {
1681
1731
  type: "object",
@@ -10774,92 +10824,9 @@ ${subsequentSiblings.map((d) => d.pageContent).join("\n\n")}`;
10774
10824
  };
10775
10825
 
10776
10826
  // src/store/DocumentStore.ts
10777
- import { OpenAIEmbeddings } from "@langchain/openai";
10778
10827
  import Database from "better-sqlite3";
10779
10828
  import * as sqliteVec from "sqlite-vec";
10780
10829
 
10781
- // src/store/errors.ts
10782
- var StoreError = class extends Error {
10783
- constructor(message, cause) {
10784
- super(cause ? `${message} caused by ${cause}` : message);
10785
- this.cause = cause;
10786
- this.name = this.constructor.name;
10787
- const causeError = cause instanceof Error ? cause : cause ? new Error(String(cause)) : void 0;
10788
- if (causeError?.stack) {
10789
- this.stack = causeError.stack;
10790
- }
10791
- }
10792
- };
10793
- var DimensionError = class extends StoreError {
10794
- constructor(modelName, modelDimension, dbDimension) {
10795
- super(
10796
- `Model "${modelName}" produces ${modelDimension}-dimensional vectors, which exceeds the database's fixed dimension of ${dbDimension}. Please use a model with dimension \u2264 ${dbDimension}.`
10797
- );
10798
- this.modelName = modelName;
10799
- this.modelDimension = modelDimension;
10800
- this.dbDimension = dbDimension;
10801
- }
10802
- };
10803
- var ConnectionError = class extends StoreError {
10804
- };
10805
-
10806
- // src/store/schema.ts
10807
- var createTablesSQL = `
10808
- -- Documents table
10809
- CREATE TABLE IF NOT EXISTS documents(
10810
- id INTEGER PRIMARY KEY AUTOINCREMENT,
10811
- library TEXT NOT NULL,
10812
- version TEXT NOT NULL DEFAULT '',
10813
- url TEXT NOT NULL,
10814
- content TEXT,
10815
- metadata JSON,
10816
- sort_order INTEGER NOT NULL,
10817
- UNIQUE(url, library, version, sort_order)
10818
- );
10819
-
10820
- -- Indexes
10821
- CREATE INDEX IF NOT EXISTS idx_documents_library_lower ON documents(lower(library));
10822
- CREATE INDEX IF NOT EXISTS idx_documents_version_lower ON documents(lower(library), lower(version));
10823
-
10824
- -- Create Embeddings virtual table
10825
- CREATE VIRTUAL TABLE IF NOT EXISTS documents_vec USING vec0(
10826
- library TEXT NOT NULL,
10827
- version TEXT NOT NULL,
10828
- embedding FLOAT[1536]
10829
- );
10830
-
10831
- -- Create FTS5 virtual table
10832
- CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
10833
- content,
10834
- title,
10835
- url,
10836
- path,
10837
- tokenize='porter unicode61',
10838
- content='documents',
10839
- content_rowid='id'
10840
- );
10841
-
10842
- -- Delete trigger to maintain FTS index
10843
- CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN
10844
- INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
10845
- VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
10846
- END;
10847
-
10848
- -- Update trigger to maintain FTS index
10849
- CREATE TRIGGER IF NOT EXISTS documents_fts_after_update AFTER UPDATE ON documents BEGIN
10850
- INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
10851
- VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
10852
- INSERT INTO documents_fts(rowid, content, title, url, path)
10853
- VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
10854
- END;
10855
-
10856
- -- Insert trigger to maintain FTS index
10857
- CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON documents BEGIN
10858
- INSERT INTO documents_fts(rowid, content, title, url, path)
10859
- VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
10860
- END;
10861
- `;
10862
-
10863
10830
  // src/store/types.ts
10864
10831
  function mapDbDocumentToDocument(doc) {
10865
10832
  return {
@@ -10873,8 +10840,7 @@ function mapDbDocumentToDocument(doc) {
10873
10840
  var DocumentStore = class {
10874
10841
  db;
10875
10842
  embeddings;
10876
- dbDimension = 1536;
10877
- // Fixed dimension from schema.ts
10843
+ dbDimension = VECTOR_DIMENSION;
10878
10844
  modelDimension;
10879
10845
  statements;
10880
10846
  /**
@@ -11001,28 +10967,24 @@ var DocumentStore = class {
11001
10967
  /**
11002
10968
  * Initializes embeddings client using environment variables for configuration.
11003
10969
  *
11004
- * Supports:
11005
- * - OPENAI_API_KEY (handled automatically by LangChain)
11006
- * - OPENAI_ORG_ID (handled automatically by LangChain)
11007
- * - DOCS_MCP_EMBEDDING_MODEL (optional, defaults to "text-embedding-3-small")
11008
- * - OPENAI_API_BASE (optional)
10970
+ * The embedding model is configured using DOCS_MCP_EMBEDDING_MODEL environment variable.
10971
+ * Format: "provider:model_name" (e.g., "google:text-embedding-004") or just "model_name"
10972
+ * for OpenAI (default).
10973
+ *
10974
+ * Supported providers and their required environment variables:
10975
+ * - openai: OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_ORG_ID)
10976
+ * - google: GOOGLE_APPLICATION_CREDENTIALS (path to service account JSON)
10977
+ * - aws: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION (or BEDROCK_AWS_REGION)
10978
+ * - microsoft: Azure OpenAI credentials (AZURE_OPENAI_API_*)
11009
10979
  */
11010
10980
  async initializeEmbeddings() {
11011
- const modelName = process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
11012
- const baseURL = process.env.OPENAI_API_BASE;
11013
- const config = {
11014
- stripNewLines: true,
11015
- batchSize: 512,
11016
- modelName
11017
- };
11018
- if (baseURL) {
11019
- config.configuration = { baseURL };
11020
- }
11021
- this.embeddings = new OpenAIEmbeddings(config);
10981
+ const modelSpec = process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
10982
+ const { createEmbeddingModel } = await import("./EmbeddingFactory-6UEXNF44.js");
10983
+ this.embeddings = createEmbeddingModel(modelSpec);
11022
10984
  const testVector = await this.embeddings.embedQuery("test");
11023
10985
  this.modelDimension = testVector.length;
11024
10986
  if (this.modelDimension > this.dbDimension) {
11025
- throw new DimensionError(modelName, this.modelDimension, this.dbDimension);
10987
+ throw new DimensionError(modelSpec, this.modelDimension, this.dbDimension);
11026
10988
  }
11027
10989
  }
11028
10990
  /**
@@ -11376,7 +11338,8 @@ var DocumentManagementService = class {
11376
11338
  dbPath = path3.join(dbDir, "documents.db");
11377
11339
  logger.debug(`\u{1F4BE} Using database directory from DOCS_MCP_STORE_PATH: ${dbDir}`);
11378
11340
  } else {
11379
- const oldDbDir = path3.join(process.cwd(), ".store");
11341
+ const projectRoot = path3.resolve(import.meta.dirname, "..");
11342
+ const oldDbDir = path3.join(projectRoot, ".store");
11380
11343
  const oldDbPath = path3.join(oldDbDir, "documents.db");
11381
11344
  const oldDbExists = existsSync(oldDbPath);
11382
11345
  if (oldDbExists) {
@@ -11605,17 +11568,21 @@ var DocumentManagementService = class {
11605
11568
  export {
11606
11569
  setLogLevel,
11607
11570
  logger,
11571
+ HttpFetcher,
11572
+ FileFetcher,
11573
+ HtmlProcessor,
11608
11574
  PipelineJobStatus,
11609
11575
  PipelineManager,
11576
+ CancelJobTool,
11610
11577
  VersionNotFoundError,
11611
- SearchTool,
11578
+ FetchUrlTool,
11612
11579
  FindVersionTool,
11613
- ListLibrariesTool,
11614
- ScrapeTool,
11615
- ListJobsTool,
11616
11580
  GetJobInfoTool,
11617
- CancelJobTool,
11581
+ ListJobsTool,
11582
+ ListLibrariesTool,
11618
11583
  RemoveTool,
11584
+ ScrapeTool,
11585
+ SearchTool,
11619
11586
  DocumentManagementService
11620
11587
  };
11621
- //# sourceMappingURL=chunk-S7C2LRQA.js.map
11588
+ //# sourceMappingURL=chunk-ADZQJG2M.js.map