npm - @khoinguyen2002/doc-mcp - Versions diffs - 1.0.4 → 1.0.5 - Mend

@khoinguyen2002/doc-mcp 1.0.4 → 1.0.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32)

package/dist/config.d.ts +6 -4
package/dist/config.d.ts.map +1 -1
package/dist/config.js +22 -7
package/dist/db/rateLimiter.d.ts +6 -0
package/dist/db/rateLimiter.d.ts.map +1 -0
package/dist/db/rateLimiter.js +20 -0
package/dist/db/syncState.d.ts +12 -0
package/dist/db/syncState.d.ts.map +1 -0
package/dist/db/syncState.js +69 -0
package/dist/db/vector.d.ts +61 -6
package/dist/db/vector.d.ts.map +1 -1
package/dist/db/vector.js +249 -109
package/dist/mcp-server.js +44 -23
package/dist/tools/driveTools.d.ts +20 -16
package/dist/tools/driveTools.d.ts.map +1 -1
package/dist/tools/driveTools.js +100 -149
package/dist/tools/ingestFlow.d.ts +8 -0
package/dist/tools/ingestFlow.d.ts.map +1 -0
package/dist/tools/ingestFlow.js +407 -0
package/dist/tools/knowledgeTools.d.ts +25 -6
package/dist/tools/knowledgeTools.d.ts.map +1 -1
package/dist/tools/knowledgeTools.js +29 -40
package/package.json +8 -1
package/src/config.ts +28 -9
package/src/db/rateLimiter.ts +25 -0
package/src/db/syncState.ts +87 -0
package/src/db/vector.ts +305 -115
package/src/mcp-server.ts +55 -33
package/src/tools/driveTools.ts +111 -175
package/src/tools/ingestFlow.ts +508 -0
package/src/tools/knowledgeTools.ts +34 -38
package/src/types/turndown-plugin-gfm.d.ts +8 -0

package/src/mcp-server.ts CHANGED

@@ -3,44 +3,27 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { z } from "zod";
 import { listDriveFiles, readDriveDocument } from "./tools/driveTools.js";
-import { saveAgentNote, searchKnowledge } from "./tools/knowledgeTools.js";
-import { config } from "./config.js";
-const DRIVE_FOLDER_ID = config.DOC_MCP_DRIVE_FOLDER_ID;
-if (!DRIVE_FOLDER_ID) {
-  console.error(
-    "Missing DOC_MCP_DRIVE_FOLDER_ID environment variable. The doc-agent requires a target folder ID.",
-  );
-  process.exit(1);
-}
+import { saveAgentNote, searchKnowledge, searchExact } from "./tools/knowledgeTools.js";
 const server = new McpServer({
   name: "doc-agent",
-  version: "1.0.4",
+  version: "1.2.0",
 });
-// Register tools
 server.registerTool(
   "list_drive_files",
   {
     description:
-      "List and search for Google Drive documents and subfolders in a specific folder.",
+      "List all Google Drive documents accessible to this agent. Returns file IDs, names, and types. Use keyword to filter by title.",
     inputSchema: {
       keyword: z
         .string()
         .optional()
-        .describe("Optional keyword to search for in document titles"),
-      targetFolderId: z
-        .string()
-        .optional()
-        .describe(
-          "Optional Google Drive folder ID to list contents from. Defaults to the root knowledge folder.",
-        ),
+        .describe("Optional keyword to filter documents by title"),
     },
   },
-  async ({ keyword, targetFolderId }) => {
-    const res = await listDriveFiles(keyword, targetFolderId);
+  async ({ keyword }) => {
+    const res = await listDriveFiles(keyword);
     if (!res.success) {
       return {
         content: [{ type: "text", text: `Error: ${res.error}` }],
@@ -50,24 +33,24 @@ server.registerTool(
     return {
       content: [{ type: "text", text: JSON.stringify(res.results, null, 2) }],
     };
-  },
+  }
 );
 server.registerTool(
   "read_drive_document",
   {
     description:
-      "Read the content of a specific Google Drive document. You can use the 'offset' parameter (obtained from search_knowledge) to read a specific chunk of text.",
+      "Read the Markdown content of a specific Google Drive document. Automatically syncs the latest version. Use 'offset' (from search_knowledge results) to navigate to a specific section, and 'limit' to control how much content to return.",
     inputSchema: {
       fileId: z.string().describe("The Google Drive file ID to read"),
       offset: z
         .number()
         .optional()
-        .describe("Starting character index (default: 0)"),
+        .describe("Starting character index in the Markdown content (default: 0)"),
       limit: z
         .number()
         .optional()
-        .describe("Maximum number of characters to return (default: 10000)"),
+        .describe("Maximum characters to return (default: 10000)"),
     },
   },
   async ({ fileId, offset, limit }) => {
@@ -81,16 +64,14 @@ server.registerTool(
     return {
       content: [{ type: "text", text: JSON.stringify(res.data, null, 2) }],
     };
-  },
+  }
 );
 server.registerTool(
   "search_knowledge",
   {
     description:
-      "Search the folder's vector memory for relevant context or knowledge. Returns structured JSON array of matching chunks.",
+      "Semantic vector search across all accessible Google Drive documents. Automatically syncs latest document changes before searching. Returns relevant Markdown chunks with title and character offset.",
     inputSchema: {
       query: z.string().describe("The search query"),
       topK: z
@@ -118,14 +99,55 @@ server.registerTool(
         },
       ],
     };
+  }
+);
+server.registerTool(
+  "search_exact",
+  {
+    description:
+      "Exhaustive keyword search across all accessible Google Drive documents using full-text index. " +
+      "Unlike search_knowledge (semantic/vector), this finds EVERY chunk containing the exact term — " +
+      "ideal for specific identifiers: API paths (/v1/foo/bar), function names, config keys, error codes. " +
+      "Case-insensitive. Automatically syncs latest document changes before searching.",
+    inputSchema: {
+      term: z
+        .string()
+        .describe(
+          "Exact term to search for (e.g. '/product-orchestrator/v1/products/filter', 'ServiceCode.mkp')"
+        ),
+      limit: z
+        .number()
+        .optional()
+        .describe("Max results to return (default: 50)"),
+    },
   },
+  async ({ term, limit }) => {
+    const res = await searchExact(term, limit);
+    if (!res.success) {
+      return {
+        content: [{ type: "text", text: `Error: ${res.error}` }],
+        isError: true,
+      };
+    }
+    return {
+      content: [
+        {
+          type: "text",
+          text:
+            typeof res.results === "string"
+              ? res.results
+              : JSON.stringify(res, null, 2),
+        },
+      ],
+    };
+  }
 );
-// Start the server
 async function run() {
   const transport = new StdioServerTransport();
   await server.connect(transport);
-  console.error("doc-agent MCP server running on stdio");
+  console.error("doc-agent MCP server v1.2.0 running on stdio");
 }
 run().catch((error) => {

package/src/tools/driveTools.ts CHANGED

@@ -1,20 +1,15 @@
 import { google } from "googleapis";
-import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
 import { config } from "../config.js";
-import {
-  upsertProjectDocument,
-  getProjectDocumentMetadata,
-  deleteProjectDocument,
-} from "../db/vector.js";
+import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
+import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
+import { syncSingleDocument } from "./ingestFlow.js";
 function getDriveClient() {
   const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
   let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
   if (!clientEmail || !privateKey) {
-    throw new Error(
-      "Google Drive credentials not configured. Please set DOC_MCP_GOOGLE_CLIENT_EMAIL and DOC_MCP_GOOGLE_PRIVATE_KEY in .env",
-    );
+    throw new Error("Google Drive credentials not configured.");
   }
   if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
@@ -31,205 +26,146 @@ function getDriveClient() {
   return google.drive({ version: "v3", auth });
 }
-export async function listDriveFiles(keyword?: string, targetFolderId?: string) {
-  const folderId = targetFolderId || config.DOC_MCP_DRIVE_FOLDER_ID;
-  if (!folderId) {
-    return {
-      success: false,
-      error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
-    };
-  }
+/**
+ * List all Google Docs the Service Account can read.
+ * Optional keyword filter on document title.
+ */
+export async function listDriveFiles(keyword?: string) {
   try {
     const drive = getDriveClient();
-    let q = "(mimeType = 'application/vnd.google-apps.document' or mimeType = 'application/vnd.google-apps.folder') and trashed = false";
-    q = `'${folderId}' in parents and ${q}`;
+    let q =
+      "mimeType = 'application/vnd.google-apps.document' and trashed = false";
     if (keyword) {
-      q = `name contains '${keyword}' and ${q}`;
+      const safe = keyword.replace(/'/g, "\\'");
+      q = `name contains '${safe}' and ${q}`;
     }
-    const res = await drive.files.list({
-      q,
-      fields: "files(id, name, description, mimeType)",
-      spaces: "drive",
-      pageSize: 50,
-      supportsAllDrives: true,
-      includeItemsFromAllDrives: true,
-    });
-    const files = res.data.files;
-    if (!files || files.length === 0) {
-      return { success: true, results: [] };
-    }
+    const allFiles: any[] = [];
+    let pageToken: string | undefined;
+    do {
+      const res: any = await drive.files.list({
+        q,
+        fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
+        spaces: "drive",
+        pageSize: 100,
+        pageToken,
+        supportsAllDrives: true,
+        includeItemsFromAllDrives: true,
+      });
+      if (res.data.files) allFiles.push(...res.data.files);
+      pageToken = res.data.nextPageToken || undefined;
+    } while (pageToken);
-    return { success: true, results: files };
+    return { success: true, results: allFiles };
   } catch (err: any) {
     return { success: false, error: err.message };
   }
 }
-export async function syncSingleDocument(fileId: string, folderId: string) {
-  const drive = getDriveClient();
-  const fileInfo = await drive.files.get({
-    fileId,
-    fields: "id, name, modifiedTime",
-    supportsAllDrives: true,
-  });
-  const driveModifiedTime = fileInfo.data.modifiedTime || "";
-  const dbMetaMap = await getProjectDocumentMetadata(folderId);
-  const dbModifiedTime = dbMetaMap[fileId];
-  if (!dbModifiedTime || dbModifiedTime !== driveModifiedTime) {
-    if (dbModifiedTime) {
-      await deleteProjectDocument(folderId, fileId);
-    }
-    const res = await drive.files.export({
-      fileId: fileId,
-      mimeType: "text/plain",
-    });
+/**
+ * Sync all documents the SA can see:
+ * - New/changed files → syncSingleDocument()
+ * - Files removed from Drive → delete from Qdrant + Redis
+ */
+export async function syncAllDocuments() {
+  try {
+    const drive = getDriveClient();
-    const content = res.data;
-    if (typeof content !== "string" || content.trim() === "") {
-      throw new Error("Empty or invalid file content");
+    // List all docs (paginated)
+    const allDocs: any[] = [];
+    let pageToken: string | undefined;
+    do {
+      const res: any = await drive.files.list({
+        q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
+        fields: "nextPageToken, files(id, name, modifiedTime)",
+        spaces: "drive",
+        pageSize: 100,
+        pageToken,
+        supportsAllDrives: true,
+        includeItemsFromAllDrives: true,
+      });
+      if (res.data.files) allDocs.push(...res.data.files);
+      pageToken = res.data.nextPageToken || undefined;
+    } while (pageToken);
+    // Get all Redis sync entries
+    const syncEntries = await getAllSyncEntries();
+    // Sync new or changed files
+    for (const file of allDocs) {
+      if (!file.id || !file.modifiedTime) continue;
+      const existing = syncEntries[file.id];
+      if (!existing || existing.modifiedTime !== file.modifiedTime) {
+        console.error(`[Sync] Detected change: "${file.name}"`);
+        await syncSingleDocument(
+          file.id,
+          file.modifiedTime,
+          file.name || "Untitled"
+        );
+      }
     }
-    const splitter = new RecursiveCharacterTextSplitter({
-      chunkSize: config.CHUNK_SIZE,
-      chunkOverlap: config.CHUNK_OVERLAP,
-    });
-    const chunks = await splitter.splitText(content);
-    let currentOffset = 0;
-    for (const chunk of chunks) {
-      const offset = content.indexOf(chunk, currentOffset);
-      if (offset !== -1) {
-        currentOffset = offset;
+    // Clean up files removed from Drive
+    const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
+    for (const [fileId, entry] of Object.entries(syncEntries)) {
+      if (!driveFileIds.has(fileId)) {
+        console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
+        const pointIds = Array.from({ length: entry.blockCount }, (_, i) =>
+          getBlockPointId(fileId, i)
+        );
+        await deletePointsByIds(pointIds);
+        await deleteSyncEntry(fileId);
       }
-      await upsertProjectDocument(folderId, chunk, {
-        title: fileInfo.data.name || "Untitled Google Doc",
-        source: "google_drive",
-        file_id: fileId,
-        modified_time: driveModifiedTime,
-        offset: offset !== -1 ? offset : 0,
-      });
     }
-    return { synced: true, content, driveModifiedTime };
-  }
-  return { synced: false, driveModifiedTime };
-}
-export async function readDriveDocument(fileId: string, offset: number = 0, limit: number = 10000) {
-  const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
-  if (!folderId) {
-    return {
-      success: false,
-      error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
-    };
+    return { success: true };
+  } catch (err: any) {
+    console.error("syncAllDocuments failed:", err.message);
+    return { success: false, error: err.message };
   }
+}
+/**
+ * Read a specific Google Drive document, triggering incremental sync first.
+ * Returns paginated Markdown content.
+ */
+export async function readDriveDocument(
+  fileId: string,
+  offset: number = 0,
+  limit: number = 10000
+) {
   try {
-    const result = await syncSingleDocument(fileId, folderId);
-    // If not synced just now, we need to fetch content to return to the user
-    let content = result.content;
-    if (!content) {
-      const drive = getDriveClient();
-      const res = await drive.files.export({
-        fileId: fileId,
-        mimeType: "text/plain",
-      });
-      content = typeof res.data === "string" ? res.data : "";
-    }
-    let finalContent = content;
-    const totalSize = finalContent ? finalContent.length : 0;
+    const drive = getDriveClient();
+    const fileInfo = await drive.files.get({
+      fileId,
+      fields: "id, name, modifiedTime",
+      supportsAllDrives: true,
+    });
-    if (finalContent) {
-      finalContent = finalContent.substring(offset, offset + limit);
-    }
+    const modifiedTime = fileInfo.data.modifiedTime || "";
+    const title = fileInfo.data.name || "Untitled";
-    const isTruncated = offset + (finalContent?.length || 0) < totalSize;
-    let warning = undefined;
+    const result = await syncSingleDocument(fileId, modifiedTime, title);
+    const content = result.content;
+    const totalSize = content.length;
+    const sliced = content.substring(offset, offset + limit);
+    const isTruncated = offset + sliced.length < totalSize;
+    let finalContent = sliced;
+    let warning: string | undefined;
     if (isTruncated) {
-      warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + finalContent!.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
+      warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
       finalContent += `\n\n${warning}`;
     }
     return {
       success: true,
       data: {
-        content: finalContent || "Empty file",
-        metadata: {
-          totalSize,
-          offset,
-          limit,
-          isTruncated,
-          warning,
-        },
+        content: finalContent || "Empty document",
+        metadata: { totalSize, offset, limit, isTruncated, warning },
       },
     };
   } catch (err: any) {
     return { success: false, error: err.message };
   }
 }
-export async function syncFolderState(folderId: string) {
-  try {
-    const drive = getDriveClient();
-    async function getAllDocumentsFlat(): Promise<any[]> {
-      let allDocs: any[] = [];
-      let pageToken: string | undefined = undefined;
-      do {
-        const docsRes: any = await drive.files.list({
-          // Chú ý: Đéo check parentId nữa, gom sạch sành sanh mọi file .doc mà Service Account nhìn thấy
-          q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
-          fields: "nextPageToken, files(id, name, modifiedTime)",
-          spaces: "drive",
-          pageSize: 100, // Google API limit mỗi page, tự động nhảy trang nếu nhiều hơn
-          pageToken,
-          supportsAllDrives: true,
-          includeItemsFromAllDrives: true,
-        });
-        if (docsRes.data.files) {
-          allDocs = allDocs.concat(docsRes.data.files);
-        }
-        pageToken = docsRes.data.nextPageToken || undefined;
-      } while (pageToken);
-      return allDocs;
-    }
-    const driveFiles = await getAllDocumentsFlat();
-    const dbMetaMap = await getProjectDocumentMetadata(folderId);
-    // Sync updated or new files
-    for (const file of driveFiles) {
-      if (!file.id) continue;
-      const dbModTime = dbMetaMap[file.id];
-      if (!dbModTime || dbModTime !== file.modifiedTime) {
-        await syncSingleDocument(file.id, folderId);
-      }
-    }
-    // Delete removed files from DB
-    for (const dbFileId of Object.keys(dbMetaMap)) {
-      if (!driveFiles.find((f) => f.id === dbFileId)) {
-        await deleteProjectDocument(folderId, dbFileId);
-      }
-    }
-    return { success: true };
-  } catch (err: any) {
-    console.error("Auto-sync failed:", err.message);
-    return { success: false, error: err.message };
-  }
-}