@khoinguyen2002/doc-mcp 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/mcp-server.ts CHANGED
@@ -3,44 +3,27 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
5
  import { listDriveFiles, readDriveDocument } from "./tools/driveTools.js";
6
- import { saveAgentNote, searchKnowledge } from "./tools/knowledgeTools.js";
7
- import { config } from "./config.js";
8
-
9
- const DRIVE_FOLDER_ID = config.DOC_MCP_DRIVE_FOLDER_ID;
10
-
11
- if (!DRIVE_FOLDER_ID) {
12
- console.error(
13
- "Missing DOC_MCP_DRIVE_FOLDER_ID environment variable. The doc-agent requires a target folder ID.",
14
- );
15
- process.exit(1);
16
- }
6
+ import { saveAgentNote, searchKnowledge, searchExact } from "./tools/knowledgeTools.js";
17
7
 
18
8
  const server = new McpServer({
19
9
  name: "doc-agent",
20
- version: "1.0.4",
10
+ version: "1.2.0",
21
11
  });
22
12
 
23
- // Register tools
24
13
  server.registerTool(
25
14
  "list_drive_files",
26
15
  {
27
16
  description:
28
- "List and search for Google Drive documents and subfolders in a specific folder.",
17
+ "List all Google Drive documents accessible to this agent. Returns file IDs, names, and types. Use keyword to filter by title.",
29
18
  inputSchema: {
30
19
  keyword: z
31
20
  .string()
32
21
  .optional()
33
- .describe("Optional keyword to search for in document titles"),
34
- targetFolderId: z
35
- .string()
36
- .optional()
37
- .describe(
38
- "Optional Google Drive folder ID to list contents from. Defaults to the root knowledge folder.",
39
- ),
22
+ .describe("Optional keyword to filter documents by title"),
40
23
  },
41
24
  },
42
- async ({ keyword, targetFolderId }) => {
43
- const res = await listDriveFiles(keyword, targetFolderId);
25
+ async ({ keyword }) => {
26
+ const res = await listDriveFiles(keyword);
44
27
  if (!res.success) {
45
28
  return {
46
29
  content: [{ type: "text", text: `Error: ${res.error}` }],
@@ -50,24 +33,24 @@ server.registerTool(
50
33
  return {
51
34
  content: [{ type: "text", text: JSON.stringify(res.results, null, 2) }],
52
35
  };
53
- },
36
+ }
54
37
  );
55
38
 
56
39
  server.registerTool(
57
40
  "read_drive_document",
58
41
  {
59
42
  description:
60
- "Read the content of a specific Google Drive document. You can use the 'offset' parameter (obtained from search_knowledge) to read a specific chunk of text.",
43
+ "Read the Markdown content of a specific Google Drive document. Automatically syncs the latest version. Use 'offset' (from search_knowledge results) to navigate to a specific section, and 'limit' to control how much content to return.",
61
44
  inputSchema: {
62
45
  fileId: z.string().describe("The Google Drive file ID to read"),
63
46
  offset: z
64
47
  .number()
65
48
  .optional()
66
- .describe("Starting character index (default: 0)"),
49
+ .describe("Starting character index in the Markdown content (default: 0)"),
67
50
  limit: z
68
51
  .number()
69
52
  .optional()
70
- .describe("Maximum number of characters to return (default: 10000)"),
53
+ .describe("Maximum characters to return (default: 10000)"),
71
54
  },
72
55
  },
73
56
  async ({ fileId, offset, limit }) => {
@@ -81,16 +64,14 @@ server.registerTool(
81
64
  return {
82
65
  content: [{ type: "text", text: JSON.stringify(res.data, null, 2) }],
83
66
  };
84
- },
67
+ }
85
68
  );
86
69
 
87
-
88
-
89
70
  server.registerTool(
90
71
  "search_knowledge",
91
72
  {
92
73
  description:
93
- "Search the folder's vector memory for relevant context or knowledge. Returns structured JSON array of matching chunks.",
74
+ "Semantic vector search across all accessible Google Drive documents. Automatically syncs latest document changes before searching. Returns relevant Markdown chunks with title and character offset.",
94
75
  inputSchema: {
95
76
  query: z.string().describe("The search query"),
96
77
  topK: z
@@ -118,14 +99,55 @@ server.registerTool(
118
99
  },
119
100
  ],
120
101
  };
102
+ }
103
+ );
104
+
105
+ server.registerTool(
106
+ "search_exact",
107
+ {
108
+ description:
109
+ "Exhaustive keyword search across all accessible Google Drive documents using full-text index. " +
110
+ "Unlike search_knowledge (semantic/vector), this finds EVERY chunk containing the exact term — " +
111
+ "ideal for specific identifiers: API paths (/v1/foo/bar), function names, config keys, error codes. " +
112
+ "Case-insensitive. Automatically syncs latest document changes before searching.",
113
+ inputSchema: {
114
+ term: z
115
+ .string()
116
+ .describe(
117
+ "Exact term to search for (e.g. '/product-orchestrator/v1/products/filter', 'ServiceCode.mkp')"
118
+ ),
119
+ limit: z
120
+ .number()
121
+ .optional()
122
+ .describe("Max results to return (default: 50)"),
123
+ },
121
124
  },
125
+ async ({ term, limit }) => {
126
+ const res = await searchExact(term, limit);
127
+ if (!res.success) {
128
+ return {
129
+ content: [{ type: "text", text: `Error: ${res.error}` }],
130
+ isError: true,
131
+ };
132
+ }
133
+ return {
134
+ content: [
135
+ {
136
+ type: "text",
137
+ text:
138
+ typeof res.results === "string"
139
+ ? res.results
140
+ : JSON.stringify(res, null, 2),
141
+ },
142
+ ],
143
+ };
144
+ }
122
145
  );
123
146
 
124
- // Start the server
125
147
  async function run() {
126
148
  const transport = new StdioServerTransport();
127
149
  await server.connect(transport);
128
- console.error("doc-agent MCP server running on stdio");
150
+ console.error("doc-agent MCP server v1.2.0 running on stdio");
129
151
  }
130
152
 
131
153
  run().catch((error) => {
@@ -1,20 +1,15 @@
1
1
  import { google } from "googleapis";
2
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
3
2
  import { config } from "../config.js";
4
- import {
5
- upsertProjectDocument,
6
- getProjectDocumentMetadata,
7
- deleteProjectDocument,
8
- } from "../db/vector.js";
3
+ import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
4
+ import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
5
+ import { syncSingleDocument } from "./ingestFlow.js";
9
6
 
10
7
  function getDriveClient() {
11
8
  const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
12
9
  let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
13
10
 
14
11
  if (!clientEmail || !privateKey) {
15
- throw new Error(
16
- "Google Drive credentials not configured. Please set DOC_MCP_GOOGLE_CLIENT_EMAIL and DOC_MCP_GOOGLE_PRIVATE_KEY in .env",
17
- );
12
+ throw new Error("Google Drive credentials not configured.");
18
13
  }
19
14
 
20
15
  if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
@@ -31,205 +26,146 @@ function getDriveClient() {
31
26
  return google.drive({ version: "v3", auth });
32
27
  }
33
28
 
34
- export async function listDriveFiles(keyword?: string, targetFolderId?: string) {
35
- const folderId = targetFolderId || config.DOC_MCP_DRIVE_FOLDER_ID;
36
- if (!folderId) {
37
- return {
38
- success: false,
39
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
40
- };
41
- }
42
-
29
+ /**
30
+ * List all Google Docs the Service Account can read.
31
+ * Optional keyword filter on document title.
32
+ */
33
+ export async function listDriveFiles(keyword?: string) {
43
34
  try {
44
35
  const drive = getDriveClient();
45
- let q = "(mimeType = 'application/vnd.google-apps.document' or mimeType = 'application/vnd.google-apps.folder') and trashed = false";
46
- q = `'${folderId}' in parents and ${q}`;
47
-
36
+ let q =
37
+ "mimeType = 'application/vnd.google-apps.document' and trashed = false";
48
38
  if (keyword) {
49
- q = `name contains '${keyword}' and ${q}`;
39
+ const safe = keyword.replace(/'/g, "\\'");
40
+ q = `name contains '${safe}' and ${q}`;
50
41
  }
51
42
 
52
- const res = await drive.files.list({
53
- q,
54
- fields: "files(id, name, description, mimeType)",
55
- spaces: "drive",
56
- pageSize: 50,
57
- supportsAllDrives: true,
58
- includeItemsFromAllDrives: true,
59
- });
60
-
61
- const files = res.data.files;
62
- if (!files || files.length === 0) {
63
- return { success: true, results: [] };
64
- }
43
+ const allFiles: any[] = [];
44
+ let pageToken: string | undefined;
45
+ do {
46
+ const res: any = await drive.files.list({
47
+ q,
48
+ fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
49
+ spaces: "drive",
50
+ pageSize: 100,
51
+ pageToken,
52
+ supportsAllDrives: true,
53
+ includeItemsFromAllDrives: true,
54
+ });
55
+ if (res.data.files) allFiles.push(...res.data.files);
56
+ pageToken = res.data.nextPageToken || undefined;
57
+ } while (pageToken);
65
58
 
66
- return { success: true, results: files };
59
+ return { success: true, results: allFiles };
67
60
  } catch (err: any) {
68
61
  return { success: false, error: err.message };
69
62
  }
70
63
  }
71
64
 
72
- export async function syncSingleDocument(fileId: string, folderId: string) {
73
- const drive = getDriveClient();
74
- const fileInfo = await drive.files.get({
75
- fileId,
76
- fields: "id, name, modifiedTime",
77
- supportsAllDrives: true,
78
- });
79
-
80
- const driveModifiedTime = fileInfo.data.modifiedTime || "";
81
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
82
- const dbModifiedTime = dbMetaMap[fileId];
83
-
84
- if (!dbModifiedTime || dbModifiedTime !== driveModifiedTime) {
85
- if (dbModifiedTime) {
86
- await deleteProjectDocument(folderId, fileId);
87
- }
88
-
89
- const res = await drive.files.export({
90
- fileId: fileId,
91
- mimeType: "text/plain",
92
- });
65
+ /**
66
+ * Sync all documents the SA can see:
67
+ * - New/changed files → syncSingleDocument()
68
+ * - Files removed from Drive → delete from Qdrant + Redis
69
+ */
70
+ export async function syncAllDocuments() {
71
+ try {
72
+ const drive = getDriveClient();
93
73
 
94
- const content = res.data;
95
- if (typeof content !== "string" || content.trim() === "") {
96
- throw new Error("Empty or invalid file content");
74
+ // List all docs (paginated)
75
+ const allDocs: any[] = [];
76
+ let pageToken: string | undefined;
77
+ do {
78
+ const res: any = await drive.files.list({
79
+ q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
80
+ fields: "nextPageToken, files(id, name, modifiedTime)",
81
+ spaces: "drive",
82
+ pageSize: 100,
83
+ pageToken,
84
+ supportsAllDrives: true,
85
+ includeItemsFromAllDrives: true,
86
+ });
87
+ if (res.data.files) allDocs.push(...res.data.files);
88
+ pageToken = res.data.nextPageToken || undefined;
89
+ } while (pageToken);
90
+
91
+ // Get all Redis sync entries
92
+ const syncEntries = await getAllSyncEntries();
93
+
94
+ // Sync new or changed files
95
+ for (const file of allDocs) {
96
+ if (!file.id || !file.modifiedTime) continue;
97
+ const existing = syncEntries[file.id];
98
+ if (!existing || existing.modifiedTime !== file.modifiedTime) {
99
+ console.error(`[Sync] Detected change: "${file.name}"`);
100
+ await syncSingleDocument(
101
+ file.id,
102
+ file.modifiedTime,
103
+ file.name || "Untitled"
104
+ );
105
+ }
97
106
  }
98
107
 
99
- const splitter = new RecursiveCharacterTextSplitter({
100
- chunkSize: config.CHUNK_SIZE,
101
- chunkOverlap: config.CHUNK_OVERLAP,
102
- });
103
- const chunks = await splitter.splitText(content);
104
-
105
- let currentOffset = 0;
106
- for (const chunk of chunks) {
107
- const offset = content.indexOf(chunk, currentOffset);
108
- if (offset !== -1) {
109
- currentOffset = offset;
108
+ // Clean up files removed from Drive
109
+ const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
110
+ for (const [fileId, entry] of Object.entries(syncEntries)) {
111
+ if (!driveFileIds.has(fileId)) {
112
+ console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
113
+ const pointIds = Array.from({ length: entry.blockCount }, (_, i) =>
114
+ getBlockPointId(fileId, i)
115
+ );
116
+ await deletePointsByIds(pointIds);
117
+ await deleteSyncEntry(fileId);
110
118
  }
111
-
112
- await upsertProjectDocument(folderId, chunk, {
113
- title: fileInfo.data.name || "Untitled Google Doc",
114
- source: "google_drive",
115
- file_id: fileId,
116
- modified_time: driveModifiedTime,
117
- offset: offset !== -1 ? offset : 0,
118
- });
119
119
  }
120
- return { synced: true, content, driveModifiedTime };
121
- }
122
120
 
123
- return { synced: false, driveModifiedTime };
124
- }
125
-
126
- export async function readDriveDocument(fileId: string, offset: number = 0, limit: number = 10000) {
127
- const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
128
- if (!folderId) {
129
- return {
130
- success: false,
131
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
132
- };
121
+ return { success: true };
122
+ } catch (err: any) {
123
+ console.error("syncAllDocuments failed:", err.message);
124
+ return { success: false, error: err.message };
133
125
  }
126
+ }
134
127
 
128
+ /**
129
+ * Read a specific Google Drive document, triggering incremental sync first.
130
+ * Returns paginated Markdown content.
131
+ */
132
+ export async function readDriveDocument(
133
+ fileId: string,
134
+ offset: number = 0,
135
+ limit: number = 10000
136
+ ) {
135
137
  try {
136
- const result = await syncSingleDocument(fileId, folderId);
137
-
138
- // If not synced just now, we need to fetch content to return to the user
139
- let content = result.content;
140
- if (!content) {
141
- const drive = getDriveClient();
142
- const res = await drive.files.export({
143
- fileId: fileId,
144
- mimeType: "text/plain",
145
- });
146
- content = typeof res.data === "string" ? res.data : "";
147
- }
148
-
149
- let finalContent = content;
150
- const totalSize = finalContent ? finalContent.length : 0;
138
+ const drive = getDriveClient();
139
+ const fileInfo = await drive.files.get({
140
+ fileId,
141
+ fields: "id, name, modifiedTime",
142
+ supportsAllDrives: true,
143
+ });
151
144
 
152
- if (finalContent) {
153
- finalContent = finalContent.substring(offset, offset + limit);
154
- }
145
+ const modifiedTime = fileInfo.data.modifiedTime || "";
146
+ const title = fileInfo.data.name || "Untitled";
155
147
 
156
- const isTruncated = offset + (finalContent?.length || 0) < totalSize;
157
- let warning = undefined;
148
+ const result = await syncSingleDocument(fileId, modifiedTime, title);
149
+ const content = result.content;
150
+ const totalSize = content.length;
151
+ const sliced = content.substring(offset, offset + limit);
152
+ const isTruncated = offset + sliced.length < totalSize;
158
153
 
154
+ let finalContent = sliced;
155
+ let warning: string | undefined;
159
156
  if (isTruncated) {
160
- warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + finalContent!.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
157
+ warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
161
158
  finalContent += `\n\n${warning}`;
162
159
  }
163
160
 
164
161
  return {
165
162
  success: true,
166
163
  data: {
167
- content: finalContent || "Empty file",
168
- metadata: {
169
- totalSize,
170
- offset,
171
- limit,
172
- isTruncated,
173
- warning,
174
- },
164
+ content: finalContent || "Empty document",
165
+ metadata: { totalSize, offset, limit, isTruncated, warning },
175
166
  },
176
167
  };
177
168
  } catch (err: any) {
178
169
  return { success: false, error: err.message };
179
170
  }
180
171
  }
181
-
182
- export async function syncFolderState(folderId: string) {
183
- try {
184
- const drive = getDriveClient();
185
-
186
- async function getAllDocumentsFlat(): Promise<any[]> {
187
- let allDocs: any[] = [];
188
- let pageToken: string | undefined = undefined;
189
-
190
- do {
191
- const docsRes: any = await drive.files.list({
192
- // Chú ý: Đéo check parentId nữa, gom sạch sành sanh mọi file .doc mà Service Account nhìn thấy
193
- q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
194
- fields: "nextPageToken, files(id, name, modifiedTime)",
195
- spaces: "drive",
196
- pageSize: 100, // Google API limit mỗi page, tự động nhảy trang nếu nhiều hơn
197
- pageToken,
198
- supportsAllDrives: true,
199
- includeItemsFromAllDrives: true,
200
- });
201
-
202
- if (docsRes.data.files) {
203
- allDocs = allDocs.concat(docsRes.data.files);
204
- }
205
- pageToken = docsRes.data.nextPageToken || undefined;
206
- } while (pageToken);
207
-
208
- return allDocs;
209
- }
210
-
211
- const driveFiles = await getAllDocumentsFlat();
212
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
213
-
214
- // Sync updated or new files
215
- for (const file of driveFiles) {
216
- if (!file.id) continue;
217
- const dbModTime = dbMetaMap[file.id];
218
- if (!dbModTime || dbModTime !== file.modifiedTime) {
219
- await syncSingleDocument(file.id, folderId);
220
- }
221
- }
222
-
223
- // Delete removed files from DB
224
- for (const dbFileId of Object.keys(dbMetaMap)) {
225
- if (!driveFiles.find((f) => f.id === dbFileId)) {
226
- await deleteProjectDocument(folderId, dbFileId);
227
- }
228
- }
229
-
230
- return { success: true };
231
- } catch (err: any) {
232
- console.error("Auto-sync failed:", err.message);
233
- return { success: false, error: err.message };
234
- }
235
- }