@khoinguyen2002/doc-mcp 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/mcp-server.ts CHANGED
@@ -2,45 +2,28 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
- import {
6
- listDriveFiles,
7
- readDriveDocument,
8
- } from "./tools/driveTools.js";
9
- import { saveAgentNote, searchKnowledge } from "./tools/knowledgeTools.js";
10
- import { config } from "./config.js";
11
-
12
- const DRIVE_FOLDER_ID = config.DOC_MCP_DRIVE_FOLDER_ID;
13
-
14
- if (!DRIVE_FOLDER_ID) {
15
- console.error(
16
- "Missing DOC_MCP_DRIVE_FOLDER_ID environment variable. The doc-agent requires a target folder ID.",
17
- );
18
- process.exit(1);
19
- }
5
+ import { listDriveFiles, readDriveDocument } from "./tools/driveTools.js";
6
+ import { saveAgentNote, searchKnowledge, searchExact } from "./tools/knowledgeTools.js";
20
7
 
21
8
  const server = new McpServer({
22
9
  name: "doc-agent",
23
- version: "1.0.0",
10
+ version: "1.2.0",
24
11
  });
25
12
 
26
- // Register tools
27
13
  server.registerTool(
28
14
  "list_drive_files",
29
15
  {
30
- description: "List and search for Google Drive documents and subfolders in a specific folder.",
16
+ description:
17
+ "List all Google Drive documents accessible to this agent. Returns file IDs, names, and types. Use keyword to filter by title.",
31
18
  inputSchema: {
32
19
  keyword: z
33
20
  .string()
34
21
  .optional()
35
- .describe("Optional keyword to search for in document titles"),
36
- targetFolderId: z
37
- .string()
38
- .optional()
39
- .describe("Optional Google Drive folder ID to list contents from. Defaults to the root knowledge folder."),
22
+ .describe("Optional keyword to filter documents by title"),
40
23
  },
41
24
  },
42
- async ({ keyword, targetFolderId }) => {
43
- const res = await listDriveFiles(keyword, targetFolderId);
25
+ async ({ keyword }) => {
26
+ const res = await listDriveFiles(keyword);
44
27
  if (!res.success) {
45
28
  return {
46
29
  content: [{ type: "text", text: `Error: ${res.error}` }],
@@ -50,18 +33,24 @@ server.registerTool(
50
33
  return {
51
34
  content: [{ type: "text", text: JSON.stringify(res.results, null, 2) }],
52
35
  };
53
- },
36
+ }
54
37
  );
55
38
 
56
39
  server.registerTool(
57
40
  "read_drive_document",
58
41
  {
59
42
  description:
60
- "Read the content of a specific Google Drive document. The document will also be automatically ingested into vector memory for future semantic search.",
43
+ "Read the Markdown content of a specific Google Drive document. Automatically syncs the latest version. Use 'offset' (from search_knowledge results) to navigate to a specific section, and 'limit' to control how much content to return.",
61
44
  inputSchema: {
62
45
  fileId: z.string().describe("The Google Drive file ID to read"),
63
- offset: z.number().optional().describe("Starting character index (default: 0)"),
64
- limit: z.number().optional().describe("Maximum number of characters to return (default: 10000)"),
46
+ offset: z
47
+ .number()
48
+ .optional()
49
+ .describe("Starting character index in the Markdown content (default: 0)"),
50
+ limit: z
51
+ .number()
52
+ .optional()
53
+ .describe("Maximum characters to return (default: 10000)"),
65
54
  },
66
55
  },
67
56
  async ({ fileId, offset, limit }) => {
@@ -75,19 +64,24 @@ server.registerTool(
75
64
  return {
76
65
  content: [{ type: "text", text: JSON.stringify(res.data, null, 2) }],
77
66
  };
78
- },
67
+ }
79
68
  );
80
69
 
81
70
  server.registerTool(
82
- "save_agent_note",
71
+ "search_knowledge",
83
72
  {
84
- description: "Save an agent note, thought, or summary directly into the vector memory.",
73
+ description:
74
+ "Semantic vector search across all accessible Google Drive documents. Automatically syncs latest document changes before searching. Returns relevant Markdown chunks with title and character offset.",
85
75
  inputSchema: {
86
- content: z.string().describe("The note or knowledge content to store"),
76
+ query: z.string().describe("The search query"),
77
+ topK: z
78
+ .number()
79
+ .optional()
80
+ .describe("Number of results to return (default: 3)"),
87
81
  },
88
82
  },
89
- async ({ content }) => {
90
- const res = await saveAgentNote(content);
83
+ async ({ query, topK }) => {
84
+ const res = await searchKnowledge(query, topK);
91
85
  if (!res.success) {
92
86
  return {
93
87
  content: [{ type: "text", text: `Error: ${res.error}` }],
@@ -95,26 +89,41 @@ server.registerTool(
95
89
  };
96
90
  }
97
91
  return {
98
- content: [{ type: "text", text: res.message || "Saved successfully" }],
92
+ content: [
93
+ {
94
+ type: "text",
95
+ text:
96
+ typeof res.results === "string"
97
+ ? res.results
98
+ : JSON.stringify(res.results),
99
+ },
100
+ ],
99
101
  };
100
- },
102
+ }
101
103
  );
102
104
 
103
105
  server.registerTool(
104
- "search_knowledge",
106
+ "search_exact",
105
107
  {
106
108
  description:
107
- "Search the folder's vector memory for relevant context or knowledge.",
109
+ "Exhaustive keyword search across all accessible Google Drive documents using full-text index. " +
110
+ "Unlike search_knowledge (semantic/vector), this finds EVERY chunk containing the exact term — " +
111
+ "ideal for specific identifiers: API paths (/v1/foo/bar), function names, config keys, error codes. " +
112
+ "Case-insensitive. Automatically syncs latest document changes before searching.",
108
113
  inputSchema: {
109
- query: z.string().describe("The search query"),
110
- topK: z
114
+ term: z
115
+ .string()
116
+ .describe(
117
+ "Exact term to search for (e.g. '/product-orchestrator/v1/products/filter', 'ServiceCode.mkp')"
118
+ ),
119
+ limit: z
111
120
  .number()
112
121
  .optional()
113
- .describe("Number of results to return (default: 3)"),
122
+ .describe("Max results to return (default: 50)"),
114
123
  },
115
124
  },
116
- async ({ query, topK }) => {
117
- const res = await searchKnowledge(query, topK);
125
+ async ({ term, limit }) => {
126
+ const res = await searchExact(term, limit);
118
127
  if (!res.success) {
119
128
  return {
120
129
  content: [{ type: "text", text: `Error: ${res.error}` }],
@@ -128,18 +137,17 @@ server.registerTool(
128
137
  text:
129
138
  typeof res.results === "string"
130
139
  ? res.results
131
- : JSON.stringify(res.results),
140
+ : JSON.stringify(res, null, 2),
132
141
  },
133
142
  ],
134
143
  };
135
- },
144
+ }
136
145
  );
137
146
 
138
- // Start the server
139
147
  async function run() {
140
148
  const transport = new StdioServerTransport();
141
149
  await server.connect(transport);
142
- console.error("doc-agent MCP server running on stdio");
150
+ console.error("doc-agent MCP server v1.2.0 running on stdio");
143
151
  }
144
152
 
145
153
  run().catch((error) => {
@@ -1,20 +1,15 @@
1
1
  import { google } from "googleapis";
2
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
3
2
  import { config } from "../config.js";
4
- import {
5
- upsertProjectDocument,
6
- getProjectDocumentMetadata,
7
- deleteProjectDocument,
8
- } from "../db/vector.js";
3
+ import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
4
+ import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
5
+ import { syncSingleDocument } from "./ingestFlow.js";
9
6
 
10
7
  function getDriveClient() {
11
8
  const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
12
9
  let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
13
10
 
14
11
  if (!clientEmail || !privateKey) {
15
- throw new Error(
16
- "Google Drive credentials not configured. Please set DOC_MCP_GOOGLE_CLIENT_EMAIL and DOC_MCP_GOOGLE_PRIVATE_KEY in .env",
17
- );
12
+ throw new Error("Google Drive credentials not configured.");
18
13
  }
19
14
 
20
15
  if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
@@ -31,198 +26,146 @@ function getDriveClient() {
31
26
  return google.drive({ version: "v3", auth });
32
27
  }
33
28
 
34
- export async function listDriveFiles(keyword?: string, targetFolderId?: string) {
35
- const folderId = targetFolderId || config.DOC_MCP_DRIVE_FOLDER_ID;
36
- if (!folderId) {
37
- return {
38
- success: false,
39
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
40
- };
41
- }
42
-
29
+ /**
30
+ * List all Google Docs the Service Account can read.
31
+ * Optional keyword filter on document title.
32
+ */
33
+ export async function listDriveFiles(keyword?: string) {
43
34
  try {
44
35
  const drive = getDriveClient();
45
- let q = "(mimeType = 'application/vnd.google-apps.document' or mimeType = 'application/vnd.google-apps.folder') and trashed = false";
46
- q = `'${folderId}' in parents and ${q}`;
47
-
36
+ let q =
37
+ "mimeType = 'application/vnd.google-apps.document' and trashed = false";
48
38
  if (keyword) {
49
- q = `name contains '${keyword}' and ${q}`;
39
+ const safe = keyword.replace(/'/g, "\\'");
40
+ q = `name contains '${safe}' and ${q}`;
50
41
  }
51
42
 
52
- const res = await drive.files.list({
53
- q,
54
- fields: "files(id, name, description, mimeType)",
55
- spaces: "drive",
56
- pageSize: 50,
57
- supportsAllDrives: true,
58
- includeItemsFromAllDrives: true,
59
- });
60
-
61
- const files = res.data.files;
62
- if (!files || files.length === 0) {
63
- return { success: true, results: [] };
64
- }
43
+ const allFiles: any[] = [];
44
+ let pageToken: string | undefined;
45
+ do {
46
+ const res: any = await drive.files.list({
47
+ q,
48
+ fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
49
+ spaces: "drive",
50
+ pageSize: 100,
51
+ pageToken,
52
+ supportsAllDrives: true,
53
+ includeItemsFromAllDrives: true,
54
+ });
55
+ if (res.data.files) allFiles.push(...res.data.files);
56
+ pageToken = res.data.nextPageToken || undefined;
57
+ } while (pageToken);
65
58
 
66
- return { success: true, results: files };
59
+ return { success: true, results: allFiles };
67
60
  } catch (err: any) {
68
61
  return { success: false, error: err.message };
69
62
  }
70
63
  }
71
64
 
72
- export async function syncSingleDocument(fileId: string, folderId: string) {
73
- const drive = getDriveClient();
74
- const fileInfo = await drive.files.get({
75
- fileId,
76
- fields: "id, name, modifiedTime",
77
- supportsAllDrives: true,
78
- });
79
-
80
- const driveModifiedTime = fileInfo.data.modifiedTime || "";
81
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
82
- const dbModifiedTime = dbMetaMap[fileId];
65
+ /**
66
+ * Sync all documents the SA can see:
67
+ * - New/changed files → syncSingleDocument()
68
+ * - Files removed from Drive → delete from Qdrant + Redis
69
+ */
70
+ export async function syncAllDocuments() {
71
+ try {
72
+ const drive = getDriveClient();
83
73
 
84
- if (!dbModifiedTime || dbModifiedTime !== driveModifiedTime) {
85
- if (dbModifiedTime) {
86
- await deleteProjectDocument(folderId, fileId);
74
+ // List all docs (paginated)
75
+ const allDocs: any[] = [];
76
+ let pageToken: string | undefined;
77
+ do {
78
+ const res: any = await drive.files.list({
79
+ q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
80
+ fields: "nextPageToken, files(id, name, modifiedTime)",
81
+ spaces: "drive",
82
+ pageSize: 100,
83
+ pageToken,
84
+ supportsAllDrives: true,
85
+ includeItemsFromAllDrives: true,
86
+ });
87
+ if (res.data.files) allDocs.push(...res.data.files);
88
+ pageToken = res.data.nextPageToken || undefined;
89
+ } while (pageToken);
90
+
91
+ // Get all Redis sync entries
92
+ const syncEntries = await getAllSyncEntries();
93
+
94
+ // Sync new or changed files
95
+ for (const file of allDocs) {
96
+ if (!file.id || !file.modifiedTime) continue;
97
+ const existing = syncEntries[file.id];
98
+ if (!existing || existing.modifiedTime !== file.modifiedTime) {
99
+ console.error(`[Sync] Detected change: "${file.name}"`);
100
+ await syncSingleDocument(
101
+ file.id,
102
+ file.modifiedTime,
103
+ file.name || "Untitled"
104
+ );
105
+ }
87
106
  }
88
107
 
89
- const res = await drive.files.export({
90
- fileId: fileId,
91
- mimeType: "text/plain",
92
- });
93
-
94
- const content = res.data;
95
- if (typeof content !== "string" || content.trim() === "") {
96
- throw new Error("Empty or invalid file content");
108
+ // Clean up files removed from Drive
109
+ const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
110
+ for (const [fileId, entry] of Object.entries(syncEntries)) {
111
+ if (!driveFileIds.has(fileId)) {
112
+ console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
113
+ const pointIds = Array.from({ length: entry.blockCount }, (_, i) =>
114
+ getBlockPointId(fileId, i)
115
+ );
116
+ await deletePointsByIds(pointIds);
117
+ await deleteSyncEntry(fileId);
118
+ }
97
119
  }
98
120
 
99
- const splitter = new RecursiveCharacterTextSplitter({
100
- chunkSize: config.CHUNK_SIZE,
101
- chunkOverlap: config.CHUNK_OVERLAP,
102
- });
103
- const chunks = await splitter.splitText(content);
104
-
105
- for (const chunk of chunks) {
106
- await upsertProjectDocument(folderId, chunk, {
107
- title: fileInfo.data.name || "Untitled Google Doc",
108
- source: "google_drive",
109
- file_id: fileId,
110
- modified_time: driveModifiedTime,
111
- });
112
- }
113
- return { synced: true, content, driveModifiedTime };
121
+ return { success: true };
122
+ } catch (err: any) {
123
+ console.error("syncAllDocuments failed:", err.message);
124
+ return { success: false, error: err.message };
114
125
  }
115
-
116
- return { synced: false, driveModifiedTime };
117
126
  }
118
127
 
119
- export async function readDriveDocument(fileId: string, offset: number = 0, limit: number = 10000) {
120
- const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
121
- if (!folderId) {
122
- return {
123
- success: false,
124
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
125
- };
126
- }
127
-
128
+ /**
129
+ * Read a specific Google Drive document, triggering incremental sync first.
130
+ * Returns paginated Markdown content.
131
+ */
132
+ export async function readDriveDocument(
133
+ fileId: string,
134
+ offset: number = 0,
135
+ limit: number = 10000
136
+ ) {
128
137
  try {
129
- const result = await syncSingleDocument(fileId, folderId);
130
-
131
- // If not synced just now, we need to fetch content to return to the user
132
- let content = result.content;
133
- if (!content) {
134
- const drive = getDriveClient();
135
- const res = await drive.files.export({
136
- fileId: fileId,
137
- mimeType: "text/plain",
138
- });
139
- content = typeof res.data === "string" ? res.data : "";
140
- }
141
-
142
- let finalContent = content;
143
- const totalSize = finalContent ? finalContent.length : 0;
138
+ const drive = getDriveClient();
139
+ const fileInfo = await drive.files.get({
140
+ fileId,
141
+ fields: "id, name, modifiedTime",
142
+ supportsAllDrives: true,
143
+ });
144
144
 
145
- if (finalContent) {
146
- finalContent = finalContent.substring(offset, offset + limit);
147
- }
145
+ const modifiedTime = fileInfo.data.modifiedTime || "";
146
+ const title = fileInfo.data.name || "Untitled";
148
147
 
149
- const isTruncated = offset + (finalContent?.length || 0) < totalSize;
150
- let warning = undefined;
148
+ const result = await syncSingleDocument(fileId, modifiedTime, title);
149
+ const content = result.content;
150
+ const totalSize = content.length;
151
+ const sliced = content.substring(offset, offset + limit);
152
+ const isTruncated = offset + sliced.length < totalSize;
151
153
 
154
+ let finalContent = sliced;
155
+ let warning: string | undefined;
152
156
  if (isTruncated) {
153
- warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + finalContent!.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
157
+ warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
154
158
  finalContent += `\n\n${warning}`;
155
159
  }
156
160
 
157
161
  return {
158
162
  success: true,
159
163
  data: {
160
- content: finalContent || "Empty file",
161
- metadata: {
162
- totalSize,
163
- offset,
164
- limit,
165
- isTruncated,
166
- warning,
167
- },
164
+ content: finalContent || "Empty document",
165
+ metadata: { totalSize, offset, limit, isTruncated, warning },
168
166
  },
169
167
  };
170
168
  } catch (err: any) {
171
169
  return { success: false, error: err.message };
172
170
  }
173
171
  }
174
-
175
- export async function syncFolderState(folderId: string) {
176
- try {
177
- const drive = getDriveClient();
178
-
179
- async function getAllDocumentsFlat(): Promise<any[]> {
180
- let allDocs: any[] = [];
181
- let pageToken: string | undefined = undefined;
182
-
183
- do {
184
- const docsRes: any = await drive.files.list({
185
- // Chú ý: Đéo check parentId nữa, gom sạch sành sanh mọi file .doc mà Service Account nhìn thấy
186
- q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
187
- fields: "nextPageToken, files(id, name, modifiedTime)",
188
- spaces: "drive",
189
- pageSize: 100, // Google API limit mỗi page, tự động nhảy trang nếu nhiều hơn
190
- pageToken,
191
- supportsAllDrives: true,
192
- includeItemsFromAllDrives: true,
193
- });
194
-
195
- if (docsRes.data.files) {
196
- allDocs = allDocs.concat(docsRes.data.files);
197
- }
198
- pageToken = docsRes.data.nextPageToken || undefined;
199
- } while (pageToken);
200
-
201
- return allDocs;
202
- }
203
-
204
- const driveFiles = await getAllDocumentsFlat();
205
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
206
-
207
- // Sync updated or new files
208
- for (const file of driveFiles) {
209
- if (!file.id) continue;
210
- const dbModTime = dbMetaMap[file.id];
211
- if (!dbModTime || dbModTime !== file.modifiedTime) {
212
- await syncSingleDocument(file.id, folderId);
213
- }
214
- }
215
-
216
- // Delete removed files from DB
217
- for (const dbFileId of Object.keys(dbMetaMap)) {
218
- if (!driveFiles.find((f) => f.id === dbFileId)) {
219
- await deleteProjectDocument(folderId, dbFileId);
220
- }
221
- }
222
-
223
- return { success: true };
224
- } catch (err: any) {
225
- console.error("Auto-sync failed:", err.message);
226
- return { success: false, error: err.message };
227
- }
228
- }