@khoinguyen2002/doc-mcp 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +6 -4
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +22 -7
- package/dist/db/rateLimiter.d.ts +6 -0
- package/dist/db/rateLimiter.d.ts.map +1 -0
- package/dist/db/rateLimiter.js +20 -0
- package/dist/db/syncState.d.ts +12 -0
- package/dist/db/syncState.d.ts.map +1 -0
- package/dist/db/syncState.js +69 -0
- package/dist/db/vector.d.ts +61 -6
- package/dist/db/vector.d.ts.map +1 -1
- package/dist/db/vector.js +249 -109
- package/dist/mcp-server.js +47 -37
- package/dist/tools/driveTools.d.ts +20 -16
- package/dist/tools/driveTools.d.ts.map +1 -1
- package/dist/tools/driveTools.js +101 -144
- package/dist/tools/ingestFlow.d.ts +8 -0
- package/dist/tools/ingestFlow.d.ts.map +1 -0
- package/dist/tools/ingestFlow.js +407 -0
- package/dist/tools/knowledgeTools.d.ts +32 -4
- package/dist/tools/knowledgeTools.d.ts.map +1 -1
- package/dist/tools/knowledgeTools.js +29 -34
- package/package.json +8 -1
- package/src/config.ts +28 -9
- package/src/db/rateLimiter.ts +25 -0
- package/src/db/syncState.ts +87 -0
- package/src/db/vector.ts +305 -115
- package/src/mcp-server.ts +56 -48
- package/src/tools/driveTools.ts +111 -168
- package/src/tools/ingestFlow.ts +508 -0
- package/src/tools/knowledgeTools.ts +34 -33
- package/src/types/turndown-plugin-gfm.d.ts +8 -0
package/dist/tools/driveTools.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { google } from "googleapis";
|
|
2
|
-
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
|
3
2
|
import { config } from "../config.js";
|
|
4
|
-
import {
|
|
3
|
+
import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
|
|
4
|
+
import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
|
|
5
|
+
import { syncSingleDocument } from "./ingestFlow.js";
|
|
5
6
|
function getDriveClient() {
|
|
6
7
|
const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
|
|
7
8
|
let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
|
|
8
9
|
if (!clientEmail || !privateKey) {
|
|
9
|
-
throw new Error("Google Drive credentials not configured.
|
|
10
|
+
throw new Error("Google Drive credentials not configured.");
|
|
10
11
|
}
|
|
11
12
|
if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
|
|
12
13
|
privateKey = privateKey.slice(1, -1);
|
|
@@ -19,120 +20,124 @@ function getDriveClient() {
|
|
|
19
20
|
});
|
|
20
21
|
return google.drive({ version: "v3", auth });
|
|
21
22
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
|
|
28
|
-
};
|
|
29
|
-
}
|
|
23
|
+
/**
|
|
24
|
+
* List all Google Docs the Service Account can read.
|
|
25
|
+
* Optional keyword filter on document title.
|
|
26
|
+
*/
|
|
27
|
+
export async function listDriveFiles(keyword) {
|
|
30
28
|
try {
|
|
31
29
|
const drive = getDriveClient();
|
|
32
|
-
let q = "
|
|
33
|
-
q = `'${folderId}' in parents and ${q}`;
|
|
30
|
+
let q = "mimeType = 'application/vnd.google-apps.document' and trashed = false";
|
|
34
31
|
if (keyword) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
const res = await drive.files.list({
|
|
38
|
-
q,
|
|
39
|
-
fields: "files(id, name, description, mimeType)",
|
|
40
|
-
spaces: "drive",
|
|
41
|
-
pageSize: 50,
|
|
42
|
-
supportsAllDrives: true,
|
|
43
|
-
includeItemsFromAllDrives: true,
|
|
44
|
-
});
|
|
45
|
-
const files = res.data.files;
|
|
46
|
-
if (!files || files.length === 0) {
|
|
47
|
-
return { success: true, results: [] };
|
|
32
|
+
const safe = keyword.replace(/'/g, "\\'");
|
|
33
|
+
q = `name contains '${safe}' and ${q}`;
|
|
48
34
|
}
|
|
49
|
-
|
|
35
|
+
const allFiles = [];
|
|
36
|
+
let pageToken;
|
|
37
|
+
do {
|
|
38
|
+
const res = await drive.files.list({
|
|
39
|
+
q,
|
|
40
|
+
fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
|
|
41
|
+
spaces: "drive",
|
|
42
|
+
pageSize: 100,
|
|
43
|
+
pageToken,
|
|
44
|
+
supportsAllDrives: true,
|
|
45
|
+
includeItemsFromAllDrives: true,
|
|
46
|
+
});
|
|
47
|
+
if (res.data.files)
|
|
48
|
+
allFiles.push(...res.data.files);
|
|
49
|
+
pageToken = res.data.nextPageToken || undefined;
|
|
50
|
+
} while (pageToken);
|
|
51
|
+
return { success: true, results: allFiles };
|
|
50
52
|
}
|
|
51
53
|
catch (err) {
|
|
52
54
|
return { success: false, error: err.message };
|
|
53
55
|
}
|
|
54
56
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
await
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
throw new Error("Empty or invalid file content");
|
|
76
|
-
}
|
|
77
|
-
const splitter = new RecursiveCharacterTextSplitter({
|
|
78
|
-
chunkSize: config.CHUNK_SIZE,
|
|
79
|
-
chunkOverlap: config.CHUNK_OVERLAP,
|
|
80
|
-
});
|
|
81
|
-
const chunks = await splitter.splitText(content);
|
|
82
|
-
for (const chunk of chunks) {
|
|
83
|
-
await upsertProjectDocument(folderId, chunk, {
|
|
84
|
-
title: fileInfo.data.name || "Untitled Google Doc",
|
|
85
|
-
source: "google_drive",
|
|
86
|
-
file_id: fileId,
|
|
87
|
-
modified_time: driveModifiedTime,
|
|
57
|
+
/**
|
|
58
|
+
* Sync all documents the SA can see:
|
|
59
|
+
* - New/changed files → syncSingleDocument()
|
|
60
|
+
* - Files removed from Drive → delete from Qdrant + Redis
|
|
61
|
+
*/
|
|
62
|
+
export async function syncAllDocuments() {
|
|
63
|
+
try {
|
|
64
|
+
const drive = getDriveClient();
|
|
65
|
+
// List all docs (paginated)
|
|
66
|
+
const allDocs = [];
|
|
67
|
+
let pageToken;
|
|
68
|
+
do {
|
|
69
|
+
const res = await drive.files.list({
|
|
70
|
+
q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
|
|
71
|
+
fields: "nextPageToken, files(id, name, modifiedTime)",
|
|
72
|
+
spaces: "drive",
|
|
73
|
+
pageSize: 100,
|
|
74
|
+
pageToken,
|
|
75
|
+
supportsAllDrives: true,
|
|
76
|
+
includeItemsFromAllDrives: true,
|
|
88
77
|
});
|
|
78
|
+
if (res.data.files)
|
|
79
|
+
allDocs.push(...res.data.files);
|
|
80
|
+
pageToken = res.data.nextPageToken || undefined;
|
|
81
|
+
} while (pageToken);
|
|
82
|
+
// Get all Redis sync entries
|
|
83
|
+
const syncEntries = await getAllSyncEntries();
|
|
84
|
+
// Sync new or changed files
|
|
85
|
+
for (const file of allDocs) {
|
|
86
|
+
if (!file.id || !file.modifiedTime)
|
|
87
|
+
continue;
|
|
88
|
+
const existing = syncEntries[file.id];
|
|
89
|
+
if (!existing || existing.modifiedTime !== file.modifiedTime) {
|
|
90
|
+
console.error(`[Sync] Detected change: "${file.name}"`);
|
|
91
|
+
await syncSingleDocument(file.id, file.modifiedTime, file.name || "Untitled");
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
// Clean up files removed from Drive
|
|
95
|
+
const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
|
|
96
|
+
for (const [fileId, entry] of Object.entries(syncEntries)) {
|
|
97
|
+
if (!driveFileIds.has(fileId)) {
|
|
98
|
+
console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
|
|
99
|
+
const pointIds = Array.from({ length: entry.blockCount }, (_, i) => getBlockPointId(fileId, i));
|
|
100
|
+
await deletePointsByIds(pointIds);
|
|
101
|
+
await deleteSyncEntry(fileId);
|
|
102
|
+
}
|
|
89
103
|
}
|
|
90
|
-
return {
|
|
104
|
+
return { success: true };
|
|
105
|
+
}
|
|
106
|
+
catch (err) {
|
|
107
|
+
console.error("syncAllDocuments failed:", err.message);
|
|
108
|
+
return { success: false, error: err.message };
|
|
91
109
|
}
|
|
92
|
-
return { synced: false, driveModifiedTime };
|
|
93
110
|
}
|
|
111
|
+
/**
|
|
112
|
+
* Read a specific Google Drive document, triggering incremental sync first.
|
|
113
|
+
* Returns paginated Markdown content.
|
|
114
|
+
*/
|
|
94
115
|
export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
|
|
95
|
-
const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
|
|
96
|
-
if (!folderId) {
|
|
97
|
-
return {
|
|
98
|
-
success: false,
|
|
99
|
-
error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
|
|
100
|
-
};
|
|
101
|
-
}
|
|
102
116
|
try {
|
|
103
|
-
const
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
const
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
}
|
|
119
|
-
const isTruncated = offset + (finalContent?.length || 0) < totalSize;
|
|
120
|
-
let warning = undefined;
|
|
117
|
+
const drive = getDriveClient();
|
|
118
|
+
const fileInfo = await drive.files.get({
|
|
119
|
+
fileId,
|
|
120
|
+
fields: "id, name, modifiedTime",
|
|
121
|
+
supportsAllDrives: true,
|
|
122
|
+
});
|
|
123
|
+
const modifiedTime = fileInfo.data.modifiedTime || "";
|
|
124
|
+
const title = fileInfo.data.name || "Untitled";
|
|
125
|
+
const result = await syncSingleDocument(fileId, modifiedTime, title);
|
|
126
|
+
const content = result.content;
|
|
127
|
+
const totalSize = content.length;
|
|
128
|
+
const sliced = content.substring(offset, offset + limit);
|
|
129
|
+
const isTruncated = offset + sliced.length < totalSize;
|
|
130
|
+
let finalContent = sliced;
|
|
131
|
+
let warning;
|
|
121
132
|
if (isTruncated) {
|
|
122
|
-
warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset +
|
|
133
|
+
warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
|
|
123
134
|
finalContent += `\n\n${warning}`;
|
|
124
135
|
}
|
|
125
136
|
return {
|
|
126
137
|
success: true,
|
|
127
138
|
data: {
|
|
128
|
-
content: finalContent || "Empty
|
|
129
|
-
metadata: {
|
|
130
|
-
totalSize,
|
|
131
|
-
offset,
|
|
132
|
-
limit,
|
|
133
|
-
isTruncated,
|
|
134
|
-
warning,
|
|
135
|
-
},
|
|
139
|
+
content: finalContent || "Empty document",
|
|
140
|
+
metadata: { totalSize, offset, limit, isTruncated, warning },
|
|
136
141
|
},
|
|
137
142
|
};
|
|
138
143
|
}
|
|
@@ -140,51 +145,3 @@ export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
|
|
|
140
145
|
return { success: false, error: err.message };
|
|
141
146
|
}
|
|
142
147
|
}
|
|
143
|
-
export async function syncFolderState(folderId) {
|
|
144
|
-
try {
|
|
145
|
-
const drive = getDriveClient();
|
|
146
|
-
async function getAllDocumentsFlat() {
|
|
147
|
-
let allDocs = [];
|
|
148
|
-
let pageToken = undefined;
|
|
149
|
-
do {
|
|
150
|
-
const docsRes = await drive.files.list({
|
|
151
|
-
// Chú ý: Đéo check parentId nữa, gom sạch sành sanh mọi file .doc mà Service Account nhìn thấy
|
|
152
|
-
q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
|
|
153
|
-
fields: "nextPageToken, files(id, name, modifiedTime)",
|
|
154
|
-
spaces: "drive",
|
|
155
|
-
pageSize: 100, // Google API limit mỗi page, tự động nhảy trang nếu nhiều hơn
|
|
156
|
-
pageToken,
|
|
157
|
-
supportsAllDrives: true,
|
|
158
|
-
includeItemsFromAllDrives: true,
|
|
159
|
-
});
|
|
160
|
-
if (docsRes.data.files) {
|
|
161
|
-
allDocs = allDocs.concat(docsRes.data.files);
|
|
162
|
-
}
|
|
163
|
-
pageToken = docsRes.data.nextPageToken || undefined;
|
|
164
|
-
} while (pageToken);
|
|
165
|
-
return allDocs;
|
|
166
|
-
}
|
|
167
|
-
const driveFiles = await getAllDocumentsFlat();
|
|
168
|
-
const dbMetaMap = await getProjectDocumentMetadata(folderId);
|
|
169
|
-
// Sync updated or new files
|
|
170
|
-
for (const file of driveFiles) {
|
|
171
|
-
if (!file.id)
|
|
172
|
-
continue;
|
|
173
|
-
const dbModTime = dbMetaMap[file.id];
|
|
174
|
-
if (!dbModTime || dbModTime !== file.modifiedTime) {
|
|
175
|
-
await syncSingleDocument(file.id, folderId);
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
// Delete removed files from DB
|
|
179
|
-
for (const dbFileId of Object.keys(dbMetaMap)) {
|
|
180
|
-
if (!driveFiles.find((f) => f.id === dbFileId)) {
|
|
181
|
-
await deleteProjectDocument(folderId, dbFileId);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
return { success: true };
|
|
185
|
-
}
|
|
186
|
-
catch (err) {
|
|
187
|
-
console.error("Auto-sync failed:", err.message);
|
|
188
|
-
return { success: false, error: err.message };
|
|
189
|
-
}
|
|
190
|
-
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export declare function googleDocToMarkdown(docJson: any): Promise<string>;
|
|
2
|
+
export declare function syncSingleDocument(fileId: string, driveModifiedTime: string, title: string): Promise<{
|
|
3
|
+
synced: boolean;
|
|
4
|
+
content: string;
|
|
5
|
+
upsertedCount?: number;
|
|
6
|
+
skippedCount?: number;
|
|
7
|
+
}>;
|
|
8
|
+
//# sourceMappingURL=ingestFlow.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAuGD"}
|