@khoinguyen2002/doc-mcp 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +6 -4
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +22 -7
- package/dist/db/rateLimiter.d.ts +6 -0
- package/dist/db/rateLimiter.d.ts.map +1 -0
- package/dist/db/rateLimiter.js +20 -0
- package/dist/db/syncState.d.ts +12 -0
- package/dist/db/syncState.d.ts.map +1 -0
- package/dist/db/syncState.js +69 -0
- package/dist/db/vector.d.ts +61 -6
- package/dist/db/vector.d.ts.map +1 -1
- package/dist/db/vector.js +249 -109
- package/dist/mcp-server.js +44 -23
- package/dist/tools/driveTools.d.ts +20 -16
- package/dist/tools/driveTools.d.ts.map +1 -1
- package/dist/tools/driveTools.js +100 -149
- package/dist/tools/ingestFlow.d.ts +8 -0
- package/dist/tools/ingestFlow.d.ts.map +1 -0
- package/dist/tools/ingestFlow.js +407 -0
- package/dist/tools/knowledgeTools.d.ts +25 -6
- package/dist/tools/knowledgeTools.d.ts.map +1 -1
- package/dist/tools/knowledgeTools.js +29 -40
- package/package.json +8 -1
- package/src/config.ts +28 -9
- package/src/db/rateLimiter.ts +25 -0
- package/src/db/syncState.ts +87 -0
- package/src/db/vector.ts +305 -115
- package/src/mcp-server.ts +55 -33
- package/src/tools/driveTools.ts +111 -175
- package/src/tools/ingestFlow.ts +508 -0
- package/src/tools/knowledgeTools.ts +34 -38
- package/src/types/turndown-plugin-gfm.d.ts +8 -0
package/dist/tools/driveTools.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { google } from "googleapis";
|
|
2
|
-
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
|
3
2
|
import { config } from "../config.js";
|
|
4
|
-
import {
|
|
3
|
+
import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
|
|
4
|
+
import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
|
|
5
|
+
import { syncSingleDocument } from "./ingestFlow.js";
|
|
5
6
|
function getDriveClient() {
|
|
6
7
|
const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
|
|
7
8
|
let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
|
|
8
9
|
if (!clientEmail || !privateKey) {
|
|
9
|
-
throw new Error("Google Drive credentials not configured.
|
|
10
|
+
throw new Error("Google Drive credentials not configured.");
|
|
10
11
|
}
|
|
11
12
|
if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
|
|
12
13
|
privateKey = privateKey.slice(1, -1);
|
|
@@ -19,126 +20,124 @@ function getDriveClient() {
|
|
|
19
20
|
});
|
|
20
21
|
return google.drive({ version: "v3", auth });
|
|
21
22
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
|
|
28
|
-
};
|
|
29
|
-
}
|
|
23
|
+
/**
|
|
24
|
+
* List all Google Docs the Service Account can read.
|
|
25
|
+
* Optional keyword filter on document title.
|
|
26
|
+
*/
|
|
27
|
+
export async function listDriveFiles(keyword) {
|
|
30
28
|
try {
|
|
31
29
|
const drive = getDriveClient();
|
|
32
|
-
let q = "
|
|
33
|
-
q = `'${folderId}' in parents and ${q}`;
|
|
30
|
+
let q = "mimeType = 'application/vnd.google-apps.document' and trashed = false";
|
|
34
31
|
if (keyword) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
const res = await drive.files.list({
|
|
38
|
-
q,
|
|
39
|
-
fields: "files(id, name, description, mimeType)",
|
|
40
|
-
spaces: "drive",
|
|
41
|
-
pageSize: 50,
|
|
42
|
-
supportsAllDrives: true,
|
|
43
|
-
includeItemsFromAllDrives: true,
|
|
44
|
-
});
|
|
45
|
-
const files = res.data.files;
|
|
46
|
-
if (!files || files.length === 0) {
|
|
47
|
-
return { success: true, results: [] };
|
|
32
|
+
const safe = keyword.replace(/'/g, "\\'");
|
|
33
|
+
q = `name contains '${safe}' and ${q}`;
|
|
48
34
|
}
|
|
49
|
-
|
|
35
|
+
const allFiles = [];
|
|
36
|
+
let pageToken;
|
|
37
|
+
do {
|
|
38
|
+
const res = await drive.files.list({
|
|
39
|
+
q,
|
|
40
|
+
fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
|
|
41
|
+
spaces: "drive",
|
|
42
|
+
pageSize: 100,
|
|
43
|
+
pageToken,
|
|
44
|
+
supportsAllDrives: true,
|
|
45
|
+
includeItemsFromAllDrives: true,
|
|
46
|
+
});
|
|
47
|
+
if (res.data.files)
|
|
48
|
+
allFiles.push(...res.data.files);
|
|
49
|
+
pageToken = res.data.nextPageToken || undefined;
|
|
50
|
+
} while (pageToken);
|
|
51
|
+
return { success: true, results: allFiles };
|
|
50
52
|
}
|
|
51
53
|
catch (err) {
|
|
52
54
|
return { success: false, error: err.message };
|
|
53
55
|
}
|
|
54
56
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
await
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
57
|
+
/**
|
|
58
|
+
* Sync all documents the SA can see:
|
|
59
|
+
* - New/changed files → syncSingleDocument()
|
|
60
|
+
* - Files removed from Drive → delete from Qdrant + Redis
|
|
61
|
+
*/
|
|
62
|
+
export async function syncAllDocuments() {
|
|
63
|
+
try {
|
|
64
|
+
const drive = getDriveClient();
|
|
65
|
+
// List all docs (paginated)
|
|
66
|
+
const allDocs = [];
|
|
67
|
+
let pageToken;
|
|
68
|
+
do {
|
|
69
|
+
const res = await drive.files.list({
|
|
70
|
+
q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
|
|
71
|
+
fields: "nextPageToken, files(id, name, modifiedTime)",
|
|
72
|
+
spaces: "drive",
|
|
73
|
+
pageSize: 100,
|
|
74
|
+
pageToken,
|
|
75
|
+
supportsAllDrives: true,
|
|
76
|
+
includeItemsFromAllDrives: true,
|
|
77
|
+
});
|
|
78
|
+
if (res.data.files)
|
|
79
|
+
allDocs.push(...res.data.files);
|
|
80
|
+
pageToken = res.data.nextPageToken || undefined;
|
|
81
|
+
} while (pageToken);
|
|
82
|
+
// Get all Redis sync entries
|
|
83
|
+
const syncEntries = await getAllSyncEntries();
|
|
84
|
+
// Sync new or changed files
|
|
85
|
+
for (const file of allDocs) {
|
|
86
|
+
if (!file.id || !file.modifiedTime)
|
|
87
|
+
continue;
|
|
88
|
+
const existing = syncEntries[file.id];
|
|
89
|
+
if (!existing || existing.modifiedTime !== file.modifiedTime) {
|
|
90
|
+
console.error(`[Sync] Detected change: "${file.name}"`);
|
|
91
|
+
await syncSingleDocument(file.id, file.modifiedTime, file.name || "Untitled");
|
|
92
|
+
}
|
|
76
93
|
}
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if (offset !== -1) {
|
|
86
|
-
currentOffset = offset;
|
|
94
|
+
// Clean up files removed from Drive
|
|
95
|
+
const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
|
|
96
|
+
for (const [fileId, entry] of Object.entries(syncEntries)) {
|
|
97
|
+
if (!driveFileIds.has(fileId)) {
|
|
98
|
+
console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
|
|
99
|
+
const pointIds = Array.from({ length: entry.blockCount }, (_, i) => getBlockPointId(fileId, i));
|
|
100
|
+
await deletePointsByIds(pointIds);
|
|
101
|
+
await deleteSyncEntry(fileId);
|
|
87
102
|
}
|
|
88
|
-
await upsertProjectDocument(folderId, chunk, {
|
|
89
|
-
title: fileInfo.data.name || "Untitled Google Doc",
|
|
90
|
-
source: "google_drive",
|
|
91
|
-
file_id: fileId,
|
|
92
|
-
modified_time: driveModifiedTime,
|
|
93
|
-
offset: offset !== -1 ? offset : 0,
|
|
94
|
-
});
|
|
95
103
|
}
|
|
96
|
-
return {
|
|
104
|
+
return { success: true };
|
|
105
|
+
}
|
|
106
|
+
catch (err) {
|
|
107
|
+
console.error("syncAllDocuments failed:", err.message);
|
|
108
|
+
return { success: false, error: err.message };
|
|
97
109
|
}
|
|
98
|
-
return { synced: false, driveModifiedTime };
|
|
99
110
|
}
|
|
111
|
+
/**
|
|
112
|
+
* Read a specific Google Drive document, triggering incremental sync first.
|
|
113
|
+
* Returns paginated Markdown content.
|
|
114
|
+
*/
|
|
100
115
|
export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
|
|
101
|
-
const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
|
|
102
|
-
if (!folderId) {
|
|
103
|
-
return {
|
|
104
|
-
success: false,
|
|
105
|
-
error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
116
|
try {
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
}
|
|
125
|
-
const isTruncated = offset + (finalContent?.length || 0) < totalSize;
|
|
126
|
-
let warning = undefined;
|
|
117
|
+
const drive = getDriveClient();
|
|
118
|
+
const fileInfo = await drive.files.get({
|
|
119
|
+
fileId,
|
|
120
|
+
fields: "id, name, modifiedTime",
|
|
121
|
+
supportsAllDrives: true,
|
|
122
|
+
});
|
|
123
|
+
const modifiedTime = fileInfo.data.modifiedTime || "";
|
|
124
|
+
const title = fileInfo.data.name || "Untitled";
|
|
125
|
+
const result = await syncSingleDocument(fileId, modifiedTime, title);
|
|
126
|
+
const content = result.content;
|
|
127
|
+
const totalSize = content.length;
|
|
128
|
+
const sliced = content.substring(offset, offset + limit);
|
|
129
|
+
const isTruncated = offset + sliced.length < totalSize;
|
|
130
|
+
let finalContent = sliced;
|
|
131
|
+
let warning;
|
|
127
132
|
if (isTruncated) {
|
|
128
|
-
warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset +
|
|
133
|
+
warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
|
|
129
134
|
finalContent += `\n\n${warning}`;
|
|
130
135
|
}
|
|
131
136
|
return {
|
|
132
137
|
success: true,
|
|
133
138
|
data: {
|
|
134
|
-
content: finalContent || "Empty
|
|
135
|
-
metadata: {
|
|
136
|
-
totalSize,
|
|
137
|
-
offset,
|
|
138
|
-
limit,
|
|
139
|
-
isTruncated,
|
|
140
|
-
warning,
|
|
141
|
-
},
|
|
139
|
+
content: finalContent || "Empty document",
|
|
140
|
+
metadata: { totalSize, offset, limit, isTruncated, warning },
|
|
142
141
|
},
|
|
143
142
|
};
|
|
144
143
|
}
|
|
@@ -146,51 +145,3 @@ export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
|
|
|
146
145
|
return { success: false, error: err.message };
|
|
147
146
|
}
|
|
148
147
|
}
|
|
149
|
-
export async function syncFolderState(folderId) {
|
|
150
|
-
try {
|
|
151
|
-
const drive = getDriveClient();
|
|
152
|
-
async function getAllDocumentsFlat() {
|
|
153
|
-
let allDocs = [];
|
|
154
|
-
let pageToken = undefined;
|
|
155
|
-
do {
|
|
156
|
-
const docsRes = await drive.files.list({
|
|
157
|
-
// Note: parentId is no longer checked; gather every .doc file the Service Account can see
|
|
158
|
-
q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
|
|
159
|
-
fields: "nextPageToken, files(id, name, modifiedTime)",
|
|
160
|
-
spaces: "drive",
|
|
161
|
-
pageSize: 100, // Google API per-page limit; automatically moves on to the next page when there are more results
|
|
162
|
-
pageToken,
|
|
163
|
-
supportsAllDrives: true,
|
|
164
|
-
includeItemsFromAllDrives: true,
|
|
165
|
-
});
|
|
166
|
-
if (docsRes.data.files) {
|
|
167
|
-
allDocs = allDocs.concat(docsRes.data.files);
|
|
168
|
-
}
|
|
169
|
-
pageToken = docsRes.data.nextPageToken || undefined;
|
|
170
|
-
} while (pageToken);
|
|
171
|
-
return allDocs;
|
|
172
|
-
}
|
|
173
|
-
const driveFiles = await getAllDocumentsFlat();
|
|
174
|
-
const dbMetaMap = await getProjectDocumentMetadata(folderId);
|
|
175
|
-
// Sync updated or new files
|
|
176
|
-
for (const file of driveFiles) {
|
|
177
|
-
if (!file.id)
|
|
178
|
-
continue;
|
|
179
|
-
const dbModTime = dbMetaMap[file.id];
|
|
180
|
-
if (!dbModTime || dbModTime !== file.modifiedTime) {
|
|
181
|
-
await syncSingleDocument(file.id, folderId);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
// Delete removed files from DB
|
|
185
|
-
for (const dbFileId of Object.keys(dbMetaMap)) {
|
|
186
|
-
if (!driveFiles.find((f) => f.id === dbFileId)) {
|
|
187
|
-
await deleteProjectDocument(folderId, dbFileId);
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
return { success: true };
|
|
191
|
-
}
|
|
192
|
-
catch (err) {
|
|
193
|
-
console.error("Auto-sync failed:", err.message);
|
|
194
|
-
return { success: false, error: err.message };
|
|
195
|
-
}
|
|
196
|
-
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export declare function googleDocToMarkdown(docJson: any): Promise<string>;
|
|
2
|
+
export declare function syncSingleDocument(fileId: string, driveModifiedTime: string, title: string): Promise<{
|
|
3
|
+
synced: boolean;
|
|
4
|
+
content: string;
|
|
5
|
+
upsertedCount?: number;
|
|
6
|
+
skippedCount?: number;
|
|
7
|
+
}>;
|
|
8
|
+
//# sourceMappingURL=ingestFlow.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAuGD"}
|