@khoinguyen2002/doc-mcp 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
1
  import { google } from "googleapis";
2
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
3
2
  import { config } from "../config.js";
4
- import { upsertProjectDocument, getProjectDocumentMetadata, deleteProjectDocument, } from "../db/vector.js";
3
+ import { deletePointsByIds, getBlockPointId } from "../db/vector.js";
4
+ import { getAllSyncEntries, deleteSyncEntry } from "../db/syncState.js";
5
+ import { syncSingleDocument } from "./ingestFlow.js";
5
6
  function getDriveClient() {
6
7
  const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
7
8
  let privateKey = config.DOC_MCP_GOOGLE_PRIVATE_KEY;
8
9
  if (!clientEmail || !privateKey) {
9
- throw new Error("Google Drive credentials not configured. Please set DOC_MCP_GOOGLE_CLIENT_EMAIL and DOC_MCP_GOOGLE_PRIVATE_KEY in .env");
10
+ throw new Error("Google Drive credentials not configured.");
10
11
  }
11
12
  if (privateKey.startsWith('"') && privateKey.endsWith('"')) {
12
13
  privateKey = privateKey.slice(1, -1);
@@ -19,126 +20,124 @@ function getDriveClient() {
19
20
  });
20
21
  return google.drive({ version: "v3", auth });
21
22
  }
22
- export async function listDriveFiles(keyword, targetFolderId) {
23
- const folderId = targetFolderId || config.DOC_MCP_DRIVE_FOLDER_ID;
24
- if (!folderId) {
25
- return {
26
- success: false,
27
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
28
- };
29
- }
23
+ /**
24
+ * List all Google Docs the Service Account can read.
25
+ * Optional keyword filter on document title.
26
+ */
27
+ export async function listDriveFiles(keyword) {
30
28
  try {
31
29
  const drive = getDriveClient();
32
- let q = "(mimeType = 'application/vnd.google-apps.document' or mimeType = 'application/vnd.google-apps.folder') and trashed = false";
33
- q = `'${folderId}' in parents and ${q}`;
30
+ let q = "mimeType = 'application/vnd.google-apps.document' and trashed = false";
34
31
  if (keyword) {
35
- q = `name contains '${keyword}' and ${q}`;
36
- }
37
- const res = await drive.files.list({
38
- q,
39
- fields: "files(id, name, description, mimeType)",
40
- spaces: "drive",
41
- pageSize: 50,
42
- supportsAllDrives: true,
43
- includeItemsFromAllDrives: true,
44
- });
45
- const files = res.data.files;
46
- if (!files || files.length === 0) {
47
- return { success: true, results: [] };
32
+ const safe = keyword.replace(/'/g, "\\'");
33
+ q = `name contains '${safe}' and ${q}`;
48
34
  }
49
- return { success: true, results: files };
35
+ const allFiles = [];
36
+ let pageToken;
37
+ do {
38
+ const res = await drive.files.list({
39
+ q,
40
+ fields: "nextPageToken, files(id, name, mimeType, modifiedTime)",
41
+ spaces: "drive",
42
+ pageSize: 100,
43
+ pageToken,
44
+ supportsAllDrives: true,
45
+ includeItemsFromAllDrives: true,
46
+ });
47
+ if (res.data.files)
48
+ allFiles.push(...res.data.files);
49
+ pageToken = res.data.nextPageToken || undefined;
50
+ } while (pageToken);
51
+ return { success: true, results: allFiles };
50
52
  }
51
53
  catch (err) {
52
54
  return { success: false, error: err.message };
53
55
  }
54
56
  }
55
- export async function syncSingleDocument(fileId, folderId) {
56
- const drive = getDriveClient();
57
- const fileInfo = await drive.files.get({
58
- fileId,
59
- fields: "id, name, modifiedTime",
60
- supportsAllDrives: true,
61
- });
62
- const driveModifiedTime = fileInfo.data.modifiedTime || "";
63
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
64
- const dbModifiedTime = dbMetaMap[fileId];
65
- if (!dbModifiedTime || dbModifiedTime !== driveModifiedTime) {
66
- if (dbModifiedTime) {
67
- await deleteProjectDocument(folderId, fileId);
68
- }
69
- const res = await drive.files.export({
70
- fileId: fileId,
71
- mimeType: "text/plain",
72
- });
73
- const content = res.data;
74
- if (typeof content !== "string" || content.trim() === "") {
75
- throw new Error("Empty or invalid file content");
57
+ /**
58
+ * Sync all documents the SA can see:
59
+ * - New/changed files → syncSingleDocument()
60
+ * - Files removed from Drive → delete from Qdrant + Redis
61
+ */
62
+ export async function syncAllDocuments() {
63
+ try {
64
+ const drive = getDriveClient();
65
+ // List all docs (paginated)
66
+ const allDocs = [];
67
+ let pageToken;
68
+ do {
69
+ const res = await drive.files.list({
70
+ q: "mimeType = 'application/vnd.google-apps.document' and trashed = false",
71
+ fields: "nextPageToken, files(id, name, modifiedTime)",
72
+ spaces: "drive",
73
+ pageSize: 100,
74
+ pageToken,
75
+ supportsAllDrives: true,
76
+ includeItemsFromAllDrives: true,
77
+ });
78
+ if (res.data.files)
79
+ allDocs.push(...res.data.files);
80
+ pageToken = res.data.nextPageToken || undefined;
81
+ } while (pageToken);
82
+ // Get all Redis sync entries
83
+ const syncEntries = await getAllSyncEntries();
84
+ // Sync new or changed files
85
+ for (const file of allDocs) {
86
+ if (!file.id || !file.modifiedTime)
87
+ continue;
88
+ const existing = syncEntries[file.id];
89
+ if (!existing || existing.modifiedTime !== file.modifiedTime) {
90
+ console.error(`[Sync] Detected change: "${file.name}"`);
91
+ await syncSingleDocument(file.id, file.modifiedTime, file.name || "Untitled");
92
+ }
76
93
  }
77
- const splitter = new RecursiveCharacterTextSplitter({
78
- chunkSize: config.CHUNK_SIZE,
79
- chunkOverlap: config.CHUNK_OVERLAP,
80
- });
81
- const chunks = await splitter.splitText(content);
82
- let currentOffset = 0;
83
- for (const chunk of chunks) {
84
- const offset = content.indexOf(chunk, currentOffset);
85
- if (offset !== -1) {
86
- currentOffset = offset;
94
+ // Clean up files removed from Drive
95
+ const driveFileIds = new Set(allDocs.map((f) => f.id).filter(Boolean));
96
+ for (const [fileId, entry] of Object.entries(syncEntries)) {
97
+ if (!driveFileIds.has(fileId)) {
98
+ console.error(`[Sync] Removing deleted doc: "${entry.title}"`);
99
+ const pointIds = Array.from({ length: entry.blockCount }, (_, i) => getBlockPointId(fileId, i));
100
+ await deletePointsByIds(pointIds);
101
+ await deleteSyncEntry(fileId);
87
102
  }
88
- await upsertProjectDocument(folderId, chunk, {
89
- title: fileInfo.data.name || "Untitled Google Doc",
90
- source: "google_drive",
91
- file_id: fileId,
92
- modified_time: driveModifiedTime,
93
- offset: offset !== -1 ? offset : 0,
94
- });
95
103
  }
96
- return { synced: true, content, driveModifiedTime };
104
+ return { success: true };
105
+ }
106
+ catch (err) {
107
+ console.error("syncAllDocuments failed:", err.message);
108
+ return { success: false, error: err.message };
97
109
  }
98
- return { synced: false, driveModifiedTime };
99
110
  }
111
+ /**
112
+ * Read a specific Google Drive document, triggering incremental sync first.
113
+ * Returns paginated Markdown content.
114
+ */
100
115
  export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
101
- const folderId = config.DOC_MCP_DRIVE_FOLDER_ID;
102
- if (!folderId) {
103
- return {
104
- success: false,
105
- error: "DOC_MCP_DRIVE_FOLDER_ID is not configured for this agent.",
106
- };
107
- }
108
116
  try {
109
- const result = await syncSingleDocument(fileId, folderId);
110
- // If not synced just now, we need to fetch content to return to the user
111
- let content = result.content;
112
- if (!content) {
113
- const drive = getDriveClient();
114
- const res = await drive.files.export({
115
- fileId: fileId,
116
- mimeType: "text/plain",
117
- });
118
- content = typeof res.data === "string" ? res.data : "";
119
- }
120
- let finalContent = content;
121
- const totalSize = finalContent ? finalContent.length : 0;
122
- if (finalContent) {
123
- finalContent = finalContent.substring(offset, offset + limit);
124
- }
125
- const isTruncated = offset + (finalContent?.length || 0) < totalSize;
126
- let warning = undefined;
117
+ const drive = getDriveClient();
118
+ const fileInfo = await drive.files.get({
119
+ fileId,
120
+ fields: "id, name, modifiedTime",
121
+ supportsAllDrives: true,
122
+ });
123
+ const modifiedTime = fileInfo.data.modifiedTime || "";
124
+ const title = fileInfo.data.name || "Untitled";
125
+ const result = await syncSingleDocument(fileId, modifiedTime, title);
126
+ const content = result.content;
127
+ const totalSize = content.length;
128
+ const sliced = content.substring(offset, offset + limit);
129
+ const isTruncated = offset + sliced.length < totalSize;
130
+ let finalContent = sliced;
131
+ let warning;
127
132
  if (isTruncated) {
128
- warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + finalContent.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
133
+ warning = `[WARNING]: This is not the entire document. Content has been truncated from character ${offset} to ${offset + sliced.length} out of ${totalSize} total characters. Please use 'offset' and 'limit' parameters to read the rest of the document, or use search_knowledge to query specific details.`;
129
134
  finalContent += `\n\n${warning}`;
130
135
  }
131
136
  return {
132
137
  success: true,
133
138
  data: {
134
- content: finalContent || "Empty file",
135
- metadata: {
136
- totalSize,
137
- offset,
138
- limit,
139
- isTruncated,
140
- warning,
141
- },
139
+ content: finalContent || "Empty document",
140
+ metadata: { totalSize, offset, limit, isTruncated, warning },
142
141
  },
143
142
  };
144
143
  }
@@ -146,51 +145,3 @@ export async function readDriveDocument(fileId, offset = 0, limit = 10000) {
146
145
  return { success: false, error: err.message };
147
146
  }
148
147
  }
149
- export async function syncFolderState(folderId) {
150
- try {
151
- const drive = getDriveClient();
152
- async function getAllDocumentsFlat() {
153
- let allDocs = [];
154
- let pageToken = undefined;
155
- do {
156
- const docsRes = await drive.files.list({
157
- // Chú ý: Đéo check parentId nữa, gom sạch sành sanh mọi file .doc mà Service Account nhìn thấy
158
- q: `mimeType = 'application/vnd.google-apps.document' and trashed = false`,
159
- fields: "nextPageToken, files(id, name, modifiedTime)",
160
- spaces: "drive",
161
- pageSize: 100, // Google API limit mỗi page, tự động nhảy trang nếu nhiều hơn
162
- pageToken,
163
- supportsAllDrives: true,
164
- includeItemsFromAllDrives: true,
165
- });
166
- if (docsRes.data.files) {
167
- allDocs = allDocs.concat(docsRes.data.files);
168
- }
169
- pageToken = docsRes.data.nextPageToken || undefined;
170
- } while (pageToken);
171
- return allDocs;
172
- }
173
- const driveFiles = await getAllDocumentsFlat();
174
- const dbMetaMap = await getProjectDocumentMetadata(folderId);
175
- // Sync updated or new files
176
- for (const file of driveFiles) {
177
- if (!file.id)
178
- continue;
179
- const dbModTime = dbMetaMap[file.id];
180
- if (!dbModTime || dbModTime !== file.modifiedTime) {
181
- await syncSingleDocument(file.id, folderId);
182
- }
183
- }
184
- // Delete removed files from DB
185
- for (const dbFileId of Object.keys(dbMetaMap)) {
186
- if (!driveFiles.find((f) => f.id === dbFileId)) {
187
- await deleteProjectDocument(folderId, dbFileId);
188
- }
189
- }
190
- return { success: true };
191
- }
192
- catch (err) {
193
- console.error("Auto-sync failed:", err.message);
194
- return { success: false, error: err.message };
195
- }
196
- }
@@ -0,0 +1,8 @@
1
+ export declare function googleDocToMarkdown(docJson: any): Promise<string>;
2
+ export declare function syncSingleDocument(fileId: string, driveModifiedTime: string, title: string): Promise<{
3
+ synced: boolean;
4
+ content: string;
5
+ upsertedCount?: number;
6
+ skippedCount?: number;
7
+ }>;
8
+ //# sourceMappingURL=ingestFlow.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAuGD"}