@khoinguyen2002/doc-mcp 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +6 -4
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +22 -7
- package/dist/db/rateLimiter.d.ts +6 -0
- package/dist/db/rateLimiter.d.ts.map +1 -0
- package/dist/db/rateLimiter.js +20 -0
- package/dist/db/syncState.d.ts +12 -0
- package/dist/db/syncState.d.ts.map +1 -0
- package/dist/db/syncState.js +69 -0
- package/dist/db/vector.d.ts +61 -6
- package/dist/db/vector.d.ts.map +1 -1
- package/dist/db/vector.js +249 -109
- package/dist/mcp-server.js +47 -37
- package/dist/tools/driveTools.d.ts +20 -16
- package/dist/tools/driveTools.d.ts.map +1 -1
- package/dist/tools/driveTools.js +101 -144
- package/dist/tools/ingestFlow.d.ts +8 -0
- package/dist/tools/ingestFlow.d.ts.map +1 -0
- package/dist/tools/ingestFlow.js +407 -0
- package/dist/tools/knowledgeTools.d.ts +32 -4
- package/dist/tools/knowledgeTools.d.ts.map +1 -1
- package/dist/tools/knowledgeTools.js +29 -34
- package/package.json +8 -1
- package/src/config.ts +28 -9
- package/src/db/rateLimiter.ts +25 -0
- package/src/db/syncState.ts +87 -0
- package/src/db/vector.ts +305 -115
- package/src/mcp-server.ts +56 -48
- package/src/tools/driveTools.ts +111 -168
- package/src/tools/ingestFlow.ts +508 -0
- package/src/tools/knowledgeTools.ts +34 -33
- package/src/types/turndown-plugin-gfm.d.ts +8 -0
package/dist/db/vector.js
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
|
-
import { QdrantClient } from
|
|
2
|
-
import { v4 as uuidv4 } from
|
|
3
|
-
import { config } from
|
|
1
|
+
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
2
|
+
import { v4 as uuidv4, v5 as uuidv5 } from "uuid";
|
|
3
|
+
import { config } from "../config.js";
|
|
4
4
|
let client = null;
|
|
5
|
-
const COLLECTION_NAME =
|
|
5
|
+
const COLLECTION_NAME = "project_memory";
|
|
6
|
+
// Fixed namespace for deterministic point IDs (uuid v5)
|
|
7
|
+
const POINT_NAMESPACE = "1b671a64-40d5-491e-99b0-da01ff1f3341";
|
|
8
|
+
/**
|
|
9
|
+
* Deterministic Qdrant point ID: uuidv5(fileId:blockIndex, NS)
|
|
10
|
+
* Same input → same ID → upsert overwrites correctly.
|
|
11
|
+
*/
|
|
12
|
+
export function getBlockPointId(fileId, blockIndex) {
|
|
13
|
+
return uuidv5(`${fileId}:${blockIndex}`, POINT_NAMESPACE);
|
|
14
|
+
}
|
|
6
15
|
export async function initVectorDB() {
|
|
7
16
|
if (!client) {
|
|
8
17
|
client = new QdrantClient({
|
|
@@ -10,83 +19,215 @@ export async function initVectorDB() {
|
|
|
10
19
|
apiKey: config.QDRANT_API_KEY,
|
|
11
20
|
});
|
|
12
21
|
console.error(`Connected to Qdrant at ${config.QDRANT_URL}`);
|
|
13
|
-
// Check if collection exists
|
|
14
22
|
const res = await client.getCollections();
|
|
15
|
-
const exists = res.collections.some(c => c.name === COLLECTION_NAME);
|
|
23
|
+
const exists = res.collections.some((c) => c.name === COLLECTION_NAME);
|
|
16
24
|
if (!exists) {
|
|
17
25
|
console.error(`Creating Qdrant collection: ${COLLECTION_NAME}`);
|
|
18
26
|
const dummyVector = await embedText("test");
|
|
19
27
|
const dimension = dummyVector.length;
|
|
20
28
|
await client.createCollection(COLLECTION_NAME, {
|
|
21
|
-
vectors: {
|
|
22
|
-
size: dimension,
|
|
23
|
-
distance: "Cosine",
|
|
24
|
-
},
|
|
29
|
+
vectors: { size: dimension, distance: "Cosine" },
|
|
25
30
|
});
|
|
26
31
|
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
27
|
-
field_name: "
|
|
32
|
+
field_name: "source",
|
|
28
33
|
field_schema: "keyword",
|
|
29
34
|
});
|
|
30
35
|
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
31
|
-
field_name: "
|
|
32
|
-
field_schema: "
|
|
36
|
+
field_name: "block_index",
|
|
37
|
+
field_schema: "integer",
|
|
33
38
|
});
|
|
34
39
|
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
35
|
-
field_name: "
|
|
40
|
+
field_name: "block_hash",
|
|
36
41
|
field_schema: "keyword",
|
|
37
42
|
});
|
|
43
|
+
// Full-text index on `text` payload for exact/keyword search.
|
|
44
|
+
// whitespace tokenizer keeps API paths (e.g. /v1/foo/bar) as single tokens.
|
|
45
|
+
// lowercase=true makes searches case-insensitive.
|
|
46
|
+
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
47
|
+
field_name: "text",
|
|
48
|
+
field_schema: {
|
|
49
|
+
type: "text",
|
|
50
|
+
tokenizer: "whitespace",
|
|
51
|
+
min_token_len: 2,
|
|
52
|
+
max_token_len: 200,
|
|
53
|
+
lowercase: true,
|
|
54
|
+
},
|
|
55
|
+
});
|
|
38
56
|
console.error(`Collection ${COLLECTION_NAME} created with dimension ${dimension}.`);
|
|
39
57
|
}
|
|
40
58
|
}
|
|
41
59
|
}
|
|
42
|
-
export async function embedText(text) {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
60
|
+
export async function embedText(text, maxRetries = 5) {
|
|
61
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
62
|
+
try {
|
|
63
|
+
const response = await fetch("https://openrouter.ai/api/v1/embeddings", {
|
|
64
|
+
method: "POST",
|
|
65
|
+
headers: {
|
|
66
|
+
Authorization: `Bearer ${config.OPENROUTER_API_KEY}`,
|
|
67
|
+
"Content-Type": "application/json",
|
|
68
|
+
},
|
|
69
|
+
body: JSON.stringify({
|
|
70
|
+
model: config.EMBEDDING_MODEL_ID,
|
|
71
|
+
input: text,
|
|
72
|
+
}),
|
|
73
|
+
});
|
|
74
|
+
if (!response.ok) {
|
|
75
|
+
if (response.status === 429 && attempt < maxRetries - 1) {
|
|
76
|
+
const delay = Math.pow(2, attempt) * 1000 + Math.random() * 1000;
|
|
77
|
+
console.error(`[Rate Limit] OpenRouter 429. Retrying in ${Math.round(delay)}ms... (Attempt ${attempt + 1}/${maxRetries})`);
|
|
78
|
+
await new Promise((res) => setTimeout(res, delay));
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
const errText = await response.text();
|
|
82
|
+
throw new Error(`OpenRouter Embedding API failed: ${response.status} ${errText}`);
|
|
83
|
+
}
|
|
84
|
+
const json = await response.json();
|
|
85
|
+
if (!json.data || !json.data[0] || !json.data[0].embedding) {
|
|
86
|
+
throw new Error(`Invalid response from OpenRouter: ${JSON.stringify(json)}`);
|
|
87
|
+
}
|
|
88
|
+
return json.data[0].embedding;
|
|
89
|
+
}
|
|
90
|
+
catch (err) {
|
|
91
|
+
if (attempt >= maxRetries - 1)
|
|
92
|
+
throw err;
|
|
93
|
+
const delay = Math.pow(2, attempt) * 1000 + Math.random() * 1000;
|
|
94
|
+
console.error(`[Error] ${err.message}. Retrying in ${Math.round(delay)}ms... (Attempt ${attempt + 1}/${maxRetries})`);
|
|
95
|
+
await new Promise((res) => setTimeout(res, delay));
|
|
96
|
+
}
|
|
57
97
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
98
|
+
throw new Error("Max retries reached for embedding");
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Embed nhiều texts trong 1 API call (batch).
|
|
102
|
+
* OpenRouter hỗ trợ input: string[] → trả data[i].embedding.
|
|
103
|
+
*/
|
|
104
|
+
export async function embedBatch(texts, maxRetries = 5) {
|
|
105
|
+
if (texts.length === 0)
|
|
106
|
+
return [];
|
|
107
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
108
|
+
try {
|
|
109
|
+
const response = await fetch("https://openrouter.ai/api/v1/embeddings", {
|
|
110
|
+
method: "POST",
|
|
111
|
+
headers: {
|
|
112
|
+
Authorization: `Bearer ${config.OPENROUTER_API_KEY}`,
|
|
113
|
+
"Content-Type": "application/json",
|
|
114
|
+
},
|
|
115
|
+
body: JSON.stringify({
|
|
116
|
+
model: config.EMBEDDING_MODEL_ID,
|
|
117
|
+
input: texts,
|
|
118
|
+
}),
|
|
119
|
+
});
|
|
120
|
+
if (!response.ok) {
|
|
121
|
+
if (response.status === 429 && attempt < maxRetries - 1) {
|
|
122
|
+
const delay = Math.pow(2, attempt) * 1000 + Math.random() * 1000;
|
|
123
|
+
console.error(`[Rate Limit] OpenRouter 429 (batch). Retrying in ${Math.round(delay)}ms... (Attempt ${attempt + 1}/${maxRetries})`);
|
|
124
|
+
await new Promise((res) => setTimeout(res, delay));
|
|
125
|
+
continue;
|
|
126
|
+
}
|
|
127
|
+
const errText = await response.text();
|
|
128
|
+
throw new Error(`OpenRouter Batch Embedding API failed: ${response.status} ${errText}`);
|
|
129
|
+
}
|
|
130
|
+
const json = await response.json();
|
|
131
|
+
if (!json.data || !Array.isArray(json.data)) {
|
|
132
|
+
throw new Error(`Invalid batch response from OpenRouter: ${JSON.stringify(json)}`);
|
|
133
|
+
}
|
|
134
|
+
return json.data.map((item) => item.embedding);
|
|
135
|
+
}
|
|
136
|
+
catch (err) {
|
|
137
|
+
if (attempt >= maxRetries - 1)
|
|
138
|
+
throw err;
|
|
139
|
+
const delay = Math.pow(2, attempt) * 1000 + Math.random() * 1000;
|
|
140
|
+
console.error(`[Error] ${err.message}. Retrying in ${Math.round(delay)}ms... (Attempt ${attempt + 1}/${maxRetries})`);
|
|
141
|
+
await new Promise((res) => setTimeout(res, delay));
|
|
142
|
+
}
|
|
61
143
|
}
|
|
62
|
-
|
|
144
|
+
throw new Error("Max retries reached for batch embedding");
|
|
63
145
|
}
|
|
64
|
-
|
|
146
|
+
/**
|
|
147
|
+
* Bulk upsert nhiều chunks vào Qdrant trong 1 HTTP call.
|
|
148
|
+
*/
|
|
149
|
+
export async function upsertChunkBatch(chunks) {
|
|
65
150
|
await initVectorDB();
|
|
66
151
|
if (!client)
|
|
67
152
|
throw new Error("Qdrant not initialized");
|
|
68
|
-
|
|
153
|
+
if (chunks.length === 0)
|
|
154
|
+
return;
|
|
69
155
|
await client.upsert(COLLECTION_NAME, {
|
|
70
156
|
wait: true,
|
|
71
|
-
points:
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
]
|
|
157
|
+
points: chunks.map((c) => ({
|
|
158
|
+
id: c.pointId,
|
|
159
|
+
vector: c.vector,
|
|
160
|
+
payload: {
|
|
161
|
+
text: c.text,
|
|
162
|
+
title: c.title,
|
|
163
|
+
block_index: c.blockIndex,
|
|
164
|
+
block_hash: c.blockHash,
|
|
165
|
+
source: c.source,
|
|
166
|
+
offset: c.offset,
|
|
167
|
+
},
|
|
168
|
+
})),
|
|
86
169
|
});
|
|
87
|
-
console.error(`Upserted
|
|
170
|
+
console.error(`Upserted ${chunks.length} chunk(s) to Qdrant.`);
|
|
88
171
|
}
|
|
89
|
-
|
|
172
|
+
/**
|
|
173
|
+
* Fetch block_hash AND offset for a list of point IDs.
|
|
174
|
+
* Used to diff block-level changes during re-sync (hash) and
|
|
175
|
+
* detect stale offsets in unchanged blocks (offset).
|
|
176
|
+
*/
|
|
177
|
+
export async function getBlockMetaByIds(pointIds) {
|
|
178
|
+
await initVectorDB();
|
|
179
|
+
if (!client || pointIds.length === 0)
|
|
180
|
+
return {};
|
|
181
|
+
const results = await client.retrieve(COLLECTION_NAME, {
|
|
182
|
+
ids: pointIds,
|
|
183
|
+
with_payload: ["block_hash", "offset"],
|
|
184
|
+
with_vector: false,
|
|
185
|
+
});
|
|
186
|
+
const metaMap = {};
|
|
187
|
+
for (const point of results) {
|
|
188
|
+
const hash = point.payload?.block_hash;
|
|
189
|
+
const offset = point.payload?.offset;
|
|
190
|
+
if (hash !== undefined) {
|
|
191
|
+
metaMap[point.id] = { hash, offset: offset ?? 0 };
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
return metaMap;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Update only the `offset` payload field for a set of points (no re-embedding).
|
|
198
|
+
* Called for unchanged blocks whose character position shifted due to earlier edits.
|
|
199
|
+
* Uses parallel setPayload calls (lightweight metadata-only updates).
|
|
200
|
+
*/
|
|
201
|
+
export async function updateBlockOffsets(updates) {
|
|
202
|
+
if (updates.length === 0)
|
|
203
|
+
return;
|
|
204
|
+
await initVectorDB();
|
|
205
|
+
if (!client)
|
|
206
|
+
throw new Error("Qdrant not initialized");
|
|
207
|
+
await Promise.all(updates.map(({ pointId, offset }) => client.setPayload(COLLECTION_NAME, {
|
|
208
|
+
payload: { offset },
|
|
209
|
+
points: [pointId],
|
|
210
|
+
wait: false, // fire-and-forget per point; all resolve before function returns
|
|
211
|
+
})));
|
|
212
|
+
console.error(`[Sync] Updated offset for ${updates.length} unchanged block(s).`);
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Xóa Qdrant points theo danh sách IDs.
|
|
216
|
+
*/
|
|
217
|
+
export async function deletePointsByIds(pointIds) {
|
|
218
|
+
await initVectorDB();
|
|
219
|
+
if (!client || pointIds.length === 0)
|
|
220
|
+
return;
|
|
221
|
+
await client.delete(COLLECTION_NAME, {
|
|
222
|
+
wait: true,
|
|
223
|
+
points: pointIds,
|
|
224
|
+
});
|
|
225
|
+
console.error(`Deleted ${pointIds.length} obsolete block(s) from Qdrant.`);
|
|
226
|
+
}
|
|
227
|
+
/**
|
|
228
|
+
* Global semantic search — không filter theo folder hay file.
|
|
229
|
+
*/
|
|
230
|
+
export async function searchProjectMemory(query, topK = 3) {
|
|
90
231
|
await initVectorDB();
|
|
91
232
|
if (!client)
|
|
92
233
|
throw new Error("Qdrant not initialized");
|
|
@@ -96,22 +237,10 @@ export async function searchProjectMemory(folderId, query, topK = 3) {
|
|
|
96
237
|
vector: queryVector,
|
|
97
238
|
limit: topK,
|
|
98
239
|
with_payload: true,
|
|
99
|
-
filter: {
|
|
100
|
-
must: [
|
|
101
|
-
{
|
|
102
|
-
key: "folderId",
|
|
103
|
-
match: {
|
|
104
|
-
value: folderId
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
]
|
|
108
|
-
}
|
|
109
240
|
});
|
|
110
|
-
|
|
111
|
-
return results.map(r => ({
|
|
241
|
+
return results.map((r) => ({
|
|
112
242
|
id: r.id,
|
|
113
|
-
|
|
114
|
-
...r.payload
|
|
243
|
+
...r.payload,
|
|
115
244
|
}));
|
|
116
245
|
}
|
|
117
246
|
catch (err) {
|
|
@@ -119,54 +248,65 @@ export async function searchProjectMemory(folderId, query, topK = 3) {
|
|
|
119
248
|
return [];
|
|
120
249
|
}
|
|
121
250
|
}
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
must: [
|
|
129
|
-
{ key: "folderId", match: { value: folderId } },
|
|
130
|
-
{ key: "file_id", match: { value: fileId } }
|
|
131
|
-
]
|
|
132
|
-
}
|
|
133
|
-
});
|
|
134
|
-
console.error(`Deleted old chunks from Qdrant for ${folderId} / ${fileId}`);
|
|
135
|
-
}
|
|
136
|
-
export async function checkProjectDocumentExists(folderId, fileId) {
|
|
251
|
+
/**
|
|
252
|
+
* Exhaustive full-text search using Qdrant's inverted index on the `text` field.
|
|
253
|
+
* Uses whitespace tokenizer → API paths like /v1/foo/bar match as single tokens.
|
|
254
|
+
* Paginates through all results server-side (no full collection scan in JS).
|
|
255
|
+
*/
|
|
256
|
+
export async function exactSearchChunks(term, limit = 50) {
|
|
137
257
|
await initVectorDB();
|
|
138
258
|
if (!client)
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
259
|
+
throw new Error("Qdrant not initialized");
|
|
260
|
+
const filter = {
|
|
261
|
+
must: [
|
|
262
|
+
{
|
|
263
|
+
key: "text",
|
|
264
|
+
match: { text: term.toLowerCase() },
|
|
265
|
+
},
|
|
266
|
+
],
|
|
267
|
+
};
|
|
268
|
+
const results = [];
|
|
269
|
+
let offset = undefined;
|
|
270
|
+
// Paginate until all matching points are collected or limit is reached
|
|
271
|
+
do {
|
|
272
|
+
const page = await client.scroll(COLLECTION_NAME, {
|
|
273
|
+
filter,
|
|
274
|
+
with_payload: true,
|
|
275
|
+
with_vector: false,
|
|
276
|
+
limit: Math.min(100, limit - results.length),
|
|
277
|
+
...(offset !== undefined ? { offset } : {}),
|
|
278
|
+
});
|
|
279
|
+
for (const point of page.points) {
|
|
280
|
+
results.push({ id: point.id, ...point.payload });
|
|
146
281
|
}
|
|
147
|
-
|
|
148
|
-
|
|
282
|
+
offset = page.next_page_offset;
|
|
283
|
+
} while (offset != null && results.length < limit);
|
|
284
|
+
return results;
|
|
149
285
|
}
|
|
150
|
-
|
|
286
|
+
/**
|
|
287
|
+
* Upsert agent note với random UUID (không có fileId).
|
|
288
|
+
*/
|
|
289
|
+
export async function upsertAgentNote(text) {
|
|
151
290
|
await initVectorDB();
|
|
152
291
|
if (!client)
|
|
153
|
-
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
292
|
+
throw new Error("Qdrant not initialized");
|
|
293
|
+
const vector = await embedText(text);
|
|
294
|
+
await client.upsert(COLLECTION_NAME, {
|
|
295
|
+
wait: true,
|
|
296
|
+
points: [
|
|
297
|
+
{
|
|
298
|
+
id: uuidv4(),
|
|
299
|
+
vector,
|
|
300
|
+
payload: {
|
|
301
|
+
text,
|
|
302
|
+
title: "Agent Note",
|
|
303
|
+
block_index: 0,
|
|
304
|
+
block_hash: "",
|
|
305
|
+
source: "agent",
|
|
306
|
+
offset: 0,
|
|
307
|
+
},
|
|
308
|
+
},
|
|
309
|
+
],
|
|
164
310
|
});
|
|
165
|
-
|
|
166
|
-
for (const r of res.points) {
|
|
167
|
-
if (r.payload && r.payload.file_id && r.payload.modified_time) {
|
|
168
|
-
fileMap[r.payload.file_id] = r.payload.modified_time;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return fileMap;
|
|
311
|
+
console.error("Upserted agent note to Qdrant.");
|
|
172
312
|
}
|
package/dist/mcp-server.js
CHANGED
|
@@ -2,33 +2,22 @@
|
|
|
2
2
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
4
|
import { z } from "zod";
|
|
5
|
-
import { listDriveFiles, readDriveDocument
|
|
6
|
-
import {
|
|
7
|
-
import { config } from "./config.js";
|
|
8
|
-
const DRIVE_FOLDER_ID = config.DOC_MCP_DRIVE_FOLDER_ID;
|
|
9
|
-
if (!DRIVE_FOLDER_ID) {
|
|
10
|
-
console.error("Missing DOC_MCP_DRIVE_FOLDER_ID environment variable. The doc-agent requires a target folder ID.");
|
|
11
|
-
process.exit(1);
|
|
12
|
-
}
|
|
5
|
+
import { listDriveFiles, readDriveDocument } from "./tools/driveTools.js";
|
|
6
|
+
import { searchKnowledge, searchExact } from "./tools/knowledgeTools.js";
|
|
13
7
|
const server = new McpServer({
|
|
14
8
|
name: "doc-agent",
|
|
15
|
-
version: "1.
|
|
9
|
+
version: "1.2.0",
|
|
16
10
|
});
|
|
17
|
-
// Register tools
|
|
18
11
|
server.registerTool("list_drive_files", {
|
|
19
|
-
description: "List
|
|
12
|
+
description: "List all Google Drive documents accessible to this agent. Returns file IDs, names, and types. Use keyword to filter by title.",
|
|
20
13
|
inputSchema: {
|
|
21
14
|
keyword: z
|
|
22
15
|
.string()
|
|
23
16
|
.optional()
|
|
24
|
-
.describe("Optional keyword to
|
|
25
|
-
targetFolderId: z
|
|
26
|
-
.string()
|
|
27
|
-
.optional()
|
|
28
|
-
.describe("Optional Google Drive folder ID to list contents from. Defaults to the root knowledge folder."),
|
|
17
|
+
.describe("Optional keyword to filter documents by title"),
|
|
29
18
|
},
|
|
30
|
-
}, async ({ keyword
|
|
31
|
-
const res = await listDriveFiles(keyword
|
|
19
|
+
}, async ({ keyword }) => {
|
|
20
|
+
const res = await listDriveFiles(keyword);
|
|
32
21
|
if (!res.success) {
|
|
33
22
|
return {
|
|
34
23
|
content: [{ type: "text", text: `Error: ${res.error}` }],
|
|
@@ -40,11 +29,17 @@ server.registerTool("list_drive_files", {
|
|
|
40
29
|
};
|
|
41
30
|
});
|
|
42
31
|
server.registerTool("read_drive_document", {
|
|
43
|
-
description: "Read the content of a specific Google Drive document.
|
|
32
|
+
description: "Read the Markdown content of a specific Google Drive document. Automatically syncs the latest version. Use 'offset' (from search_knowledge results) to navigate to a specific section, and 'limit' to control how much content to return.",
|
|
44
33
|
inputSchema: {
|
|
45
34
|
fileId: z.string().describe("The Google Drive file ID to read"),
|
|
46
|
-
offset: z
|
|
47
|
-
|
|
35
|
+
offset: z
|
|
36
|
+
.number()
|
|
37
|
+
.optional()
|
|
38
|
+
.describe("Starting character index in the Markdown content (default: 0)"),
|
|
39
|
+
limit: z
|
|
40
|
+
.number()
|
|
41
|
+
.optional()
|
|
42
|
+
.describe("Maximum characters to return (default: 10000)"),
|
|
48
43
|
},
|
|
49
44
|
}, async ({ fileId, offset, limit }) => {
|
|
50
45
|
const res = await readDriveDocument(fileId, offset, limit);
|
|
@@ -58,13 +53,17 @@ server.registerTool("read_drive_document", {
|
|
|
58
53
|
content: [{ type: "text", text: JSON.stringify(res.data, null, 2) }],
|
|
59
54
|
};
|
|
60
55
|
});
|
|
61
|
-
server.registerTool("
|
|
62
|
-
description: "
|
|
56
|
+
server.registerTool("search_knowledge", {
|
|
57
|
+
description: "Semantic vector search across all accessible Google Drive documents. Automatically syncs latest document changes before searching. Returns relevant Markdown chunks with title and character offset.",
|
|
63
58
|
inputSchema: {
|
|
64
|
-
|
|
59
|
+
query: z.string().describe("The search query"),
|
|
60
|
+
topK: z
|
|
61
|
+
.number()
|
|
62
|
+
.optional()
|
|
63
|
+
.describe("Number of results to return (default: 3)"),
|
|
65
64
|
},
|
|
66
|
-
}, async ({
|
|
67
|
-
const res = await
|
|
65
|
+
}, async ({ query, topK }) => {
|
|
66
|
+
const res = await searchKnowledge(query, topK);
|
|
68
67
|
if (!res.success) {
|
|
69
68
|
return {
|
|
70
69
|
content: [{ type: "text", text: `Error: ${res.error}` }],
|
|
@@ -72,20 +71,32 @@ server.registerTool("save_agent_note", {
|
|
|
72
71
|
};
|
|
73
72
|
}
|
|
74
73
|
return {
|
|
75
|
-
content: [
|
|
74
|
+
content: [
|
|
75
|
+
{
|
|
76
|
+
type: "text",
|
|
77
|
+
text: typeof res.results === "string"
|
|
78
|
+
? res.results
|
|
79
|
+
: JSON.stringify(res.results),
|
|
80
|
+
},
|
|
81
|
+
],
|
|
76
82
|
};
|
|
77
83
|
});
|
|
78
|
-
server.registerTool("
|
|
79
|
-
description: "
|
|
84
|
+
server.registerTool("search_exact", {
|
|
85
|
+
description: "Exhaustive keyword search across all accessible Google Drive documents using full-text index. " +
|
|
86
|
+
"Unlike search_knowledge (semantic/vector), this finds EVERY chunk containing the exact term — " +
|
|
87
|
+
"ideal for specific identifiers: API paths (/v1/foo/bar), function names, config keys, error codes. " +
|
|
88
|
+
"Case-insensitive. Automatically syncs latest document changes before searching.",
|
|
80
89
|
inputSchema: {
|
|
81
|
-
|
|
82
|
-
|
|
90
|
+
term: z
|
|
91
|
+
.string()
|
|
92
|
+
.describe("Exact term to search for (e.g. '/product-orchestrator/v1/products/filter', 'ServiceCode.mkp')"),
|
|
93
|
+
limit: z
|
|
83
94
|
.number()
|
|
84
95
|
.optional()
|
|
85
|
-
.describe("
|
|
96
|
+
.describe("Max results to return (default: 50)"),
|
|
86
97
|
},
|
|
87
|
-
}, async ({
|
|
88
|
-
const res = await
|
|
98
|
+
}, async ({ term, limit }) => {
|
|
99
|
+
const res = await searchExact(term, limit);
|
|
89
100
|
if (!res.success) {
|
|
90
101
|
return {
|
|
91
102
|
content: [{ type: "text", text: `Error: ${res.error}` }],
|
|
@@ -98,16 +109,15 @@ server.registerTool("search_knowledge", {
|
|
|
98
109
|
type: "text",
|
|
99
110
|
text: typeof res.results === "string"
|
|
100
111
|
? res.results
|
|
101
|
-
: JSON.stringify(res
|
|
112
|
+
: JSON.stringify(res, null, 2),
|
|
102
113
|
},
|
|
103
114
|
],
|
|
104
115
|
};
|
|
105
116
|
});
|
|
106
|
-
// Start the server
|
|
107
117
|
async function run() {
|
|
108
118
|
const transport = new StdioServerTransport();
|
|
109
119
|
await server.connect(transport);
|
|
110
|
-
console.error("doc-agent MCP server running on stdio");
|
|
120
|
+
console.error("doc-agent MCP server v1.2.0 running on stdio");
|
|
111
121
|
}
|
|
112
122
|
run().catch((error) => {
|
|
113
123
|
console.error("Fatal error running server:", error);
|
|
@@ -1,21 +1,32 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* List all Google Docs the Service Account can read.
|
|
3
|
+
* Optional keyword filter on document title.
|
|
4
|
+
*/
|
|
5
|
+
export declare function listDriveFiles(keyword?: string): Promise<{
|
|
2
6
|
success: boolean;
|
|
3
|
-
results:
|
|
7
|
+
results: any[];
|
|
4
8
|
error?: undefined;
|
|
5
9
|
} | {
|
|
6
10
|
success: boolean;
|
|
7
11
|
error: any;
|
|
8
12
|
results?: undefined;
|
|
9
13
|
}>;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
/**
|
|
15
|
+
* Sync all documents the SA can see:
|
|
16
|
+
* - New/changed files → syncSingleDocument()
|
|
17
|
+
* - Files removed from Drive → delete from Qdrant + Redis
|
|
18
|
+
*/
|
|
19
|
+
export declare function syncAllDocuments(): Promise<{
|
|
20
|
+
success: boolean;
|
|
21
|
+
error?: undefined;
|
|
14
22
|
} | {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
content?: undefined;
|
|
23
|
+
success: boolean;
|
|
24
|
+
error: any;
|
|
18
25
|
}>;
|
|
26
|
+
/**
|
|
27
|
+
* Read a specific Google Drive document, triggering incremental sync first.
|
|
28
|
+
* Returns paginated Markdown content.
|
|
29
|
+
*/
|
|
19
30
|
export declare function readDriveDocument(fileId: string, offset?: number, limit?: number): Promise<{
|
|
20
31
|
success: boolean;
|
|
21
32
|
data: {
|
|
@@ -34,11 +45,4 @@ export declare function readDriveDocument(fileId: string, offset?: number, limit
|
|
|
34
45
|
error: any;
|
|
35
46
|
data?: undefined;
|
|
36
47
|
}>;
|
|
37
|
-
export declare function syncFolderState(folderId: string): Promise<{
|
|
38
|
-
success: boolean;
|
|
39
|
-
error?: undefined;
|
|
40
|
-
} | {
|
|
41
|
-
success: boolean;
|
|
42
|
-
error: any;
|
|
43
|
-
}>;
|
|
44
48
|
//# sourceMappingURL=driveTools.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"driveTools.d.ts","sourceRoot":"","sources":["../../src/tools/driveTools.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"driveTools.d.ts","sourceRoot":"","sources":["../../src/tools/driveTools.ts"],"names":[],"mappings":"AA4BA;;;GAGG;AACH,wBAAsB,cAAc,CAAC,OAAO,CAAC,EAAE,MAAM;;;;;;;;GA8BpD;AAED;;;;GAIG;AACH,wBAAsB,gBAAgB;;;;;;GAwDrC;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,MAAU,EAClB,KAAK,GAAE,MAAc;;;;;;;;;;;;;;;;;GAoCtB"}
|