@khoinguyen2002/doc-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ import { upsertProjectDocument, searchProjectMemory } from "../db/vector.js";
2
+ import { syncFolderState } from "./driveTools.js";
3
+ export async function saveAgentNote(content) {
4
+ const folderId = process.env.DOC_MCP_DRIVE_FOLDER_ID;
5
+ if (!folderId) {
6
+ return { success: false, error: "DOC_MCP_DRIVE_FOLDER_ID is not configured." };
7
+ }
8
+ try {
9
+ // We use folderId as the "projectId" parameter for vector-db namespace
10
+ await upsertProjectDocument(folderId, content, {
11
+ source: "agent",
12
+ });
13
+ return { success: true, message: "Successfully stored note in vector memory." };
14
+ }
15
+ catch (err) {
16
+ return { success: false, error: `Failed to store note: ${err.message}` };
17
+ }
18
+ }
19
+ export async function searchKnowledge(query, topK = 3) {
20
+ const folderId = process.env.DOC_MCP_DRIVE_FOLDER_ID;
21
+ if (!folderId) {
22
+ return { success: false, error: "DOC_MCP_DRIVE_FOLDER_ID is not configured." };
23
+ }
24
+ try {
25
+ // Auto-sync folder state before searching
26
+ await syncFolderState(folderId);
27
+ const results = await searchProjectMemory(folderId, query, topK);
28
+ if (!results || results.length === 0) {
29
+ return { success: true, results: "NOT_FOUND" };
30
+ }
31
+ return {
32
+ success: true,
33
+ results: results.map((r) => r.text).join("\n\n---\n\n"),
34
+ };
35
+ }
36
+ catch (err) {
37
+ return { success: false, error: `Failed to search: ${err.message}` };
38
+ }
39
+ }
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@khoinguyen2002/doc-mcp",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "main": "dist/index.js",
6
+ "bin": {
7
+ "doc-mcp": "./dist/mcp-server.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "start": "node dist/mcp-server.js"
12
+ },
13
+ "dependencies": {
14
+ "@langchain/textsplitters": "^1.0.1",
15
+ "@modelcontextprotocol/sdk": "^1.29.0",
16
+ "@qdrant/js-client-rest": "^1.18.0",
17
+ "googleapis": "^173.0.0",
18
+ "uuid": "^14.0.0",
19
+ "zod": "^4.4.3"
20
+ },
21
+ "types": "dist/index.d.ts",
22
+ "exports": {
23
+ ".": "./dist/index.js"
24
+ },
25
+ "devDependencies": {
26
+ "@types/uuid": "^11.0.0"
27
+ }
28
+ }
package/src/config.ts ADDED
@@ -0,0 +1,29 @@
1
+ import { z } from "zod";
2
+
3
/**
 * Environment schema for doc-mcp.
 *
 * Google Drive settings are optional at this level; the vector-store URL and
 * the OpenRouter key are required. Chunking values are coerced from their
 * string form in `process.env`.
 */
const schema = z
  .object({
    DOC_MCP_DRIVE_FOLDER_ID: z.string().optional(),
    DOC_MCP_GOOGLE_CLIENT_EMAIL: z.string().email().optional(),
    DOC_MCP_GOOGLE_PRIVATE_KEY: z.string().optional(),

    // Vector DB / Embeddings
    QDRANT_URL: z.string().url().describe("The URL of your Qdrant instance"),
    QDRANT_API_KEY: z.string().optional().describe("API Key for Qdrant Cloud (optional for local)"),
    OPENROUTER_API_KEY: z.string().min(1),
    EMBEDDING_MODEL_ID: z.string().default("nvidia/llama-nemotron-embed-vl-1b-v2:free"),
    CHUNK_SIZE: z.coerce.number().int().positive().default(4000),
    CHUNK_OVERLAP: z.coerce.number().int().nonnegative().default(500),
  })
  // The text splitter refuses an overlap that is not smaller than the chunk
  // size; reject that combination at startup instead of failing mid-sync.
  .refine((env) => env.CHUNK_OVERLAP < env.CHUNK_SIZE, {
    message: "CHUNK_OVERLAP must be smaller than CHUNK_SIZE",
    path: ["CHUNK_OVERLAP"],
  });
16
+
17
/**
 * Validate `process.env` against `schema` and return the parsed values.
 *
 * @returns The parsed configuration object (with defaults applied by the schema).
 * @throws Error whose message lists every failing variable, one per line.
 */
function loadConfig() {
  const parsed = schema.safeParse(process.env);
  if (parsed.success) {
    return parsed.data;
  }

  // One line per validation issue: "<variable path>: <reason>".
  const problems: string[] = [];
  for (const issue of parsed.error.issues) {
    problems.push(` ${issue.path.join(".")}: ${issue.message}`);
  }
  throw new Error(`Invalid environment configuration for doc-mcp:\n${problems.join("\n")}`);
}

// Evaluated once at import time; importing this module throws on invalid env.
export const config = loadConfig();
export type Config = typeof config;
@@ -0,0 +1,185 @@
1
+ import { QdrantClient } from '@qdrant/js-client-rest';
2
+ import { v4 as uuidv4 } from 'uuid';
3
+ import { config } from '../config.js';
4
+
5
// Set only after the collection has been verified/created, so a non-null
// client always means "ready to use".
let client: QdrantClient | null = null;
// Initialization currently in flight, shared by concurrent callers.
let initPromise: Promise<void> | null = null;
const COLLECTION_NAME = 'project_memory';

/**
 * Lazily connect to Qdrant and make sure the `project_memory` collection exists.
 *
 * Safe to call repeatedly and concurrently: callers share one in-flight
 * initialization. If initialization fails, nothing is cached, so the next call
 * retries from scratch instead of silently skipping collection creation.
 *
 * @throws Whatever the Qdrant client or `embedText` throws during setup.
 */
export async function initVectorDB(): Promise<void> {
  if (client) return;

  if (!initPromise) {
    initPromise = connectAndEnsureCollection().catch((err: unknown) => {
      // Forget the failed attempt so a later call can retry.
      initPromise = null;
      throw err;
    });
  }
  await initPromise;
}

/** Create the client, create the collection and payload indexes if missing, then publish the client. */
async function connectAndEnsureCollection(): Promise<void> {
  const candidate = new QdrantClient({
    url: config.QDRANT_URL,
    apiKey: config.QDRANT_API_KEY,
  });

  // Check if collection exists (this is also the first real round-trip to the server)
  const res = await candidate.getCollections();
  console.error(`Connected to Qdrant at ${config.QDRANT_URL}`);

  const exists = res.collections.some(c => c.name === COLLECTION_NAME);
  if (!exists) {
    console.error(`Creating Qdrant collection: ${COLLECTION_NAME}`);
    // Probe the embedding model once to learn the vector dimension.
    const dummyVector = await embedText("test");
    const dimension = dummyVector.length;

    await candidate.createCollection(COLLECTION_NAME, {
      vectors: {
        size: dimension,
        distance: "Cosine",
      },
    });
    // Keyword indexes for the payload fields used in filters elsewhere in this module.
    for (const field_name of ["projectId", "file_id", "source"]) {
      await candidate.createPayloadIndex(COLLECTION_NAME, {
        field_name,
        field_schema: "keyword",
      });
    }
    console.error(`Collection ${COLLECTION_NAME} created with dimension ${dimension}.`);
  }

  client = candidate;
}
46
+
47
/**
 * Embed a piece of text with the configured OpenRouter embedding model.
 *
 * @param text Text to embed.
 * @returns The embedding vector of the first item in the API response.
 * @throws Error when the HTTP call is not OK (status and body are included) or
 *   when the response does not contain a non-empty numeric embedding.
 */
export async function embedText(text: string): Promise<number[]> {
  const response = await fetch("https://openrouter.ai/api/v1/embeddings", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${config.OPENROUTER_API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      model: config.EMBEDDING_MODEL_ID,
      input: text
    })
  });

  if (!response.ok) {
    const errText = await response.text();
    throw new Error(`OpenRouter Embedding API failed: ${response.status} ${errText}`);
  }

  // Treat the body as untrusted: narrow it instead of assuming its shape.
  const json: unknown = await response.json();
  const embedding = extractEmbedding(json);
  if (!embedding) {
    throw new Error("Invalid response from OpenRouter Embedding API");
  }

  return embedding;
}

/** Return `data[0].embedding` when it is a non-empty array of numbers, otherwise null. */
function extractEmbedding(json: unknown): number[] | null {
  if (typeof json !== "object" || json === null) return null;
  const data = (json as { data?: unknown }).data;
  if (!Array.isArray(data)) return null;

  const first: unknown = data[0];
  if (typeof first !== "object" || first === null) return null;
  const embedding = (first as { embedding?: unknown }).embedding;
  if (!Array.isArray(embedding) || embedding.length === 0) return null;

  return embedding.every((v) => typeof v === "number") ? embedding : null;
}
72
+
73
/**
 * Embed `text` and store it as one new point in the project collection.
 *
 * Every call inserts a point with a fresh UUID. `source`, `file_id` and
 * `modified_time` are lifted out of `metadata` into top-level payload fields
 * (defaulting to "user" / null / null), and the whole `metadata` object is
 * additionally stored as a JSON string.
 *
 * @param projectId Namespace value stored in the payload's `projectId` field.
 * @param text Chunk text to embed and store.
 * @param metadata Optional extra attributes for the chunk.
 * @throws Error("Qdrant not initialized") if no client is available after init.
 */
export async function upsertProjectDocument(projectId: string, text: string, metadata: Record<string, any> = {}): Promise<void> {
  await initVectorDB();
  const db = client;
  if (db === null) {
    throw new Error("Qdrant not initialized");
  }

  const embedding = await embedText(text);

  const payload = {
    projectId,
    text,
    source: metadata.source || "user",
    file_id: metadata.file_id || null,
    modified_time: metadata.modified_time || null,
    metadata: JSON.stringify(metadata),
    createdAt: new Date().toISOString(),
  };
  const point = { id: uuidv4(), vector: embedding, payload };

  // wait: true — resolve only once the write has been applied.
  await db.upsert(COLLECTION_NAME, { wait: true, points: [point] });

  console.error(`Upserted document chunk for project ${projectId}`);
}
100
+
101
/**
 * Run a similarity search over one project's stored chunks.
 *
 * @param projectId Only points whose payload `projectId` equals this value are searched.
 * @param query Text to embed and search with.
 * @param topK Maximum number of hits to return.
 * @returns One flat object per hit: `id`, `vector` (as returned by the client —
 *   vectors are not explicitly requested here) and all payload fields spread
 *   at the top level.
 * @throws Error("Qdrant not initialized") if no client is available after init.
 */
export async function searchProjectMemory(projectId: string, query: string, topK: number = 3): Promise<any[]> {
  await initVectorDB();
  const db = client;
  if (db === null) {
    throw new Error("Qdrant not initialized");
  }

  const queryVector = await embedText(query);

  const onlyThisProject = {
    must: [{ key: "projectId", match: { value: projectId } }],
  };
  const hits = await db.search(COLLECTION_NAME, {
    vector: queryVector,
    limit: topK,
    with_payload: true,
    filter: onlyThisProject,
  });

  // Flatten each hit to the row shape other tools expect (LanceDB-style).
  const rows: any[] = [];
  for (const hit of hits) {
    rows.push({ id: hit.id, vector: hit.vector, ...hit.payload });
  }
  return rows;
}
130
+
131
/**
 * Delete every stored point that belongs to the given file within a project.
 *
 * Does nothing (and logs nothing) when no client is available after init.
 *
 * @param projectId Value matched against the payload's `projectId` field.
 * @param fileId Value matched against the payload's `file_id` field.
 */
export async function deleteProjectDocument(projectId: string, fileId: string): Promise<void> {
  await initVectorDB();
  if (client) {
    const sameFileInProject = {
      must: [
        { key: "projectId", match: { value: projectId } },
        { key: "file_id", match: { value: fileId } },
      ],
    };
    await client.delete(COLLECTION_NAME, { filter: sameFileInProject });
    console.error(`Deleted old chunks from Qdrant for ${projectId} / ${fileId}`);
  }
}
145
+
146
/**
 * Report whether at least one stored point exists for a file within a project.
 *
 * @param projectId Value matched against the payload's `projectId` field.
 * @param fileId Value matched against the payload's `file_id` field.
 * @returns `true` when the count of matching points is positive; `false`
 *   otherwise, including when no client is available after init.
 */
export async function checkProjectDocumentExists(projectId: string, fileId: string): Promise<boolean> {
  await initVectorDB();
  if (client === null) {
    return false;
  }

  const sameFileInProject = {
    must: [
      { key: "projectId", match: { value: projectId } },
      { key: "file_id", match: { value: fileId } },
    ],
  };
  const { count } = await client.count(COLLECTION_NAME, { filter: sameFileInProject });
  return count > 0;
}
160
+
161
/**
 * Build a `file_id -> modified_time` map for a project's Google Drive chunks.
 *
 * Scrolls through all points with `source: "google_drive"` in the project,
 * following `next_page_offset` until the server reports no further page, so
 * projects with more points than one page holds are not truncated.
 *
 * @param projectId Value matched against the payload's `projectId` field.
 * @returns Map from Drive file ID to the stored `modified_time`. Points lacking
 *   either field are skipped. Empty when no client is available after init.
 */
export async function getProjectDocumentMetadata(projectId: string): Promise<Record<string, string>> {
  await initVectorDB();
  if (!client) return {};

  const pageSize = 10000;
  const fileMap: Record<string, string> = {};
  let offset: string | number | undefined = undefined;

  do {
    const page = await client.scroll(COLLECTION_NAME, {
      filter: {
        must: [
          { key: "projectId", match: { value: projectId } },
          { key: "source", match: { value: "google_drive" } }
        ]
      },
      limit: pageSize,
      offset,
      // Only the two fields needed for the map; vectors are not needed.
      with_payload: ["file_id", "modified_time"],
      with_vector: false
    });

    for (const point of page.points) {
      const fileId = point.payload?.file_id;
      const modifiedTime = point.payload?.modified_time;
      if (typeof fileId === "string" && fileId !== "" &&
          typeof modifiedTime === "string" && modifiedTime !== "") {
        fileMap[fileId] = modifiedTime;
      }
    }

    // A missing/null offset means this was the last page.
    const next: unknown = page.next_page_offset;
    offset = typeof next === "string" || typeof next === "number" ? next : undefined;
  } while (offset !== undefined);

  return fileMap;
}
@@ -0,0 +1,108 @@
1
+ import {
2
+ getProjectDocumentMetadata,
3
+ deleteProjectDocument,
4
+ upsertProjectDocument,
5
+ } from "../db/vector.js";
6
+ import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
7
+ // Remove childLogger dependency to fully decouple from core
8
+ // import { childLogger } from "@workspace/core";
9
+ import { config } from "../config.js";
10
+
11
/**
 * Minimal structured logger for the Drive sync.
 *
 * Both levels write to stderr: this package runs as a stdio MCP server, where
 * stdout carries the protocol stream and must not receive log lines.
 */
const log = {
  info: (obj: unknown, msg: string): void => console.error(`[driveSync] ${msg}`, obj),
  error: (obj: unknown, msg: string): void => console.error(`[driveSync] ${msg}`, obj)
};
15
+
16
/**
 * Re-sync every Google Drive file already tracked in the vector store for a project.
 *
 * The set of files comes from `getProjectDocumentMetadata`, i.e. only files
 * that already have chunks stored are checked. For each file:
 *  - trashed or not found (404) on Drive: its chunks are deleted;
 *  - `modifiedTime` differs from the stored value: the file is exported as
 *    plain text, re-chunked and its chunks are replaced;
 *  - otherwise: left untouched.
 *
 * The new content is exported and split BEFORE the old chunks are deleted, so
 * a failing export/split leaves the previously indexed chunks (and therefore
 * the file's tracking entry) intact for the next sync.
 *
 * Returns silently when nothing is tracked or when the service-account
 * credentials are not configured. Per-file errors other than 404 are logged
 * and do not abort the loop.
 *
 * @param projectId Namespace whose tracked Drive files should be checked.
 * @param onSyncMessage Optional callback receiving human-readable progress messages.
 */
export async function syncProjectDriveFiles(
  projectId: string,
  onSyncMessage?: (msg: string) => void,
): Promise<void> {
  const dbMeta = await getProjectDocumentMetadata(projectId);
  const fileIds = Object.keys(dbMeta);

  if (fileIds.length === 0) return;

  log.info({ projectId, fileCount: fileIds.length }, "Checking Drive files for updates...");

  const clientEmail = config.DOC_MCP_GOOGLE_CLIENT_EMAIL;
  const privateKey = normalizePrivateKey(config.DOC_MCP_GOOGLE_PRIVATE_KEY);
  if (!clientEmail || !privateKey) return;

  // Loaded lazily so the dependency is only paid for when a sync actually runs.
  const { google } = await import("googleapis");

  const auth = new google.auth.JWT({
    email: clientEmail,
    key: privateKey,
    scopes: ["https://www.googleapis.com/auth/drive.readonly"],
  });

  const drive = google.drive({ version: "v3", auth });

  let updatedCount = 0;
  for (const fileId of fileIds) {
    try {
      const fileInfo = await drive.files.get({
        fileId: fileId,
        fields: "id, name, modifiedTime, trashed",
        supportsAllDrives: true,
      });

      if (fileInfo.data.trashed) {
        log.info({ fileId, projectId }, "File trashed on Drive, deleting from VectorDB...");
        await deleteProjectDocument(projectId, fileId);
        continue;
      }

      const driveModifiedTime = fileInfo.data.modifiedTime || "";
      if (driveModifiedTime === dbMeta[fileId]) continue;

      if (onSyncMessage) {
        onSyncMessage(`🔄 Syncing updated file: ${fileInfo.data.name}...`);
      }
      log.info({ fileId, projectId }, "File updated on Drive, syncing...");

      // Fetch and chunk first; only touch stored data once this has succeeded.
      const res = await drive.files.export({
        fileId: fileId,
        mimeType: "text/plain",
      });
      const content: unknown = res.data;

      let chunks: string[] = [];
      if (typeof content === "string" && content.trim() !== "") {
        const splitter = new RecursiveCharacterTextSplitter({
          chunkSize: config.CHUNK_SIZE,
          chunkOverlap: config.CHUNK_OVERLAP,
        });
        chunks = await splitter.splitText(content);
      }

      // An empty export still clears the old chunks: the document has no content now.
      await deleteProjectDocument(projectId, fileId);

      for (const chunk of chunks) {
        await upsertProjectDocument(projectId, chunk, {
          title: fileInfo.data.name,
          file_id: fileId,
          source: "google_drive",
          modified_time: driveModifiedTime,
        });
      }
      if (chunks.length > 0) {
        updatedCount++;
      }
    } catch (err: unknown) {
      if (isNotFoundError(err)) {
        log.info({ fileId, projectId }, "File not found on Drive, deleting from VectorDB...");
        await deleteProjectDocument(projectId, fileId);
      } else {
        log.error({ fileId, err: describeError(err) }, "Error syncing drive file");
      }
    }
  }

  if (updatedCount > 0 && onSyncMessage) {
    onSyncMessage(`✅ Synced ${updatedCount} files from Google Drive.`);
  }
}

/** Strip one pair of wrapping double quotes and turn literal "\n" sequences into newlines. */
function normalizePrivateKey(raw: string | undefined): string | undefined {
  if (!raw) return raw;
  const unquoted = raw.startsWith('"') && raw.endsWith('"') ? raw.slice(1, -1) : raw;
  return unquoted.replace(/\\n/g, "\n");
}

/** True when the error carries HTTP status 404 in `code`, `status` or `response.status` (number or numeric string). */
function isNotFoundError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) return false;
  const e = err as { code?: unknown; status?: unknown; response?: { status?: unknown } };
  return [e.code, e.status, e.response?.status].some((v) => Number(v) === 404);
}

/** Best-effort message for a thrown value of unknown type. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ // The doc-agent is now an MCP Server.
2
+ // Use `npx @khoinguyen2002/doc-mcp` (installed binary name: `doc-mcp`) or `node dist/mcp-server.js` to run it.
3
+ // We no longer export tools directly as JS functions.
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import {
6
+ listDriveFiles,
7
+ readDriveDocument,
8
+ } from "./tools/driveTools.js";
9
+ import { saveAgentNote, searchKnowledge } from "./tools/knowledgeTools.js";
10
+ import { config } from "./config.js";
11
+
12
// The folder ID is optional in the config schema but mandatory for this
// server, so refuse to start without it.
const DRIVE_FOLDER_ID = config.DOC_MCP_DRIVE_FOLDER_ID;

if (DRIVE_FOLDER_ID === undefined || DRIVE_FOLDER_ID === "") {
  const reason =
    "Missing DOC_MCP_DRIVE_FOLDER_ID environment variable. The doc-agent requires a target folder ID.";
  console.error(reason);
  process.exit(1);
}

// MCP server instance that all tools below are registered on.
const server = new McpServer({ name: "doc-agent", version: "1.0.0" });
25
+
26
+ // Register tools
27
// Tool: list (optionally filter by title keyword) the documents in the configured folder.
server.registerTool(
  "list_drive_files",
  {
    description: "List and search for Google Drive documents in the configured folder.",
    inputSchema: {
      keyword: z.string().optional().describe("Optional keyword to search for in document titles"),
    },
  },
  async ({ keyword }) => {
    const outcome = await listDriveFiles(keyword);
    if (outcome.success) {
      // Pretty-printed JSON of whatever the listing helper returned.
      const listing = JSON.stringify(outcome.results, null, 2);
      return { content: [{ type: "text", text: listing }] };
    }
    return {
      content: [{ type: "text", text: `Error: ${outcome.error}` }],
      isError: true,
    };
  },
);
51
+
52
// Tool: return the text of one Drive document by file ID.
server.registerTool(
  "read_drive_document",
  {
    description:
      "Read the content of a specific Google Drive document. The document will also be automatically ingested into vector memory for future semantic search.",
    inputSchema: {
      fileId: z.string().describe("The Google Drive file ID to read"),
    },
  },
  async ({ fileId }) => {
    const outcome = await readDriveDocument(fileId);
    if (outcome.success) {
      // Fall back to a placeholder when the helper reports no content.
      const body = outcome.content || "No content found.";
      return { content: [{ type: "text", text: body }] };
    }
    return {
      content: [{ type: "text", text: `Error: ${outcome.error}` }],
      isError: true,
    };
  },
);
74
+
75
// Tool: persist a note written by the agent into vector memory.
server.registerTool(
  "save_agent_note",
  {
    description: "Save an agent note, thought, or summary directly into the vector memory.",
    inputSchema: {
      content: z.string().describe("The note or knowledge content to store"),
    },
  },
  async ({ content }) => {
    const outcome = await saveAgentNote(content);
    if (outcome.success) {
      const confirmation = outcome.message || "Saved successfully";
      return { content: [{ type: "text", text: confirmation }] };
    }
    return {
      content: [{ type: "text", text: `Error: ${outcome.error}` }],
      isError: true,
    };
  },
);
96
+
97
// Tool: semantic search over the folder's vector memory.
server.registerTool(
  "search_knowledge",
  {
    description:
      "Search the folder's vector memory for relevant context or knowledge.",
    inputSchema: {
      query: z.string().describe("The search query"),
      // A result count only makes sense as a positive integer; reject anything
      // else at the schema boundary instead of forwarding it to the vector DB.
      topK: z
        .number()
        .int()
        .positive()
        .optional()
        .describe("Number of results to return (default: 3)"),
    },
  },
  async ({ query, topK }) => {
    const res = await searchKnowledge(query, topK);
    if (!res.success) {
      return {
        content: [{ type: "text", text: `Error: ${res.error}` }],
        isError: true,
      };
    }
    return {
      content: [
        {
          type: "text",
          // `results` is normally a string; serialise defensively if it is not.
          text:
            typeof res.results === "string"
              ? res.results
              : JSON.stringify(res.results),
        },
      ],
    };
  },
);
131
+
132
+ // Start the server
133
/** Attach the server to a stdio transport and announce readiness on stderr. */
async function run() {
  await server.connect(new StdioServerTransport());
  console.error("doc-agent MCP server running on stdio");
}

/** Report a startup failure on stderr and terminate with a non-zero exit code. */
function exitOnFatal(error: unknown): void {
  console.error("Fatal error running server:", error);
  process.exit(1);
}

run().catch(exitOnFatal);