@khoinguyen2002/doc-mcp 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +6 -4
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +22 -7
- package/dist/db/rateLimiter.d.ts +6 -0
- package/dist/db/rateLimiter.d.ts.map +1 -0
- package/dist/db/rateLimiter.js +20 -0
- package/dist/db/syncState.d.ts +12 -0
- package/dist/db/syncState.d.ts.map +1 -0
- package/dist/db/syncState.js +69 -0
- package/dist/db/vector.d.ts +61 -6
- package/dist/db/vector.d.ts.map +1 -1
- package/dist/db/vector.js +249 -109
- package/dist/mcp-server.js +44 -23
- package/dist/tools/driveTools.d.ts +20 -16
- package/dist/tools/driveTools.d.ts.map +1 -1
- package/dist/tools/driveTools.js +100 -149
- package/dist/tools/ingestFlow.d.ts +8 -0
- package/dist/tools/ingestFlow.d.ts.map +1 -0
- package/dist/tools/ingestFlow.js +407 -0
- package/dist/tools/knowledgeTools.d.ts +25 -6
- package/dist/tools/knowledgeTools.d.ts.map +1 -1
- package/dist/tools/knowledgeTools.js +29 -40
- package/package.json +8 -1
- package/src/config.ts +28 -9
- package/src/db/rateLimiter.ts +25 -0
- package/src/db/syncState.ts +87 -0
- package/src/db/vector.ts +305 -115
- package/src/mcp-server.ts +55 -33
- package/src/tools/driveTools.ts +111 -175
- package/src/tools/ingestFlow.ts +508 -0
- package/src/tools/knowledgeTools.ts +34 -38
- package/src/types/turndown-plugin-gfm.d.ts +8 -0
package/dist/config.d.ts
CHANGED
|
@@ -2,12 +2,14 @@ export declare const config: {
|
|
|
2
2
|
QDRANT_URL: string;
|
|
3
3
|
OPENROUTER_API_KEY: string;
|
|
4
4
|
EMBEDDING_MODEL_ID: string;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
MAX_CHUNK_SIZE: number;
|
|
6
|
+
EMBEDDING_MAX_TOKENS: number;
|
|
7
|
+
EMBEDDING_RPM: number;
|
|
8
|
+
UPSTASH_REDIS_REST_URL: string;
|
|
9
|
+
UPSTASH_REDIS_REST_TOKEN: string;
|
|
8
10
|
DOC_MCP_GOOGLE_CLIENT_EMAIL?: string | undefined;
|
|
9
11
|
DOC_MCP_GOOGLE_PRIVATE_KEY?: string | undefined;
|
|
10
12
|
QDRANT_API_KEY?: string | undefined;
|
|
13
|
+
VISION_MODEL_ID?: string | undefined;
|
|
11
14
|
};
|
|
12
|
-
export type Config = typeof config;
|
|
13
15
|
//# sourceMappingURL=config.d.ts.map
|
package/dist/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AA+CA,eAAO,MAAM,MAAM;;;;;;;;;;;;;CAAe,CAAC"}
|
package/dist/config.js
CHANGED
|
@@ -1,15 +1,30 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
const schema = z.object({
|
|
3
|
-
DOC_MCP_DRIVE_FOLDER_ID: z.string().optional(),
|
|
4
3
|
DOC_MCP_GOOGLE_CLIENT_EMAIL: z.string().email().optional(),
|
|
5
4
|
DOC_MCP_GOOGLE_PRIVATE_KEY: z.string().optional(),
|
|
6
|
-
// Vector DB
|
|
5
|
+
// Vector DB
|
|
7
6
|
QDRANT_URL: z.string().url().describe("The URL of your Qdrant instance"),
|
|
8
|
-
QDRANT_API_KEY: z
|
|
7
|
+
QDRANT_API_KEY: z
|
|
8
|
+
.string()
|
|
9
|
+
.optional()
|
|
10
|
+
.describe("API Key for Qdrant Cloud (optional for local)"),
|
|
11
|
+
// Embeddings
|
|
9
12
|
OPENROUTER_API_KEY: z.string().min(1),
|
|
10
|
-
EMBEDDING_MODEL_ID: z
|
|
11
|
-
|
|
12
|
-
|
|
13
|
+
EMBEDDING_MODEL_ID: z
|
|
14
|
+
.string()
|
|
15
|
+
.default("nvidia/llama-nemotron-embed-vl-1b-v2:free"),
|
|
16
|
+
// Max chunk size in Markdown chars — system may use a smaller value if
|
|
17
|
+
// the embedding model's token budget requires it (see ingestFlow.ts)
|
|
18
|
+
MAX_CHUNK_SIZE: z.coerce.number().int().positive().default(3000),
|
|
19
|
+
// Max tokens per embedding API call (for batch packing)
|
|
20
|
+
EMBEDDING_MAX_TOKENS: z.coerce.number().int().positive().default(32000),
|
|
21
|
+
// Max embedding API requests per minute
|
|
22
|
+
EMBEDDING_RPM: z.coerce.number().int().positive().default(40),
|
|
23
|
+
// Vision LLM model ID for image descriptions (optional, skip if not set)
|
|
24
|
+
VISION_MODEL_ID: z.string().optional(),
|
|
25
|
+
// Upstash Redis (for sync state)
|
|
26
|
+
UPSTASH_REDIS_REST_URL: z.string().url(),
|
|
27
|
+
UPSTASH_REDIS_REST_TOKEN: z.string().min(1),
|
|
13
28
|
});
|
|
14
29
|
function loadConfig() {
|
|
15
30
|
const result = schema.safeParse(process.env);
|
|
@@ -17,7 +32,7 @@ function loadConfig() {
|
|
|
17
32
|
const missing = result.error.issues
|
|
18
33
|
.map((i) => ` ${i.path.join(".")}: ${i.message}`)
|
|
19
34
|
.join("\n");
|
|
20
|
-
throw new Error(`
|
|
35
|
+
throw new Error(`doc-mcp configuration error:\n${missing}\n\nPlease check your environment variables.`);
|
|
21
36
|
}
|
|
22
37
|
return result.data;
|
|
23
38
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rateLimiter.d.ts","sourceRoot":"","sources":["../../src/db/rateLimiter.ts"],"names":[],"mappings":"AASA;;;GAGG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAWtD"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { config } from "../config.js";
|
|
2
|
+
const MIN_GAP_MS = Math.ceil(60000 / config.EMBEDDING_RPM);
|
|
3
|
+
let lastCallTime = 0;
|
|
4
|
+
function sleep(ms) {
|
|
5
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Call này trước mỗi embedBatch() để đảm bảo không vượt EMBEDDING_RPM.
|
|
9
|
+
* Sliding window đơn giản: enforce minimum gap = 60000 / RPM giữa các lần gọi.
|
|
10
|
+
*/
|
|
11
|
+
export async function waitForRateLimit() {
|
|
12
|
+
const now = Date.now();
|
|
13
|
+
const elapsed = now - lastCallTime;
|
|
14
|
+
if (elapsed < MIN_GAP_MS) {
|
|
15
|
+
const waitMs = MIN_GAP_MS - elapsed;
|
|
16
|
+
console.error(`[RateLimit] Waiting ${waitMs}ms before next embedding call...`);
|
|
17
|
+
await sleep(waitMs);
|
|
18
|
+
}
|
|
19
|
+
lastCallTime = Date.now();
|
|
20
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface SyncEntry {
|
|
2
|
+
modifiedTime: string;
|
|
3
|
+
blockCount: number;
|
|
4
|
+
title: string;
|
|
5
|
+
}
|
|
6
|
+
export declare function getAllSyncEntries(): Promise<Record<string, SyncEntry>>;
|
|
7
|
+
export declare function getSyncEntry(fileId: string): Promise<SyncEntry | null>;
|
|
8
|
+
export declare function setSyncEntry(fileId: string, entry: SyncEntry): Promise<void>;
|
|
9
|
+
export declare function deleteSyncEntry(fileId: string): Promise<void>;
|
|
10
|
+
export declare function getImageDesc(imageHash: string): Promise<string | null>;
|
|
11
|
+
export declare function setImageDesc(imageHash: string, description: string): Promise<void>;
|
|
12
|
+
//# sourceMappingURL=syncState.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"syncState.d.ts","sourceRoot":"","sources":["../../src/db/syncState.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,SAAS;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAkB5E;AAED,wBAAsB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAW5E;AAED,wBAAsB,YAAY,CAChC,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,SAAS,GACf,OAAO,CAAC,IAAI,CAAC,CAGf;AAED,wBAAsB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGnE;AAOD,wBAAsB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAI5E;AAED,wBAAsB,YAAY,CAChC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,IAAI,CAAC,CAGf"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { Redis } from "@upstash/redis";
|
|
2
|
+
import { config } from "../config.js";
|
|
3
|
+
const HASH_KEY = "doc_sync_state";
|
|
4
|
+
let _redis = null;
|
|
5
|
+
function getRedis() {
|
|
6
|
+
if (!_redis) {
|
|
7
|
+
_redis = new Redis({
|
|
8
|
+
url: config.UPSTASH_REDIS_REST_URL,
|
|
9
|
+
token: config.UPSTASH_REDIS_REST_TOKEN,
|
|
10
|
+
});
|
|
11
|
+
}
|
|
12
|
+
return _redis;
|
|
13
|
+
}
|
|
14
|
+
export async function getAllSyncEntries() {
|
|
15
|
+
const redis = getRedis();
|
|
16
|
+
const raw = await redis.hgetall(HASH_KEY);
|
|
17
|
+
if (!raw)
|
|
18
|
+
return {};
|
|
19
|
+
const result = {};
|
|
20
|
+
for (const [fileId, value] of Object.entries(raw)) {
|
|
21
|
+
if (!value)
|
|
22
|
+
continue;
|
|
23
|
+
try {
|
|
24
|
+
result[fileId] =
|
|
25
|
+
typeof value === "string"
|
|
26
|
+
? JSON.parse(value)
|
|
27
|
+
: value;
|
|
28
|
+
}
|
|
29
|
+
catch {
|
|
30
|
+
// skip malformed entries
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
return result;
|
|
34
|
+
}
|
|
35
|
+
export async function getSyncEntry(fileId) {
|
|
36
|
+
const redis = getRedis();
|
|
37
|
+
const raw = await redis.hget(HASH_KEY, fileId);
|
|
38
|
+
if (!raw)
|
|
39
|
+
return null;
|
|
40
|
+
try {
|
|
41
|
+
return typeof raw === "string"
|
|
42
|
+
? JSON.parse(raw)
|
|
43
|
+
: raw;
|
|
44
|
+
}
|
|
45
|
+
catch {
|
|
46
|
+
return null;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
export async function setSyncEntry(fileId, entry) {
|
|
50
|
+
const redis = getRedis();
|
|
51
|
+
await redis.hset(HASH_KEY, { [fileId]: JSON.stringify(entry) });
|
|
52
|
+
}
|
|
53
|
+
export async function deleteSyncEntry(fileId) {
|
|
54
|
+
const redis = getRedis();
|
|
55
|
+
await redis.hdel(HASH_KEY, fileId);
|
|
56
|
+
}
|
|
57
|
+
// ─── Image Description Cache ──────────────────────────────────────────────────
|
|
58
|
+
// Global hash: md5(imageBinary) → description text
|
|
59
|
+
// Deduplicates across docs (same image used in multiple files reuses description)
|
|
60
|
+
const IMG_DESC_KEY = "img_desc";
|
|
61
|
+
export async function getImageDesc(imageHash) {
|
|
62
|
+
const redis = getRedis();
|
|
63
|
+
const raw = await redis.hget(IMG_DESC_KEY, imageHash);
|
|
64
|
+
return raw ? String(raw) : null;
|
|
65
|
+
}
|
|
66
|
+
export async function setImageDesc(imageHash, description) {
|
|
67
|
+
const redis = getRedis();
|
|
68
|
+
await redis.hset(IMG_DESC_KEY, { [imageHash]: description });
|
|
69
|
+
}
|
package/dist/db/vector.d.ts
CHANGED
|
@@ -1,8 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic Qdrant point ID: uuidv5(fileId:blockIndex, NS)
|
|
3
|
+
* Same input → same ID → upsert overwrites correctly.
|
|
4
|
+
*/
|
|
5
|
+
export declare function getBlockPointId(fileId: string, blockIndex: number): string;
|
|
1
6
|
export declare function initVectorDB(): Promise<void>;
|
|
2
|
-
export declare function embedText(text: string): Promise<number[]>;
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
export declare function
|
|
7
|
+
export declare function embedText(text: string, maxRetries?: number): Promise<number[]>;
|
|
8
|
+
/**
|
|
9
|
+
* Embed nhiều texts trong 1 API call (batch).
|
|
10
|
+
* OpenRouter hỗ trợ input: string[] → trả data[i].embedding.
|
|
11
|
+
*/
|
|
12
|
+
export declare function embedBatch(texts: string[], maxRetries?: number): Promise<number[][]>;
|
|
13
|
+
export interface ChunkUpsert {
|
|
14
|
+
pointId: string;
|
|
15
|
+
vector: number[];
|
|
16
|
+
text: string;
|
|
17
|
+
title: string;
|
|
18
|
+
blockIndex: number;
|
|
19
|
+
blockHash: string;
|
|
20
|
+
source: string;
|
|
21
|
+
offset: number;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Bulk upsert nhiều chunks vào Qdrant trong 1 HTTP call.
|
|
25
|
+
*/
|
|
26
|
+
export declare function upsertChunkBatch(chunks: ChunkUpsert[]): Promise<void>;
|
|
27
|
+
/**
|
|
28
|
+
* Fetch block_hash AND offset for a list of point IDs.
|
|
29
|
+
* Used to diff block-level changes during re-sync (hash) and
|
|
30
|
+
* detect stale offsets in unchanged blocks (offset).
|
|
31
|
+
*/
|
|
32
|
+
export declare function getBlockMetaByIds(pointIds: string[]): Promise<Record<string, {
|
|
33
|
+
hash: string;
|
|
34
|
+
offset: number;
|
|
35
|
+
}>>;
|
|
36
|
+
/**
|
|
37
|
+
* Update only the `offset` payload field for a set of points (no re-embedding).
|
|
38
|
+
* Called for unchanged blocks whose character position shifted due to earlier edits.
|
|
39
|
+
* Uses parallel setPayload calls (lightweight metadata-only updates).
|
|
40
|
+
*/
|
|
41
|
+
export declare function updateBlockOffsets(updates: {
|
|
42
|
+
pointId: string;
|
|
43
|
+
offset: number;
|
|
44
|
+
}[]): Promise<void>;
|
|
45
|
+
/**
|
|
46
|
+
* Xóa Qdrant points theo danh sách IDs.
|
|
47
|
+
*/
|
|
48
|
+
export declare function deletePointsByIds(pointIds: string[]): Promise<void>;
|
|
49
|
+
/**
|
|
50
|
+
* Global semantic search — không filter theo folder hay file.
|
|
51
|
+
*/
|
|
52
|
+
export declare function searchProjectMemory(query: string, topK?: number): Promise<any[]>;
|
|
53
|
+
/**
|
|
54
|
+
* Exhaustive full-text search using Qdrant's inverted index on the `text` field.
|
|
55
|
+
* Uses whitespace tokenizer → API paths like /v1/foo/bar match as single tokens.
|
|
56
|
+
* Paginates through all results server-side (no full collection scan in JS).
|
|
57
|
+
*/
|
|
58
|
+
export declare function exactSearchChunks(term: string, limit?: number): Promise<any[]>;
|
|
59
|
+
/**
|
|
60
|
+
* Upsert agent note với random UUID (không có fileId).
|
|
61
|
+
*/
|
|
62
|
+
export declare function upsertAgentNote(text: string): Promise<void>;
|
|
8
63
|
//# sourceMappingURL=vector.d.ts.map
|
package/dist/db/vector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CAE1E;AAED,wBAAsB,YAAY,kBAgDjC;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,CAAC,CA+CnB;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAiDrB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAqB3E;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAmB3D;AAED;;;;GAIG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CASzE;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,GACf,OAAO,CAAC,GAAG,EAAE,CAAC,CAoBhB;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW,GACjB,OAAO,CAAC,GAAG,EAAE,CAAC,CAkChB;AAED;;GAEG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAuBjE"}
|