@khoinguyen2002/doc-mcp 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db/vector.d.ts +6 -3
- package/dist/db/vector.d.ts.map +1 -1
- package/dist/db/vector.js +18 -15
- package/dist/tools/ingestFlow.d.ts.map +1 -1
- package/dist/tools/ingestFlow.js +1 -0
- package/dist/tools/knowledgeTools.d.ts +2 -0
- package/dist/tools/knowledgeTools.d.ts.map +1 -1
- package/dist/tools/knowledgeTools.js +2 -0
- package/package.json +1 -1
- package/src/db/vector.ts +22 -20
- package/src/tools/ingestFlow.ts +1 -0
- package/src/tools/knowledgeTools.ts +2 -0
package/dist/db/vector.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export interface ChunkUpsert {
|
|
|
15
15
|
vector: number[];
|
|
16
16
|
text: string;
|
|
17
17
|
title: string;
|
|
18
|
+
fileId: string;
|
|
18
19
|
blockIndex: number;
|
|
19
20
|
blockHash: string;
|
|
20
21
|
source: string;
|
|
@@ -51,9 +52,11 @@ export declare function deletePointsByIds(pointIds: string[]): Promise<void>;
|
|
|
51
52
|
*/
|
|
52
53
|
export declare function searchProjectMemory(query: string, topK?: number): Promise<any[]>;
|
|
53
54
|
/**
|
|
54
|
-
* Exhaustive
|
|
55
|
-
*
|
|
56
|
-
*
|
|
55
|
+
* Exhaustive substring search: scrolls ALL points and filters client-side.
|
|
56
|
+
* More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
|
|
57
|
+
* strip surrounding punctuation, causing false negatives for terms like
|
|
58
|
+
* "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
|
|
59
|
+
* For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
|
|
57
60
|
*/
|
|
58
61
|
export declare function exactSearchChunks(term: string, limit?: number): Promise<any[]>;
|
|
59
62
|
/**
|
package/dist/db/vector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CAE1E;AAED,wBAAsB,YAAY,
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CAE1E;AAED,wBAAsB,YAAY,kBAoDjC;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,CAAC,CA+CnB;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAiDrB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAsB3E;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAmB3D;AAED;;;;GAIG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CASzE;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,GACf,OAAO,CAAC,GAAG,EAAE,CAAC,CAoBhB;AAED;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW,GACjB,OAAO,CAAC,GAAG,EAAE,CAAC,CA4BhB;AAED;;GAEG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAuBjE"}
|
package/dist/db/vector.js
CHANGED
|
@@ -32,6 +32,10 @@ export async function initVectorDB() {
|
|
|
32
32
|
field_name: "source",
|
|
33
33
|
field_schema: "keyword",
|
|
34
34
|
});
|
|
35
|
+
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
36
|
+
field_name: "file_id",
|
|
37
|
+
field_schema: "keyword",
|
|
38
|
+
});
|
|
35
39
|
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
36
40
|
field_name: "block_index",
|
|
37
41
|
field_schema: "integer",
|
|
@@ -160,6 +164,7 @@ export async function upsertChunkBatch(chunks) {
|
|
|
160
164
|
payload: {
|
|
161
165
|
text: c.text,
|
|
162
166
|
title: c.title,
|
|
167
|
+
file_id: c.fileId,
|
|
163
168
|
block_index: c.blockIndex,
|
|
164
169
|
block_hash: c.blockHash,
|
|
165
170
|
source: c.source,
|
|
@@ -249,35 +254,33 @@ export async function searchProjectMemory(query, topK = 3) {
|
|
|
249
254
|
}
|
|
250
255
|
}
|
|
251
256
|
/**
|
|
252
|
-
* Exhaustive
|
|
253
|
-
*
|
|
254
|
-
*
|
|
257
|
+
* Exhaustive substring search: scrolls ALL points and filters client-side.
|
|
258
|
+
* More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
|
|
259
|
+
* strip surrounding punctuation, causing false negatives for terms like
|
|
260
|
+
* "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
|
|
261
|
+
* For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
|
|
255
262
|
*/
|
|
256
263
|
export async function exactSearchChunks(term, limit = 50) {
|
|
257
264
|
await initVectorDB();
|
|
258
265
|
if (!client)
|
|
259
266
|
throw new Error("Qdrant not initialized");
|
|
260
|
-
const
|
|
261
|
-
must: [
|
|
262
|
-
{
|
|
263
|
-
key: "text",
|
|
264
|
-
match: { text: term.toLowerCase() },
|
|
265
|
-
},
|
|
266
|
-
],
|
|
267
|
-
};
|
|
267
|
+
const lowerTerm = term.toLowerCase();
|
|
268
268
|
const results = [];
|
|
269
269
|
let offset = undefined;
|
|
270
|
-
// Paginate until all matching points are collected or limit is reached
|
|
271
270
|
do {
|
|
272
271
|
const page = await client.scroll(COLLECTION_NAME, {
|
|
273
|
-
filter,
|
|
274
272
|
with_payload: true,
|
|
275
273
|
with_vector: false,
|
|
276
|
-
limit:
|
|
274
|
+
limit: 100,
|
|
277
275
|
...(offset !== undefined ? { offset } : {}),
|
|
278
276
|
});
|
|
279
277
|
for (const point of page.points) {
|
|
280
|
-
|
|
278
|
+
const text = (point.payload?.text ?? "").toLowerCase();
|
|
279
|
+
if (text.includes(lowerTerm)) {
|
|
280
|
+
results.push({ id: point.id, ...point.payload });
|
|
281
|
+
if (results.length >= limit)
|
|
282
|
+
break;
|
|
283
|
+
}
|
|
281
284
|
}
|
|
282
285
|
offset = page.next_page_offset;
|
|
283
286
|
} while (offset != null && results.length < limit);
|
|
package/dist/tools/ingestFlow.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,
|
|
1
|
+
{"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAwGD"}
|
package/dist/tools/ingestFlow.js
CHANGED
|
package/dist/tools/knowledgeTools.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare function searchKnowledge(query: string, topK?: number): Promise<{
|
|
|
15
15
|
success: boolean;
|
|
16
16
|
results: {
|
|
17
17
|
title: any;
|
|
18
|
+
fileId: any;
|
|
18
19
|
offset: any;
|
|
19
20
|
text: any;
|
|
20
21
|
}[];
|
|
@@ -34,6 +35,7 @@ export declare function searchExact(term: string, limit?: number): Promise<{
|
|
|
34
35
|
totalFound: number;
|
|
35
36
|
results: {
|
|
36
37
|
title: any;
|
|
38
|
+
fileId: any;
|
|
37
39
|
offset: any;
|
|
38
40
|
text: any;
|
|
39
41
|
}[];
|
|
package/dist/tools/knowledgeTools.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"knowledgeTools.d.ts","sourceRoot":"","sources":["../../src/tools/knowledgeTools.ts"],"names":[],"mappings":"AAGA,wBAAsB,aAAa,CAAC,OAAO,EAAE,MAAM;;;;;;;;GAUlD;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU
|
|
1
|
+
{"version":3,"file":"knowledgeTools.d.ts","sourceRoot":"","sources":["../../src/tools/knowledgeTools.ts"],"names":[],"mappings":"AAGA,wBAAsB,aAAa,CAAC,OAAO,EAAE,MAAM;;;;;;;;GAUlD;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU;;;;;;;;;;;;;;;;;GAuBpE;AACD,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW;;;;;;;;;;;;;;;;;;;;GAwBnB"}
|
|
package/dist/tools/knowledgeTools.js
CHANGED
|
@@ -24,6 +24,7 @@ export async function searchKnowledge(query, topK = 3) {
|
|
|
24
24
|
success: true,
|
|
25
25
|
results: results.map((r) => ({
|
|
26
26
|
title: r.title || "Unknown",
|
|
27
|
+
fileId: r.file_id || null,
|
|
27
28
|
offset: r.offset ?? 0,
|
|
28
29
|
text: r.text,
|
|
29
30
|
})),
|
|
@@ -45,6 +46,7 @@ export async function searchExact(term, limit = 50) {
|
|
|
45
46
|
totalFound: results.length,
|
|
46
47
|
results: results.map((r) => ({
|
|
47
48
|
title: r.title || "Unknown",
|
|
49
|
+
fileId: r.file_id || null,
|
|
48
50
|
offset: r.offset ?? 0,
|
|
49
51
|
text: r.text,
|
|
50
52
|
})),
|
package/package.json
CHANGED
package/src/db/vector.ts
CHANGED
|
@@ -38,6 +38,10 @@ export async function initVectorDB() {
|
|
|
38
38
|
field_name: "source",
|
|
39
39
|
field_schema: "keyword",
|
|
40
40
|
});
|
|
41
|
+
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
42
|
+
field_name: "file_id",
|
|
43
|
+
field_schema: "keyword",
|
|
44
|
+
});
|
|
41
45
|
await client.createPayloadIndex(COLLECTION_NAME, {
|
|
42
46
|
field_name: "block_index",
|
|
43
47
|
field_schema: "integer",
|
|
@@ -181,6 +185,7 @@ export interface ChunkUpsert {
|
|
|
181
185
|
vector: number[];
|
|
182
186
|
text: string;
|
|
183
187
|
title: string;
|
|
188
|
+
fileId: string;
|
|
184
189
|
blockIndex: number;
|
|
185
190
|
blockHash: string;
|
|
186
191
|
source: string;
|
|
@@ -203,6 +208,7 @@ export async function upsertChunkBatch(chunks: ChunkUpsert[]): Promise<void> {
|
|
|
203
208
|
payload: {
|
|
204
209
|
text: c.text,
|
|
205
210
|
title: c.title,
|
|
211
|
+
file_id: c.fileId,
|
|
206
212
|
block_index: c.blockIndex,
|
|
207
213
|
block_hash: c.blockHash,
|
|
208
214
|
source: c.source,
|
|
@@ -308,9 +314,11 @@ export async function searchProjectMemory(
|
|
|
308
314
|
}
|
|
309
315
|
|
|
310
316
|
/**
|
|
311
|
-
* Exhaustive
|
|
312
|
-
*
|
|
313
|
-
*
|
|
317
|
+
* Exhaustive substring search: scrolls ALL points and filters client-side.
|
|
318
|
+
* More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
|
|
319
|
+
* strip surrounding punctuation, causing false negatives for terms like
|
|
320
|
+
* "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
|
|
321
|
+
* For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
|
|
314
322
|
*/
|
|
315
323
|
export async function exactSearchChunks(
|
|
316
324
|
term: string,
|
|
@@ -319,31 +327,25 @@ export async function exactSearchChunks(
|
|
|
319
327
|
await initVectorDB();
|
|
320
328
|
if (!client) throw new Error("Qdrant not initialized");
|
|
321
329
|
|
|
322
|
-
const
|
|
323
|
-
must: [
|
|
324
|
-
{
|
|
325
|
-
key: "text",
|
|
326
|
-
match: { text: term.toLowerCase() },
|
|
327
|
-
},
|
|
328
|
-
],
|
|
329
|
-
};
|
|
330
|
-
|
|
330
|
+
const lowerTerm = term.toLowerCase();
|
|
331
331
|
const results: any[] = [];
|
|
332
332
|
let offset: string | number | null | undefined = undefined;
|
|
333
333
|
|
|
334
|
-
// Paginate until all matching points are collected or limit is reached
|
|
335
334
|
do {
|
|
336
335
|
const page: { points: any[]; next_page_offset?: string | number | null } =
|
|
337
336
|
await (client as any).scroll(COLLECTION_NAME, {
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
});
|
|
337
|
+
with_payload: true,
|
|
338
|
+
with_vector: false,
|
|
339
|
+
limit: 100,
|
|
340
|
+
...(offset !== undefined ? { offset } : {}),
|
|
341
|
+
});
|
|
344
342
|
|
|
345
343
|
for (const point of page.points) {
|
|
346
|
-
|
|
344
|
+
const text = ((point.payload?.text as string) ?? "").toLowerCase();
|
|
345
|
+
if (text.includes(lowerTerm)) {
|
|
346
|
+
results.push({ id: point.id, ...point.payload });
|
|
347
|
+
if (results.length >= limit) break;
|
|
348
|
+
}
|
|
347
349
|
}
|
|
348
350
|
offset = page.next_page_offset;
|
|
349
351
|
} while (offset != null && results.length < limit);
|
package/src/tools/ingestFlow.ts
CHANGED
|
package/src/tools/knowledgeTools.ts
CHANGED
|
@@ -28,6 +28,7 @@ export async function searchKnowledge(query: string, topK: number = 3) {
|
|
|
28
28
|
success: true,
|
|
29
29
|
results: results.map((r: any) => ({
|
|
30
30
|
title: r.title || "Unknown",
|
|
31
|
+
fileId: r.file_id || null,
|
|
31
32
|
offset: r.offset ?? 0,
|
|
32
33
|
text: r.text,
|
|
33
34
|
})),
|
|
@@ -54,6 +55,7 @@ export async function searchExact(
|
|
|
54
55
|
totalFound: results.length,
|
|
55
56
|
results: results.map((r: any) => ({
|
|
56
57
|
title: r.title || "Unknown",
|
|
58
|
+
fileId: r.file_id || null,
|
|
57
59
|
offset: r.offset ?? 0,
|
|
58
60
|
text: r.text,
|
|
59
61
|
})),
|