@khoinguyen2002/doc-mcp 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ export interface ChunkUpsert {
15
15
  vector: number[];
16
16
  text: string;
17
17
  title: string;
18
+ fileId: string;
18
19
  blockIndex: number;
19
20
  blockHash: string;
20
21
  source: string;
@@ -51,9 +52,11 @@ export declare function deletePointsByIds(pointIds: string[]): Promise<void>;
51
52
  */
52
53
  export declare function searchProjectMemory(query: string, topK?: number): Promise<any[]>;
53
54
  /**
54
- * Exhaustive full-text search using Qdrant's inverted index on the `text` field.
55
- * Uses whitespace tokenizer API paths like /v1/foo/bar match as single tokens.
56
- * Paginates through all results server-side (no full collection scan in JS).
55
+ * Exhaustive substring search: scrolls ALL points and filters client-side.
56
+ * More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
57
+ * strip surrounding punctuation, causing false negatives for terms like
58
+ * "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
59
+ * For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
57
60
  */
58
61
  export declare function exactSearchChunks(term: string, limit?: number): Promise<any[]>;
59
62
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CAE1E;AAED,wBAAsB,YAAY,kBAgDjC;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,CAAC,CA+CnB;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAiDrB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAqB3E;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAmB3D;AAED;;;;GAIG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CASzE;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,GACf,OAAO,CAAC,GAAG,EAAE,CAAC,CAoBhB;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW,GACjB,OAAO,CAAC,GAAG,EAAE,CAAC,CAkChB;AAED;;GAEG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAuBjE"}
1
+ {"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/db/vector.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CAE1E;AAED,wBAAsB,YAAY,kBAoDjC;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,CAAC,CA+CnB;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAiDrB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAsB3E;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAAE,GACjB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAmB3D;AAED;;;;GAIG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CASzE;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,GACf,OAAO,CAAC,GAAG,EAAE,CAAC,CAoBhB;AAED;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW,GACjB,OAAO,CAAC,GAAG,EAAE,CAAC,CA4BhB;AAED;;GAEG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAuBjE"}
package/dist/db/vector.js CHANGED
@@ -32,6 +32,10 @@ export async function initVectorDB() {
32
32
  field_name: "source",
33
33
  field_schema: "keyword",
34
34
  });
35
+ await client.createPayloadIndex(COLLECTION_NAME, {
36
+ field_name: "file_id",
37
+ field_schema: "keyword",
38
+ });
35
39
  await client.createPayloadIndex(COLLECTION_NAME, {
36
40
  field_name: "block_index",
37
41
  field_schema: "integer",
@@ -160,6 +164,7 @@ export async function upsertChunkBatch(chunks) {
160
164
  payload: {
161
165
  text: c.text,
162
166
  title: c.title,
167
+ file_id: c.fileId,
163
168
  block_index: c.blockIndex,
164
169
  block_hash: c.blockHash,
165
170
  source: c.source,
@@ -249,35 +254,33 @@ export async function searchProjectMemory(query, topK = 3) {
249
254
  }
250
255
  }
251
256
  /**
252
- * Exhaustive full-text search using Qdrant's inverted index on the `text` field.
253
- * Uses whitespace tokenizer API paths like /v1/foo/bar match as single tokens.
254
- * Paginates through all results server-side (no full collection scan in JS).
257
+ * Exhaustive substring search: scrolls ALL points and filters client-side.
258
+ * More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
259
+ * strip surrounding punctuation, causing false negatives for terms like
260
+ * "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
261
+ * For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
255
262
  */
256
263
  export async function exactSearchChunks(term, limit = 50) {
257
264
  await initVectorDB();
258
265
  if (!client)
259
266
  throw new Error("Qdrant not initialized");
260
- const filter = {
261
- must: [
262
- {
263
- key: "text",
264
- match: { text: term.toLowerCase() },
265
- },
266
- ],
267
- };
267
+ const lowerTerm = term.toLowerCase();
268
268
  const results = [];
269
269
  let offset = undefined;
270
- // Paginate until all matching points are collected or limit is reached
271
270
  do {
272
271
  const page = await client.scroll(COLLECTION_NAME, {
273
- filter,
274
272
  with_payload: true,
275
273
  with_vector: false,
276
- limit: Math.min(100, limit - results.length),
274
+ limit: 100,
277
275
  ...(offset !== undefined ? { offset } : {}),
278
276
  });
279
277
  for (const point of page.points) {
280
- results.push({ id: point.id, ...point.payload });
278
+ const text = (point.payload?.text ?? "").toLowerCase();
279
+ if (text.includes(lowerTerm)) {
280
+ results.push({ id: point.id, ...point.payload });
281
+ if (results.length >= limit)
282
+ break;
283
+ }
281
284
  }
282
285
  offset = page.next_page_offset;
283
286
  } while (offset != null && results.length < limit);
@@ -1 +1 @@
1
- {"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAuGD"}
1
+ {"version":3,"file":"ingestFlow.d.ts","sourceRoot":"","sources":["../../src/tools/ingestFlow.ts"],"names":[],"mappings":"AA8MA,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,GAAG,GACX,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAoJD,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,MAAM,EACd,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC;IACT,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC,CAwGD"}
@@ -388,6 +388,7 @@ export async function syncSingleDocument(fileId, driveModifiedTime, title) {
388
388
  vector: vectors[vi],
389
389
  text: bl.text,
390
390
  title,
391
+ fileId,
391
392
  blockIndex: bl.index,
392
393
  blockHash: bl.hash,
393
394
  source: "google_drive",
@@ -15,6 +15,7 @@ export declare function searchKnowledge(query: string, topK?: number): Promise<{
15
15
  success: boolean;
16
16
  results: {
17
17
  title: any;
18
+ fileId: any;
18
19
  offset: any;
19
20
  text: any;
20
21
  }[];
@@ -34,6 +35,7 @@ export declare function searchExact(term: string, limit?: number): Promise<{
34
35
  totalFound: number;
35
36
  results: {
36
37
  title: any;
38
+ fileId: any;
37
39
  offset: any;
38
40
  text: any;
39
41
  }[];
@@ -1 +1 @@
1
- {"version":3,"file":"knowledgeTools.d.ts","sourceRoot":"","sources":["../../src/tools/knowledgeTools.ts"],"names":[],"mappings":"AAGA,wBAAsB,aAAa,CAAC,OAAO,EAAE,MAAM;;;;;;;;GAUlD;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU;;;;;;;;;;;;;;;;GAsBpE;AACD,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW;;;;;;;;;;;;;;;;;;;GAuBnB"}
1
+ {"version":3,"file":"knowledgeTools.d.ts","sourceRoot":"","sources":["../../src/tools/knowledgeTools.ts"],"names":[],"mappings":"AAGA,wBAAsB,aAAa,CAAC,OAAO,EAAE,MAAM;;;;;;;;GAUlD;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU;;;;;;;;;;;;;;;;;GAuBpE;AACD,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,KAAK,GAAE,MAAW;;;;;;;;;;;;;;;;;;;;GAwBnB"}
@@ -24,6 +24,7 @@ export async function searchKnowledge(query, topK = 3) {
24
24
  success: true,
25
25
  results: results.map((r) => ({
26
26
  title: r.title || "Unknown",
27
+ fileId: r.file_id || null,
27
28
  offset: r.offset ?? 0,
28
29
  text: r.text,
29
30
  })),
@@ -45,6 +46,7 @@ export async function searchExact(term, limit = 50) {
45
46
  totalFound: results.length,
46
47
  results: results.map((r) => ({
47
48
  title: r.title || "Unknown",
49
+ fileId: r.file_id || null,
48
50
  offset: r.offset ?? 0,
49
51
  text: r.text,
50
52
  })),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khoinguyen2002/doc-mcp",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
package/src/db/vector.ts CHANGED
@@ -38,6 +38,10 @@ export async function initVectorDB() {
38
38
  field_name: "source",
39
39
  field_schema: "keyword",
40
40
  });
41
+ await client.createPayloadIndex(COLLECTION_NAME, {
42
+ field_name: "file_id",
43
+ field_schema: "keyword",
44
+ });
41
45
  await client.createPayloadIndex(COLLECTION_NAME, {
42
46
  field_name: "block_index",
43
47
  field_schema: "integer",
@@ -181,6 +185,7 @@ export interface ChunkUpsert {
181
185
  vector: number[];
182
186
  text: string;
183
187
  title: string;
188
+ fileId: string;
184
189
  blockIndex: number;
185
190
  blockHash: string;
186
191
  source: string;
@@ -203,6 +208,7 @@ export async function upsertChunkBatch(chunks: ChunkUpsert[]): Promise<void> {
203
208
  payload: {
204
209
  text: c.text,
205
210
  title: c.title,
211
+ file_id: c.fileId,
206
212
  block_index: c.blockIndex,
207
213
  block_hash: c.blockHash,
208
214
  source: c.source,
@@ -308,9 +314,11 @@ export async function searchProjectMemory(
308
314
  }
309
315
 
310
316
  /**
311
- * Exhaustive full-text search using Qdrant's inverted index on the `text` field.
312
- * Uses whitespace tokenizer API paths like /v1/foo/bar match as single tokens.
313
- * Paginates through all results server-side (no full collection scan in JS).
317
+ * Exhaustive substring search: scrolls ALL points and filters client-side.
318
+ * More reliable than Qdrant full-text filter (whitespace tokenizer doesn't
319
+ * strip surrounding punctuation, causing false negatives for terms like
320
+ * "ServiceCode.mkp" appearing as "ServiceCode.mkp)" in headings).
321
+ * For typical collection sizes (~few hundred chunks) the O(N) cost is negligible.
314
322
  */
315
323
  export async function exactSearchChunks(
316
324
  term: string,
@@ -319,31 +327,25 @@ export async function exactSearchChunks(
319
327
  await initVectorDB();
320
328
  if (!client) throw new Error("Qdrant not initialized");
321
329
 
322
- const filter = {
323
- must: [
324
- {
325
- key: "text",
326
- match: { text: term.toLowerCase() },
327
- },
328
- ],
329
- };
330
-
330
+ const lowerTerm = term.toLowerCase();
331
331
  const results: any[] = [];
332
332
  let offset: string | number | null | undefined = undefined;
333
333
 
334
- // Paginate until all matching points are collected or limit is reached
335
334
  do {
336
335
  const page: { points: any[]; next_page_offset?: string | number | null } =
337
336
  await (client as any).scroll(COLLECTION_NAME, {
338
- filter,
339
- with_payload: true,
340
- with_vector: false,
341
- limit: Math.min(100, limit - results.length),
342
- ...(offset !== undefined ? { offset } : {}),
343
- });
337
+ with_payload: true,
338
+ with_vector: false,
339
+ limit: 100,
340
+ ...(offset !== undefined ? { offset } : {}),
341
+ });
344
342
 
345
343
  for (const point of page.points) {
346
- results.push({ id: point.id, ...point.payload });
344
+ const text = ((point.payload?.text as string) ?? "").toLowerCase();
345
+ if (text.includes(lowerTerm)) {
346
+ results.push({ id: point.id, ...point.payload });
347
+ if (results.length >= limit) break;
348
+ }
347
349
  }
348
350
  offset = page.next_page_offset;
349
351
  } while (offset != null && results.length < limit);
@@ -483,6 +483,7 @@ export async function syncSingleDocument(
483
483
  vector: vectors[vi],
484
484
  text: bl.text,
485
485
  title,
486
+ fileId,
486
487
  blockIndex: bl.index,
487
488
  blockHash: bl.hash,
488
489
  source: "google_drive",
@@ -28,6 +28,7 @@ export async function searchKnowledge(query: string, topK: number = 3) {
28
28
  success: true,
29
29
  results: results.map((r: any) => ({
30
30
  title: r.title || "Unknown",
31
+ fileId: r.file_id || null,
31
32
  offset: r.offset ?? 0,
32
33
  text: r.text,
33
34
  })),
@@ -54,6 +55,7 @@ export async function searchExact(
54
55
  totalFound: results.length,
55
56
  results: results.map((r: any) => ({
56
57
  title: r.title || "Unknown",
58
+ fileId: r.file_id || null,
57
59
  offset: r.offset ?? 0,
58
60
  text: r.text,
59
61
  })),