@hsingjui/contextweaver 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
  import {
  getIndexer,
  getVectorStore
- } from "./chunk-WWYSLCNZ.js";
+ } from "./chunk-TJHS7BN7.js";
  import {
  initDb,
  isChunksFtsInitialized,
@@ -9,11 +9,11 @@ import {
  searchChunksFts,
  searchFilesFts,
  segmentQuery
- } from "./chunk-VW5RACJC.js";
+ } from "./chunk-OXPWRE3G.js";
  import {
  isDebugEnabled,
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import {
  getEmbeddingConfig,
  getRerankerConfig
@@ -1,9 +1,9 @@
  import {
  generateProjectId
- } from "./chunk-VW5RACJC.js";
+ } from "./chunk-OXPWRE3G.js";
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";

  // src/mcp/tools/codebaseRetrieval.ts
  import fs from "fs";
@@ -67,9 +67,9 @@ function isProjectIndexed(projectId) {
  const dbPath = path.join(BASE_DIR, projectId, "index.db");
  return fs.existsSync(dbPath);
  }
- async function ensureIndexed(repoPath, projectId) {
- const { withLock } = await import("./lock-PX2BX2YN.js");
- const { scan } = await import("./scanner-HYP3L57R.js");
+ async function ensureIndexed(repoPath, projectId, onProgress) {
+ const { withLock } = await import("./lock-RC33CJZA.js");
+ const { scan } = await import("./scanner-NM7WPJJE.js");
  await withLock(projectId, "index", async () => {
  const wasIndexed = isProjectIndexed(projectId);
  if (!wasIndexed) {
@@ -77,11 +77,12 @@ async function ensureIndexed(repoPath, projectId) {
  { repoPath, projectId: projectId.slice(0, 10) },
  "\u4EE3\u7801\u5E93\u672A\u521D\u59CB\u5316\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15..."
  );
+ onProgress?.(0, 100, "\u4EE3\u7801\u5E93\u672A\u7D22\u5F15\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15...");
  } else {
  logger.debug({ projectId: projectId.slice(0, 10) }, "\u6267\u884C\u589E\u91CF\u7D22\u5F15...");
  }
  const startTime = Date.now();
- const stats = await scan(repoPath, { vectorIndex: true });
+ const stats = await scan(repoPath, { vectorIndex: true, onProgress });
  const elapsed = Date.now() - startTime;
  logger.info(
  {
@@ -98,7 +99,7 @@ async function ensureIndexed(repoPath, projectId) {
  );
  });
  }
- async function handleCodebaseRetrieval(args, configOverride = ZEN_CONFIG_OVERRIDE) {
+ async function handleCodebaseRetrieval(args, configOverride = ZEN_CONFIG_OVERRIDE, onProgress) {
  const { repo_path, information_request, technical_terms } = args;
  logger.info(
  {
@@ -118,7 +119,7 @@ async function handleCodebaseRetrieval(args, configOverride = ZEN_CONFIG_OVERRID
  return formatEnvMissingResponse(allMissingVars);
  }
  const projectId = generateProjectId(repo_path);
- await ensureIndexed(repo_path, projectId);
+ await ensureIndexed(repo_path, projectId, onProgress);
  const query = [information_request, ...technical_terms || []].filter(Boolean).join(" ");
  logger.info(
  {
@@ -128,7 +129,7 @@ async function handleCodebaseRetrieval(args, configOverride = ZEN_CONFIG_OVERRID
  },
  "MCP \u67E5\u8BE2\u6784\u5EFA"
  );
- const { SearchService } = await import("./SearchService-DYGJT2DZ.js");
+ const { SearchService } = await import("./SearchService-ZI7QP3NE.js");
  const service = new SearchService(projectId, repo_path, configOverride);
  await service.init();
  logger.debug("SearchService \u521D\u59CB\u5316\u5B8C\u6210");
@@ -176,16 +177,16 @@ async function handleCodebaseRetrieval(args, configOverride = ZEN_CONFIG_OVERRID
  },
  "MCP codebase-retrieval \u5B8C\u6210"
  );
- return formatMcpResponse(contextPack, information_request);
+ return formatMcpResponse(contextPack);
  }
- function formatMcpResponse(pack, originalRequest) {
+ function formatMcpResponse(pack) {
  const { files, seeds } = pack;
  const fileBlocks = files.map((file) => {
  const segments = file.segments.map((seg) => formatSegment(seg)).join("\n\n");
  return segments;
  }).join("\n\n---\n\n");
  const summary = [
- `Found ${seeds.length} relevant code blocks for: "${originalRequest}"`,
+ `Found ${seeds.length} relevant code blocks`,
  `Files: ${files.length}`,
  `Total segments: ${files.reduce((acc, f) => acc + f.segments.length, 0)}`
  ].join(" | ");
@@ -2,7 +2,7 @@ import {
  closeAllIndexers,
  closeAllVectorStores,
  getIndexer
- } from "./chunk-WWYSLCNZ.js";
+ } from "./chunk-TJHS7BN7.js";
  import {
  batchDelete,
  batchUpdateMtime,
@@ -16,10 +16,10 @@ import {
  getStoredEmbeddingDimensions,
  initDb,
  setStoredEmbeddingDimensions
- } from "./chunk-VW5RACJC.js";
+ } from "./chunk-OXPWRE3G.js";
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import {
  getEmbeddingConfig,
  getExcludePatterns
@@ -165,7 +165,7 @@ import path2 from "path";
  import pLimit from "p-limit";

  // src/chunking/ParserPool.ts
- import Parser from "tree-sitter";
+ import Parser from "@keqingmoe/tree-sitter";
  var GRAMMAR_MODULES = {
  typescript: "tree-sitter-typescript",
  javascript: "tree-sitter-javascript",
@@ -227,20 +227,28 @@ function isLanguageSupported(language) {
  var LANGUAGE_SPECS = {
  typescript: {
  hierarchy: /* @__PURE__ */ new Set([
- // 类和接口
  "class_declaration",
  "abstract_class_declaration",
  "interface_declaration",
- // 函数
  "function_declaration",
  "generator_function_declaration",
  "method_definition",
  "arrow_function",
- // 模块
  "export_statement",
  "import_statement"
  ]),
- nameFields: ["name", "id"]
+ nameFields: ["name", "id"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "property_identifier"]),
+ prefixMap: {
+ class_declaration: "class ",
+ abstract_class_declaration: "abstract class ",
+ interface_declaration: "interface ",
+ function_declaration: "fn ",
+ generator_function_declaration: "fn* ",
+ method_definition: "",
+ arrow_function: ""
+ },
+ commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  javascript: {
  hierarchy: /* @__PURE__ */ new Set([
@@ -250,57 +258,94 @@ var LANGUAGE_SPECS = {
  "method_definition",
  "arrow_function"
  ]),
- nameFields: ["name", "id"]
+ nameFields: ["name", "id"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "property_identifier"]),
+ prefixMap: {
+ class_declaration: "class ",
+ function_declaration: "fn ",
+ generator_function_declaration: "fn* ",
+ method_definition: "",
+ arrow_function: ""
+ },
+ commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  python: {
  hierarchy: /* @__PURE__ */ new Set(["class_definition", "function_definition", "decorated_definition"]),
- nameFields: ["name"]
+ nameFields: ["name"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier"]),
+ prefixMap: {
+ class_definition: "class ",
+ function_definition: "def ",
+ decorated_definition: ""
+ },
+ commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  go: {
  hierarchy: /* @__PURE__ */ new Set([
- // 函数和方法
  "function_declaration",
  "method_declaration",
- // 类型定义
  "type_spec",
  "type_declaration",
- // 结构体和接口
  "struct_type",
  "interface_type"
  ]),
- nameFields: ["name"]
+ nameFields: ["name"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "field_identifier"]),
+ prefixMap: {
+ function_declaration: "func ",
+ method_declaration: "func ",
+ type_spec: "type ",
+ type_declaration: "type ",
+ struct_type: "struct ",
+ interface_type: "interface "
+ },
+ commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  rust: {
  hierarchy: /* @__PURE__ */ new Set([
- // 函数
  "function_item",
- // 结构体、枚举、trait
  "struct_item",
  "enum_item",
  "trait_item",
- // impl 块
  "impl_item",
- // 模块
  "mod_item",
- // 类型别名
  "type_item"
  ]),
- nameFields: ["name"]
+ nameFields: ["name"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier"]),
+ prefixMap: {
+ function_item: "fn ",
+ struct_item: "struct ",
+ enum_item: "enum ",
+ trait_item: "trait ",
+ impl_item: "impl ",
+ mod_item: "mod ",
+ type_item: "type "
+ },
+ commentTypes: /* @__PURE__ */ new Set(["line_comment", "block_comment"])
  },
  java: {
  hierarchy: /* @__PURE__ */ new Set([
- // 类和接口
  "class_declaration",
  "interface_declaration",
  "enum_declaration",
  "annotation_type_declaration",
- // 方法和构造函数
  "method_declaration",
  "constructor_declaration",
- // 记录类型 (Java 14+)
  "record_declaration"
  ]),
- nameFields: ["name", "identifier"]
+ nameFields: ["name", "identifier"],
+ nameNodeTypes: /* @__PURE__ */ new Set(["identifier"]),
+ prefixMap: {
+ class_declaration: "class ",
+ interface_declaration: "interface ",
+ enum_declaration: "enum ",
+ annotation_type_declaration: "@interface ",
+ method_declaration: "",
+ constructor_declaration: "",
+ record_declaration: "record "
+ },
+ commentTypes: /* @__PURE__ */ new Set(["line_comment", "block_comment"])
  }
  };
  function getLanguageSpec(language) {
@@ -466,11 +511,11 @@ var SemanticSplitter = class {
  code;
  language;
  constructor(config = {}) {
- const maxChunkSize = config.maxChunkSize ?? 1e3;
+ const maxChunkSize = config.maxChunkSize ?? 2500;
  this.config = {
  maxChunkSize,
- minChunkSize: config.minChunkSize ?? 50,
- chunkOverlap: config.chunkOverlap ?? 100,
+ minChunkSize: config.minChunkSize ?? 100,
+ chunkOverlap: config.chunkOverlap ?? 200,
  // 物理字符硬上限:默认为 maxChunkSize * 4(假设 1 token ≈ 4 chars)
  maxRawChars: config.maxRawChars ?? maxChunkSize * 4
  };
@@ -619,22 +664,10 @@ ${displayCode}`,
  let nextContext = context;
  const spec = getLanguageSpec(this.language);
  if (spec?.hierarchy.has(node.type)) {
- let name = null;
- for (const child of node.namedChildren) {
- if (child.type === "identifier" || child.type === "type_identifier" || child.type === "name") {
- name = child.text;
- break;
- }
- }
- if (!name && node.firstNamedChild) {
- const firstChild = node.firstNamedChild;
- if (firstChild.text.length <= 100 && !firstChild.text.includes("\n")) {
- name = firstChild.text;
- }
- }
+ const name = this.extractNodeName(node, spec);
  if (name) {
- const typePrefix = this.getTypePrefix(node.type);
- nextContext = [...context, `${typePrefix}${name}`];
+ const prefix = spec.prefixMap[node.type] ?? "";
+ nextContext = [...context, `${prefix}${name}`];
  }
  }
  if (nodeSize <= this.config.maxChunkSize) {
@@ -651,21 +684,33 @@ ${displayCode}`,
  return this.mergeAdjacentWindows(childWindows);
  }
  /**
- * 获取节点类型的简短前缀
+ * 从节点中提取名称(数据驱动)
  */
- getTypePrefix(nodeType) {
- if (nodeType.includes("class")) return "class ";
- if (nodeType.includes("interface")) return "interface ";
- if (nodeType.includes("method")) return "method ";
- if (nodeType.includes("function")) return "function ";
- return "";
+ extractNodeName(node, spec) {
+ for (const child of node.namedChildren) {
+ if (spec.nameNodeTypes.has(child.type)) {
+ return child.text;
+ }
+ }
+ if (node.firstNamedChild) {
+ const firstChild = node.firstNamedChild;
+ if (firstChild.text.length <= 100 && !firstChild.text.includes("\n")) {
+ return firstChild.text;
+ }
+ }
+ return null;
  }
  /**
  * Gap-Aware 相邻窗口合并
  *
- * 使用 NWS + Raw 双预算策略:
+ * 使用三重预算策略:
  * - NWS 预算:控制有效代码量
  * - Raw 预算:控制物理字符数,防止大量注释撑爆 Token
+ * - 语义边界惩罚:不同 contextPath 的窗口合并门槛更高
+ *
+ * 前向吸附策略:
+ * - 如果当前窗口以 comment 结尾,将 comment 推到下一个窗口
+ * - 保证 JSDoc/注释与其描述的代码在同一个 chunk
  */
  mergeAdjacentWindows(windows) {
  if (windows.length === 0) return [];
@@ -673,6 +718,11 @@ ${displayCode}`,
  let current = windows[0];
  for (let i = 1; i < windows.length; i++) {
  const next = windows[i];
+ this.forwardAbsorbComments(current, next);
+ if (current.nodes.length === 0) {
+ current = next;
+ continue;
+ }
  const currentStart = current.nodes[0].startIndex;
  const currentEnd = current.nodes[current.nodes.length - 1].endIndex;
  const nextStart = next.nodes[0].startIndex;
@@ -680,13 +730,18 @@ ${displayCode}`,
  const gapNws = this.adapter.nws(currentEnd, nextStart);
  const combinedNws = current.size + gapNws + next.size;
  const combinedRawLen = nextEnd - currentStart;
+ const sameContext = this.isSameContext(current.contextPath, next.contextPath);
+ const boundaryPenalty = sameContext ? 1 : 0.7;
  const isTiny = current.size < this.config.minChunkSize;
- const fitsNwsBudget = combinedNws <= this.config.maxChunkSize || isTiny && combinedNws < this.config.maxChunkSize * 1.5;
- const fitsRawBudget = combinedRawLen <= this.config.maxRawChars;
+ const effectiveBudget = this.config.maxChunkSize * boundaryPenalty;
+ const fitsNwsBudget = combinedNws <= effectiveBudget || isTiny && combinedNws < effectiveBudget * 1.5;
+ const fitsRawBudget = combinedRawLen <= this.config.maxRawChars * boundaryPenalty;
  if (fitsNwsBudget && fitsRawBudget) {
  current.nodes.push(...next.nodes);
  current.size = combinedNws;
- current.contextPath = this.commonPrefix(current.contextPath, next.contextPath);
+ if (next.contextPath.length > current.contextPath.length) {
+ current.contextPath = next.contextPath;
+ }
  } else {
  merged.push(current);
  current = next;
@@ -695,6 +750,60 @@ ${displayCode}`,
  merged.push(current);
  return merged;
  }
+ /**
+ * 前向吸附:将 current 尾部的 comment 节点推到 next 头部
+ *
+ * 这确保 JSDoc/docstring/注释与其描述的函数/方法在同一个 chunk 中,
+ * 而不是被切到前一个 chunk 的末尾。
+ *
+ * 注意:此方法会直接修改 current 和 next
+ */
+ forwardAbsorbComments(current, next) {
+ const spec = getLanguageSpec(this.language);
+ const commentTypes = spec?.commentTypes ?? /* @__PURE__ */ new Set(["comment"]);
+ const absorbedNodes = [];
+ let absorbedNws = 0;
+ while (current.nodes.length > 0) {
+ const lastNode = current.nodes[current.nodes.length - 1];
+ if (commentTypes.has(lastNode.type)) {
+ current.nodes.pop();
+ const nodeNws = this.adapter.nws(lastNode.startIndex, lastNode.endIndex);
+ absorbedNodes.unshift(lastNode);
+ absorbedNws += nodeNws;
+ current.size -= nodeNws;
+ } else {
+ break;
+ }
+ }
+ if (absorbedNodes.length > 0) {
+ const gapNws = next.nodes.length > 0 ? this.adapter.nws(
+ absorbedNodes[absorbedNodes.length - 1].endIndex,
+ next.nodes[0].startIndex
+ ) : 0;
+ next.nodes.unshift(...absorbedNodes);
+ next.size += absorbedNws + gapNws;
+ }
+ }
+ /**
+ * 检查两个 contextPath 是否属于同一语义单元
+ *
+ * 规则:如果两者的公共前缀长度 >= 较短路径长度,认为是同一单元
+ * 例如:
+ * - ["file", "class A", "method foo"] 和 ["file", "class A", "method bar"] -> false(不同方法)
+ * - ["file", "class A"] 和 ["file", "class A", "method foo"] -> true(父子关系)
+ */
+ isSameContext(a, b) {
+ const minLen = Math.min(a.length, b.length);
+ let commonLen = 0;
+ for (let i = 0; i < minLen; i++) {
+ if (a[i] === b[i]) {
+ commonLen++;
+ } else {
+ break;
+ }
+ }
+ return commonLen >= minLen;
+ }
  /**
  * 将窗口转换为最终的 ProcessedChunk
  *
@@ -771,25 +880,6 @@ ${displayCode}`,
  }
  return Math.max(0, result);
  }
- /**
- * 计算两个路径数组的最长公共前缀(LCA)
- *
- * 用于合并窗口时更新 contextPath,避免 breadcrumb 误标
- * 例如:["file", "class A", "method foo"] 和 ["file", "class A", "method bar"]
- * => ["file", "class A"]
- */
- commonPrefix(a, b) {
- const result = [];
- const len = Math.min(a.length, b.length);
- for (let i = 0; i < len; i++) {
- if (a[i] === b[i]) {
- result.push(a[i]);
- } else {
- break;
- }
- }
- return result;
- }
  };
  function generateVectorText(code, contextPath) {
  const breadcrumb = contextPath.join(" > ");
@@ -1052,7 +1142,6 @@ async function scan(rootPath, options = {}) {
  setStoredEmbeddingDimensions(db, currentDimensions);
  }
  if (forceReindex) {
- logger.info("\u5F3A\u5236\u91CD\u65B0\u7D22\u5F15...");
  clear(db);
  if (options.vectorIndex !== false) {
  const embeddingConfig = getEmbeddingConfig();
@@ -1065,15 +1154,12 @@ async function scan(rootPath, options = {}) {
  const scannedPaths = new Set(
  filePaths.map((p) => path3.relative(rootPath, p).replace(/\\/g, "/"))
  );
- let processedCount = 0;
  const results = [];
  const batchSize = 100;
  for (let i = 0; i < filePaths.length; i += batchSize) {
  const batch = filePaths.slice(i, i + batchSize);
  const batchResults = await processFiles(rootPath, batch, knownFiles);
  results.push(...batchResults);
- processedCount += batch.length;
- options.onProgress?.(processedCount, filePaths.length);
  }
  const toAdd = [];
  const toUpdateMtime = [];
@@ -1124,15 +1210,19 @@ async function scan(rootPath, options = {}) {
  errors: results.filter((r) => r.status === "error").length
  };
  if (options.vectorIndex !== false) {
+ options.onProgress?.(45, 100, "\u6B63\u5728\u51C6\u5907\u5411\u91CF\u7D22\u5F15...");
  const embeddingConfig = getEmbeddingConfig();
  const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
  const needsVectorIndex = results.filter(
  (r) => r.status === "added" || r.status === "modified"
  );
  const healingPathSet = new Set(getFilesNeedingVectorIndex(db));
- const healingFiles = results.filter((r) => r.status === "unchanged" && healingPathSet.has(r.relPath)).map((r) => ({ ...r, status: "modified" }));
- if (healingFiles.length > 0) {
- logger.info({ count: healingFiles.length }, "\u81EA\u6108\uFF1A\u53D1\u73B0\u9700\u8981\u8865\u7D22\u5F15\u7684\u6587\u4EF6");
+ const healingFilePaths = results.filter((r) => r.status === "unchanged" && healingPathSet.has(r.relPath)).map((r) => r.absPath);
+ let healingFiles = [];
+ if (healingFilePaths.length > 0) {
+ logger.info({ count: healingFilePaths.length }, "\u81EA\u6108\uFF1A\u53D1\u73B0\u9700\u8981\u8865\u7D22\u5F15\u7684\u6587\u4EF6");
+ const processedHealingFiles = await processFiles(rootPath, healingFilePaths, /* @__PURE__ */ new Map());
+ healingFiles = processedHealingFiles.filter((r) => r.status === "added" || r.status === "modified").map((r) => ({ ...r, status: "modified" }));
  }
  const deletedResults = deletedPaths.map((path4) => ({
  absPath: "",
@@ -1147,7 +1237,11 @@ async function scan(rootPath, options = {}) {
  }));
  const allToIndex = [...needsVectorIndex, ...healingFiles, ...deletedResults];
  if (allToIndex.length > 0) {
- const indexStats = await indexer.indexFiles(db, allToIndex);
+ options.onProgress?.(45, 100, `\u6B63\u5728\u751F\u6210\u5411\u91CF\u5D4C\u5165... (${allToIndex.length} \u4E2A\u6587\u4EF6)`);
+ const indexStats = await indexer.indexFiles(db, allToIndex, (completed, total) => {
+ const progress = 45 + Math.floor(completed / total * 54);
+ options.onProgress?.(progress, 100, `\u6B63\u5728\u751F\u6210\u5411\u91CF\u5D4C\u5165... (${completed}/${total} \u6279\u6B21)`);
+ });
  stats.vectorIndex = {
  indexed: indexStats.indexed,
  deleted: indexStats.deleted,
@@ -1155,6 +1249,7 @@ async function scan(rootPath, options = {}) {
  };
  }
  }
+ options.onProgress?.(100, 100, "\u7D22\u5F15\u5B8C\u6210");
  return stats;
  } finally {
  closeDb(db);
@@ -75,9 +75,7 @@ function createFormattedStream(filePath) {
  const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
  let line = `${time} [${level}] ${msg}`;
  if (Object.keys(extra).length > 0) {
- const extraLines = JSON.stringify(extra, null, 2).split("\n").map((l, i) => i === 0 ? l : ` ${l}`).join("\n");
- line += `
- ${extraLines}`;
+ line += ` ${JSON.stringify(extra)}`;
  }
  writeStream.write(`${line}
  `, callback);
@@ -1,6 +1,6 @@
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";

  // src/search/fts.ts
  var tokenizerCache = /* @__PURE__ */ new WeakMap();
@@ -4,10 +4,10 @@ import {
  batchUpsertChunkFts,
  clearVectorIndexHash,
  isChunksFtsInitialized
- } from "./chunk-VW5RACJC.js";
+ } from "./chunk-OXPWRE3G.js";
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import {
  getEmbeddingConfig
  } from "./chunk-SKBAE26T.js";
@@ -259,15 +259,21 @@ var ProgressTracker = class {
  startTime;
  lastLogTime = 0;
  logIntervalMs = 2e3;
- // 每 3 秒输出一次
- constructor(total) {
+ // 每 2 秒输出一次
+ onProgress;
+ /** 是否跳过日志(单批次时跳过,避免与索引日志混淆) */
+ skipLogs;
+ constructor(total, onProgress) {
  this.total = total;
  this.startTime = Date.now();
+ this.onProgress = onProgress;
+ this.skipLogs = total <= 1;
  }
  /** 记录一个批次完成 */
  recordBatch(tokens) {
  this.completed++;
  this.totalTokens += tokens;
+ this.onProgress?.(this.completed, this.total);
  const now = Date.now();
  if (now - this.lastLogTime >= this.logIntervalMs) {
  this.logProgress();
@@ -276,6 +282,7 @@ var ProgressTracker = class {
  }
  /** 输出进度 */
  logProgress() {
+ if (this.skipLogs) return;
  const elapsed = (Date.now() - this.startTime) / 1e3;
  const percent = Math.round(this.completed / this.total * 100);
  const rate = this.completed / elapsed;
@@ -293,6 +300,7 @@ var ProgressTracker = class {
  }
  /** 完成时输出最终统计 */
  complete() {
+ if (this.skipLogs) return;
  const elapsed = (Date.now() - this.startTime) / 1e3;
  logger.info(
  {
@@ -447,8 +455,9 @@ var EmbeddingClient = class {
  * 批量获取 Embedding
  * @param texts 待处理的文本数组
  * @param batchSize 每批次发送的文本数量(默认 20)
+ * @param onProgress 可选的进度回调 (completed, total) => void
  */
- async embedBatch(texts, batchSize = 20) {
+ async embedBatch(texts, batchSize = 20, onProgress) {
  if (texts.length === 0) {
  return [];
  }
@@ -456,7 +465,7 @@ var EmbeddingClient = class {
  for (let i = 0; i < texts.length; i += batchSize) {
  batches.push(texts.slice(i, i + batchSize));
  }
- const progress = new ProgressTracker(batches.length);
+ const progress = new ProgressTracker(batches.length, onProgress);
  const batchResults = await Promise.all(
  batches.map(
  (batch, batchIndex) => this.processWithRateLimit(batch, batchIndex * batchSize, progress)
@@ -625,8 +634,9 @@ var Indexer = class {
  *
  * @param db SQLite 数据库实例
  * @param results 文件处理结果
+ * @param onProgress 可选的进度回调 (indexed, total) => void
  */
- async indexFiles(db, results) {
+ async indexFiles(db, results, onProgress) {
  if (!this.vectorStore) {
  await this.init();
  }
@@ -670,7 +680,7 @@ var Indexer = class {
  stats.deleted = toDelete.length;
  }
  if (toIndex.length > 0) {
- const indexResult = await this.batchIndex(db, toIndex);
+ const indexResult = await this.batchIndex(db, toIndex, onProgress);
  stats.indexed = indexResult.success;
  stats.errors = indexResult.errors;
  }
@@ -694,7 +704,7 @@ var Indexer = class {
  * 3. FTS 写入批量化:N 次删除+插入 → 1 次批量删除 + 1 次批量插入
  * 4. 日志汇总化:逐文件日志 → 汇总日志
  */
- async batchIndex(db, files) {
+ async batchIndex(db, files, onProgress) {
  if (files.length === 0) {
  return { success: 0, errors: 0 };
  }
@@ -715,7 +725,7 @@ var Indexer = class {
  logger.info({ count: allTexts.length, files: files.length }, "\u5F00\u59CB\u6279\u91CF Embedding");
  let embeddings;
  try {
- const results = await this.embeddingClient.embedBatch(allTexts);
+ const results = await this.embeddingClient.embedBatch(allTexts, 20, onProgress);
  embeddings = results.map((r) => r.embedding);
  } catch (err) {
  const error = err;
@@ -1,9 +1,9 @@
  import {
  codebaseRetrievalSchema,
  handleCodebaseRetrieval
- } from "./chunk-O3HDM3CF.js";
- import "./chunk-VW5RACJC.js";
- import "./chunk-C7XDGBT5.js";
+ } from "./chunk-2SIQBQ2M.js";
+ import "./chunk-OXPWRE3G.js";
+ import "./chunk-JVKVSTQ3.js";
  import "./chunk-SKBAE26T.js";
  export {
  codebaseRetrievalSchema,
package/dist/index.js CHANGED
@@ -1,22 +1,30 @@
  #!/usr/bin/env node
  import {
  scan
- } from "./chunk-QWQ64TBE.js";
- import "./chunk-WWYSLCNZ.js";
+ } from "./chunk-AA3ILFHL.js";
+ import "./chunk-TJHS7BN7.js";
  import {
  generateProjectId
- } from "./chunk-VW5RACJC.js";
+ } from "./chunk-OXPWRE3G.js";
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import "./chunk-SKBAE26T.js";

  // src/index.ts
  import { promises as fs } from "fs";
  import os from "os";
  import path from "path";
+ import { fileURLToPath } from "url";
  import cac from "cac";
+ var __dirname = path.dirname(fileURLToPath(import.meta.url));
+ var pkgPath = path.resolve(__dirname, "../package.json");
+ var pkg = JSON.parse(await fs.readFile(pkgPath, "utf-8"));
  var cli = cac("contextweaver");
+ if (process.argv.includes("-v") || process.argv.includes("--version")) {
+ console.log(pkg.version);
+ process.exit(0);
+ }
  cli.command("init", "\u521D\u59CB\u5316 ContextWeaver \u914D\u7F6E").action(async () => {
  const configDir = path.join(os.homedir(), ".contextweaver");
  const envFile = path.join(configDir, ".env");
@@ -80,32 +88,33 @@ cli.command("index [path]", "\u626B\u63CF\u4EE3\u7801\u5E93\u5E76\u5EFA\u7ACB\u7
  }
  const startTime = Date.now();
  try {
+ let lastLoggedPercent = 0;
  const stats = await scan(rootPath, {
  force: options.force,
- onProgress: (current, total) => {
- const percent = (current / total * 100).toFixed(1);
- logger.info({ current, total, percent: `${percent}%` }, "\u626B\u63CF\u8FDB\u5EA6");
+ onProgress: (current, total, message) => {
+ if (total !== void 0) {
+ const percent = Math.floor(current / total * 100);
+ if (percent >= lastLoggedPercent + 30 && percent < 100) {
+ logger.info(`\u7D22\u5F15\u8FDB\u5EA6: ${percent}% - ${message || ""}`);
+ lastLoggedPercent = Math.floor(percent / 30) * 30;
+ }
+ }
  }
  });
  process.stdout.write("\n");
  const duration = ((Date.now() - startTime) / 1e3).toFixed(2);
- logger.info("\u626B\u63CF\u5B8C\u6210\uFF01");
- logger.info(`\u8017\u65F6: ${duration}s`);
- logger.info(`\u6587\u4EF6\u603B\u6570: ${stats.totalFiles}`);
- logger.info(`\u65B0\u589E: ${stats.added}`);
- logger.info(`\u4FEE\u6539: ${stats.modified}`);
- logger.info(`\u672A\u53D8: ${stats.unchanged}`);
- logger.info(`\u5220\u9664: ${stats.deleted}`);
- logger.info(`\u8DF3\u8FC7: ${stats.skipped}`);
- logger.info(`\u9519\u8BEF: ${stats.errors}`);
+ logger.info(`\u7D22\u5F15\u5B8C\u6210 (${duration}s)`);
+ logger.info(
+ `\u603B\u6570:${stats.totalFiles} \u65B0\u589E:${stats.added} \u4FEE\u6539:${stats.modified} \u672A\u53D8:${stats.unchanged} \u5220\u9664:${stats.deleted} \u8DF3\u8FC7:${stats.skipped} \u9519\u8BEF:${stats.errors}`
+ );
  } catch (err) {
  const error = err;
- logger.error({ err, stack: error.stack }, `\u626B\u63CF\u5931\u8D25: ${error.message}`);
+ logger.error({ err, stack: error.stack }, `\u7D22\u5F15\u5931\u8D25: ${error.message}`);
  process.exit(1);
  }
  });
  cli.command("mcp", "\u542F\u52A8 MCP \u670D\u52A1\u5668").action(async () => {
- const { startMcpServer } = await import("./server-7PYHHTOM.js");
+ const { startMcpServer } = await import("./server-SOOY6RCA.js");
  try {
  await startMcpServer();
  } catch (err) {
@@ -127,7 +136,7 @@ cli.command("search", "\u672C\u5730\u68C0\u7D22\uFF08\u53C2\u6570\u5BF9\u9F50 MC
  }
  const technicalTerms = (options.technicalTerms || "").split(",").map((t) => t.trim()).filter(Boolean);
  const useZen = options.zen !== false;
- const { handleCodebaseRetrieval } = await import("./codebaseRetrieval-AV4GK6FT.js");
+ const { handleCodebaseRetrieval } = await import("./codebaseRetrieval-VZ2E5JYE.js");
  const response = await handleCodebaseRetrieval(
  {
  repo_path: repoPath,
@@ -1,6 +1,6 @@
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import "./chunk-SKBAE26T.js";

  // src/utils/lock.ts
@@ -0,0 +1,10 @@
+ import {
+ scan
+ } from "./chunk-AA3ILFHL.js";
+ import "./chunk-TJHS7BN7.js";
+ import "./chunk-OXPWRE3G.js";
+ import "./chunk-JVKVSTQ3.js";
+ import "./chunk-SKBAE26T.js";
+ export {
+ scan
+ };
@@ -1,11 +1,11 @@
  import {
  codebaseRetrievalSchema,
  handleCodebaseRetrieval
- } from "./chunk-O3HDM3CF.js";
- import "./chunk-VW5RACJC.js";
+ } from "./chunk-2SIQBQ2M.js";
+ import "./chunk-OXPWRE3G.js";
  import {
  logger
- } from "./chunk-C7XDGBT5.js";
+ } from "./chunk-JVKVSTQ3.js";
  import "./chunk-SKBAE26T.js";

  // src/mcp/server.ts
@@ -94,14 +94,31 @@ async function startMcpServer() {
  logger.debug("\u6536\u5230 list_tools \u8BF7\u6C42");
  return { tools: TOOLS };
  });
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
+ server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name, arguments: args } = request.params;
  logger.info({ tool: name }, "\u6536\u5230 call_tool \u8BF7\u6C42");
+ const rawToken = extra._meta?.progressToken;
+ const progressToken = typeof rawToken === "string" || typeof rawToken === "number" ? rawToken : void 0;
+ const onProgress = progressToken ? async (current, total, message) => {
+ try {
+ await extra.sendNotification({
+ method: "notifications/progress",
+ params: {
+ progressToken,
+ progress: current,
+ total,
+ message
+ }
+ });
+ } catch (err) {
+ logger.debug({ error: err.message }, "\u53D1\u9001\u8FDB\u5EA6\u901A\u77E5\u5931\u8D25");
+ }
+ } : void 0;
  try {
  switch (name) {
  case "codebase-retrieval": {
  const parsed = codebaseRetrievalSchema.parse(args);
- return await handleCodebaseRetrieval(parsed);
+ return await handleCodebaseRetrieval(parsed, void 0, onProgress);
  }
  default:
  throw new Error(`Unknown tool: ${name}`);
@@ -121,8 +138,8 @@ async function startMcpServer() {
  }
  });
  const transport = new StdioServerTransport();
- await server.connect(transport);
  logger.info("MCP \u670D\u52A1\u5668\u5DF2\u542F\u52A8\uFF0C\u7B49\u5F85\u8FDE\u63A5...");
+ await server.connect(transport);
  }
  export {
  startMcpServer
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@hsingjui/contextweaver",
- "version": "0.0.4",
+ "version": "0.0.6",
  "description": "A context weaving tool for LLMs",
  "license": "MIT",
  "author": "hsingjui",
@@ -29,9 +29,10 @@
  "fmt": "biome check --write ./src"
  },
  "dependencies": {
- "@lancedb/lancedb": "^0.19.1",
+ "@keqingmoe/tree-sitter": "^0.26.2",
+ "@lancedb/lancedb": "^0.22.0",
  "@modelcontextprotocol/sdk": "^1.25.1",
- "better-sqlite3": "^11.10.0",
+ "better-sqlite3": "^12.2.0",
  "cac": "^6.7.14",
  "chardet": "^2.1.1",
  "dotenv": "^17.2.3",
@@ -40,28 +41,27 @@
  "ignore": "^7.0.5",
  "p-limit": "^7.2.0",
  "pino": "^10.1.0",
- "tree-sitter": "0.20.6",
- "tree-sitter-go": "0.20.0",
- "tree-sitter-java": "0.20.2",
- "tree-sitter-javascript": "0.20.4",
- "tree-sitter-python": "0.20.4",
- "tree-sitter-rust": "0.20.4",
- "tree-sitter-typescript": "0.20.5",
+ "tree-sitter-go": "0.23.4",
+ "tree-sitter-java": "0.23.5",
+ "tree-sitter-javascript": "0.23.1",
+ "tree-sitter-python": "0.25.0",
+ "tree-sitter-rust": "0.21.0",
+ "tree-sitter-typescript": "0.23.2",
  "zod": "^4.2.1"
  },
  "devDependencies": {
  "@biomejs/biome": "2.3.10",
  "@types/better-sqlite3": "^7.6.13",
- "@types/node": "^25.0.3",
+ "@types/node": "^24.0.0",
  "knip": "^5.78.0",
  "tsup": "^8.5.1",
  "typescript": "^5.9.3"
  },
  "pnpm": {
  "onlyBuiltDependencies": [
+ "@keqingmoe/tree-sitter",
  "better-sqlite3",
  "esbuild",
- "tree-sitter",
  "tree-sitter-go",
  "tree-sitter-java",
  "tree-sitter-javascript",
@@ -1,10 +0,0 @@
- import {
- scan
- } from "./chunk-QWQ64TBE.js";
- import "./chunk-WWYSLCNZ.js";
- import "./chunk-VW5RACJC.js";
- import "./chunk-C7XDGBT5.js";
- import "./chunk-SKBAE26T.js";
- export {
- scan
- };