@llangtop/pwiki-core 0.3.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/dist/WikiEngine.d.ts +25 -37
  2. package/dist/WikiEngine.d.ts.map +1 -1
  3. package/dist/WikiEngine.js +157 -298
  4. package/dist/WikiEngine.js.map +1 -1
  5. package/dist/ast-chunker.d.ts +23 -0
  6. package/dist/ast-chunker.d.ts.map +1 -0
  7. package/dist/ast-chunker.js +434 -0
  8. package/dist/ast-chunker.js.map +1 -0
  9. package/dist/content-cache.d.ts +13 -0
  10. package/dist/content-cache.d.ts.map +1 -0
  11. package/dist/content-cache.js +33 -0
  12. package/dist/content-cache.js.map +1 -0
  13. package/dist/embedder.d.ts +38 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +267 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/file-manifest.d.ts +46 -0
  18. package/dist/file-manifest.d.ts.map +1 -0
  19. package/dist/file-manifest.js +121 -0
  20. package/dist/file-manifest.js.map +1 -0
  21. package/dist/index.d.ts +18 -8
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +25 -7
  24. package/dist/index.js.map +1 -1
  25. package/dist/indexer-compile.d.ts +20 -0
  26. package/dist/indexer-compile.d.ts.map +1 -0
  27. package/dist/indexer-compile.js +198 -0
  28. package/dist/indexer-compile.js.map +1 -0
  29. package/dist/indexer-embed.d.ts +21 -0
  30. package/dist/indexer-embed.d.ts.map +1 -0
  31. package/dist/indexer-embed.js +248 -0
  32. package/dist/indexer-embed.js.map +1 -0
  33. package/dist/indexer-scan.d.ts +4 -0
  34. package/dist/indexer-scan.d.ts.map +1 -0
  35. package/dist/indexer-scan.js +51 -0
  36. package/dist/indexer-scan.js.map +1 -0
  37. package/dist/indexer.d.ts +4 -0
  38. package/dist/indexer.d.ts.map +1 -0
  39. package/dist/indexer.js +7 -0
  40. package/dist/indexer.js.map +1 -0
  41. package/dist/model-registry.d.ts +32 -0
  42. package/dist/model-registry.d.ts.map +1 -0
  43. package/dist/model-registry.js +82 -0
  44. package/dist/model-registry.js.map +1 -0
  45. package/dist/parser.d.ts +9 -0
  46. package/dist/parser.d.ts.map +1 -0
  47. package/dist/parser.js +54 -0
  48. package/dist/parser.js.map +1 -0
  49. package/dist/preprocessor.d.ts +36 -0
  50. package/dist/preprocessor.d.ts.map +1 -0
  51. package/dist/preprocessor.js +209 -0
  52. package/dist/preprocessor.js.map +1 -0
  53. package/dist/search.d.ts +6 -0
  54. package/dist/search.d.ts.map +1 -0
  55. package/dist/search.js +91 -0
  56. package/dist/search.js.map +1 -0
  57. package/dist/semantic-compiler.d.ts +44 -0
  58. package/dist/semantic-compiler.d.ts.map +1 -0
  59. package/dist/semantic-compiler.js +376 -0
  60. package/dist/semantic-compiler.js.map +1 -0
  61. package/dist/semantic-search.d.ts +11 -0
  62. package/dist/semantic-search.d.ts.map +1 -0
  63. package/dist/semantic-search.js +217 -0
  64. package/dist/semantic-search.js.map +1 -0
  65. package/dist/store-settings.d.ts +32 -0
  66. package/dist/store-settings.d.ts.map +1 -0
  67. package/dist/store-settings.js +138 -0
  68. package/dist/store-settings.js.map +1 -0
  69. package/dist/store-vectors.d.ts +13 -0
  70. package/dist/store-vectors.d.ts.map +1 -0
  71. package/dist/store-vectors.js +101 -0
  72. package/dist/store-vectors.js.map +1 -0
  73. package/dist/store.d.ts +11 -0
  74. package/dist/store.d.ts.map +1 -0
  75. package/dist/store.js +28 -0
  76. package/dist/store.js.map +1 -0
  77. package/dist/types.d.ts +75 -92
  78. package/dist/types.d.ts.map +1 -1
  79. package/dist/types.js +1 -1
  80. package/dist/types.js.map +1 -1
  81. package/dist/wiki-paths.d.ts +3 -0
  82. package/dist/wiki-paths.d.ts.map +1 -0
  83. package/dist/wiki-paths.js +13 -0
  84. package/dist/wiki-paths.js.map +1 -0
  85. package/package.json +38 -38
  86. package/dist/compile/compiler.d.ts +0 -39
  87. package/dist/compile/compiler.d.ts.map +0 -1
  88. package/dist/compile/compiler.js +0 -227
  89. package/dist/compile/compiler.js.map +0 -1
  90. package/dist/compile/index.d.ts +0 -3
  91. package/dist/compile/index.d.ts.map +0 -1
  92. package/dist/compile/index.js +0 -2
  93. package/dist/compile/index.js.map +0 -1
  94. package/dist/embed/WikiEmbedder.d.ts +0 -28
  95. package/dist/embed/WikiEmbedder.d.ts.map +0 -1
  96. package/dist/embed/WikiEmbedder.js +0 -147
  97. package/dist/embed/WikiEmbedder.js.map +0 -1
  98. package/dist/embed/index.d.ts +0 -2
  99. package/dist/embed/index.d.ts.map +0 -1
  100. package/dist/embed/index.js +0 -2
  101. package/dist/embed/index.js.map +0 -1
  102. package/dist/llm/WikiLLM.d.ts +0 -24
  103. package/dist/llm/WikiLLM.d.ts.map +0 -1
  104. package/dist/llm/WikiLLM.js +0 -46
  105. package/dist/llm/WikiLLM.js.map +0 -1
  106. package/dist/llm/index.d.ts +0 -3
  107. package/dist/llm/index.d.ts.map +0 -1
  108. package/dist/llm/index.js +0 -2
  109. package/dist/llm/index.js.map +0 -1
  110. package/dist/models.d.ts +0 -5
  111. package/dist/models.d.ts.map +0 -1
  112. package/dist/models.js +0 -54
  113. package/dist/models.js.map +0 -1
  114. package/dist/search/WikiSearch.d.ts +0 -14
  115. package/dist/search/WikiSearch.d.ts.map +0 -1
  116. package/dist/search/WikiSearch.js +0 -223
  117. package/dist/search/WikiSearch.js.map +0 -1
  118. package/dist/search/index.d.ts +0 -2
  119. package/dist/search/index.d.ts.map +0 -1
  120. package/dist/search/index.js +0 -2
  121. package/dist/search/index.js.map +0 -1
  122. package/dist/store/WikiStore.d.ts +0 -47
  123. package/dist/store/WikiStore.d.ts.map +0 -1
  124. package/dist/store/WikiStore.js +0 -301
  125. package/dist/store/WikiStore.js.map +0 -1
  126. package/dist/store/index.d.ts +0 -2
  127. package/dist/store/index.d.ts.map +0 -1
  128. package/dist/store/index.js +0 -2
  129. package/dist/store/index.js.map +0 -1
  130. package/dist/util/fs.d.ts +0 -7
  131. package/dist/util/fs.d.ts.map +0 -1
  132. package/dist/util/fs.js +0 -36
  133. package/dist/util/fs.js.map +0 -1
  134. package/dist/util/index.d.ts +0 -3
  135. package/dist/util/index.d.ts.map +0 -1
  136. package/dist/util/index.js +0 -3
  137. package/dist/util/index.js.map +0 -1
  138. package/dist/util/paths.d.ts +0 -17
  139. package/dist/util/paths.d.ts.map +0 -1
  140. package/dist/util/paths.js +0 -31
  141. package/dist/util/paths.js.map +0 -1
@@ -0,0 +1,82 @@
1
+ // model-registry.ts — 模型中间层 (v1.0)
2
+ //
3
+ // 提供内置模型目录 + 选择/查询 API,让 wiki 搜索与具体模型解耦。
4
+ // store.ts 只存 currentModelId 字符串,所有模型元信息由此 registry 提供。
5
+ //
6
+ // 添加新模型: 在 BUILTIN_MODELS 数组中追加一条即可。
7
+ // embedder / indexer / management 通过 getCurrentModel() 自动适配。
8
+ import { readModelId, writeModelId } from "./store.js";
9
// ---- Built-in model catalogue ----

/** One decimal megabyte; the approximate model sizes below are expressed in it. */
const MEGABYTE = 1_000_000;

/**
 * Embedding models known to the registry.
 *
 * Each entry carries the registry id, a display name, the Hugging Face repo,
 * the embedding dimension, a description, a list of language codes, the
 * maximum token count and approximate int8 / fp32 sizes in bytes.
 * The first entry is the one other functions in this module fall back to.
 */
export const BUILTIN_MODELS = [
    {
        id: "bge-base-zh-v1.5",
        name: "BGE Base Chinese v1.5",
        hfRepo: "Xenova/bge-base-zh-v1.5",
        dim: 768,
        description: "BAAI 中文优化,MTEB 中文榜单领先,适合中文技术文档语义搜索",
        languages: ["zh", "en"],
        maxTokens: 512,
        int8Size: 130 * MEGABYTE, // about 130 MB
        fp32Size: 390 * MEGABYTE, // about 390 MB
    },
    {
        id: "bge-large-zh-v1.5",
        name: "BGE Large Chinese v1.5",
        hfRepo: "Xenova/bge-large-zh-v1.5",
        dim: 1024,
        description: "BAAI 中文大模型,1024 维高精度,适合对中文精度有较高要求的笔记 wiki 化",
        languages: ["zh", "en"],
        maxTokens: 512,
        int8Size: 324 * MEGABYTE, // about 324 MB
        fp32Size: 1300 * MEGABYTE, // about 1.3 GB
    },
    {
        id: "paraphrase-multilingual",
        name: "Paraphrase Multilingual MiniLM",
        hfRepo: "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
        dim: 384,
        description: "轻量多语言模型,50+ 语言,适合混合语言知识库",
        languages: ["zh", "en", "fr", "de", "ja", "ko", "..."],
        maxTokens: 128,
        int8Size: 118 * MEGABYTE, // about 118 MB
        fp32Size: 470 * MEGABYTE, // about 470 MB
    },
    {
        id: "bge-m3",
        name: "BGE M3",
        hfRepo: "Xenova/bge-m3",
        dim: 1024,
        description: "BAAI 多语言多粒度模型,100+ 语言,支持长文本 (8192 token),中英混合笔记首选",
        languages: ["zh", "en", "fr", "de", "ja", "ko", "es", "ru", "ar", "..."],
        maxTokens: 8192,
        int8Size: 340 * MEGABYTE, // about 340 MB (O4 quantization)
        fp32Size: 2200 * MEGABYTE, // about 2.2 GB
    },
];
56
+ // ---- 查询 API ----
57
/**
 * Returns every built-in model.
 *
 * The result is the module-level BUILTIN_MODELS array itself, not a copy.
 */
export function getBuiltinModels() {
    const catalogue = BUILTIN_MODELS;
    return catalogue;
}
61
/**
 * Looks a built-in model up by its id.
 *
 * @param id Model id to search for (compared with strict equality).
 * @returns The first matching entry of BUILTIN_MODELS, or `undefined` when none matches.
 */
export function findModel(id) {
    for (const candidate of BUILTIN_MODELS) {
        if (candidate.id === id) {
            return candidate;
        }
    }
    return undefined;
}
65
/**
 * Returns the metadata of the currently selected model.
 *
 * The id comes from readModelId(); when it does not resolve to a built-in
 * model, the first entry of BUILTIN_MODELS is returned instead.
 */
export function getCurrentModel() {
    const selected = findModel(readModelId());
    // `== null` covers both null and undefined, like the nullish-coalescing form.
    return selected == null ? BUILTIN_MODELS[0] : selected;
}
70
/**
 * Switches the selected model.
 *
 * @param id Id of the model to select.
 * @returns The newly selected model after its id has been handed to
 *          writeModelId(), or `null` when no built-in model has that id
 *          (in which case nothing is written).
 */
export function selectModel(id) {
    const target = findModel(id);
    if (target) {
        writeModelId(target.id);
        return target;
    }
    return null;
}
78
/** Returns the id of the default model, i.e. the first entry of BUILTIN_MODELS. */
export function getDefaultModelId() {
    const [defaultModel] = BUILTIN_MODELS;
    return defaultModel.id;
}
82
+ //# sourceMappingURL=model-registry.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-registry.js","sourceRoot":"","sources":["../src/model-registry.ts"],"names":[],"mappings":"AAAA,mCAAmC;AACnC,EAAE;AACF,yCAAyC;AACzC,wDAAwD;AACxD,EAAE;AACF,qCAAqC;AACrC,6DAA6D;AAE7D,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAyBvD,mBAAmB;AAEnB,MAAM,CAAC,MAAM,cAAc,GAAgB;IACzC;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE,yBAAyB;QACjC,GAAG,EAAE,GAAG;QACR,WAAW,EAAE,oCAAoC;QACjD,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;QACvB,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,WAAW,EAAI,UAAU;KACpC;IACD;QACE,EAAE,EAAE,mBAAmB;QACvB,IAAI,EAAE,wBAAwB;QAC9B,MAAM,EAAE,0BAA0B;QAClC,GAAG,EAAE,IAAI;QACT,WAAW,EAAE,6CAA6C;QAC1D,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;QACvB,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,aAAa,EAAE,UAAU;KACpC;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,gCAAgC;QACtC,MAAM,EAAE,8CAA8C;QACtD,GAAG,EAAE,GAAG;QACR,WAAW,EAAE,0BAA0B;QACvC,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC;QACtD,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,WAAW,EAAI,UAAU;KACpC;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,QAAQ;QACd,MAAM,EAAE,eAAe;QACvB,GAAG,EAAE,IAAI;QACT,WAAW,EAAE,mDAAmD;QAChE,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC;QACxE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,WAAW,EAAI,kBAAkB;QAC3C,QAAQ,EAAE,aAAa,EAAE,UAAU;KACpC;CACF,CAAC;AAEF,mBAAmB;AAEnB,eAAe;AACf,MAAM,UAAU,gBAAgB;IAC9B,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,gBAAgB;AAChB,MAAM,UAAU,SAAS,CAAC,EAAsB;IAC9C,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAC/C,CAAC;AAED,mBAAmB;AACnB,MAAM,UAAU,eAAe;IAC7B,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,OAAO,SAAS,CAAC,EAAE,CAAC,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC;AAC5C,CAAC;AAED,qCAAqC;AACrC,MAAM,UAAU,WAAW,CAAC,EAAU;IACpC,MAAM,CAAC,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;IACxB,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACnB,OAAO,CAAC,CAAC;AACX,CAAC;AAED,cAAc;AACd,MAAM,UAA
U,iBAAiB;IAC/B,OAAO,cAAc,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,9 @@
1
+ import type { FileEntry } from "./types.js";
2
+ /**
3
+ * 解析单个 .md 文件为 FileEntry
4
+ * @param root 数据源根目录
5
+ * @param filePath 文件绝对路径
6
+ * @param mtime 文件修改时间(可选;不传则通过 statSync 自动获取)
7
+ */
8
+ export declare function parseFileEntry(root: string, filePath: string, mtime?: string): FileEntry | null;
9
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C;;;;;GAKG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,KAAK,CAAC,EAAE,MAAM,GACb,SAAS,GAAG,IAAI,CA2ClB"}
package/dist/parser.js ADDED
@@ -0,0 +1,54 @@
1
+ // parser.ts — 统一的 .md 文件元信息解析
2
+ // 提取 frontmatter (title/tags) + H1 标题
3
+ // 供 indexer.ts 和 management.ts 共用
4
+ import { readFileSync, statSync } from "node:fs";
5
+ import { relative, basename } from "node:path";
6
/**
 * Removes one pair of matching quotes that wraps the whole value.
 * Quotes inside the value (for example an apostrophe) are left untouched.
 */
function stripWrappingQuotes(value) {
    const wrapped = value.match(/^(['"])(.*)\1$/);
    return wrapped ? wrapped[2] : value;
}

/**
 * Parses a single .md file into a FileEntry.
 *
 * Title precedence: a non-empty frontmatter `title`, then the first `# ` heading
 * found after the frontmatter, then the file name without `.md`.
 * Tags are read from an inline frontmatter list (`tags: [a, b]`); empty items
 * are dropped.
 *
 * @param root     Root directory of the data source.
 * @param filePath Absolute path of the file.
 * @param mtime    File modification time (optional; read via statSync when omitted).
 * @returns `{ title, tags, sourceDir, relPath, mtime }`, or `null` when the file
 *          cannot be read or stat'ed (best-effort: errors are not rethrown).
 */
export function parseFileEntry(root, filePath, mtime) {
    try {
        // Drop a leading BOM so that the frontmatter fence is found at offset 0.
        const raw = readFileSync(filePath, "utf-8").replace(/^\uFEFF/, "");
        const relPath = relative(root, filePath).replace(/\\/g, "/");
        let title = basename(filePath, ".md");
        const tags = [];
        let hasFrontmatterTitle = false;

        // --------------------------------------------------
        // frontmatter (LF or CRLF line endings)
        // --------------------------------------------------
        const fmMatch = raw.match(/^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
        if (fmMatch) {
            for (const line of fmMatch[1].split(/\r?\n/)) {
                const colon = line.indexOf(":");
                if (colon < 0) {
                    continue;
                }
                const key = line.slice(0, colon).trim();
                const value = stripWrappingQuotes(line.slice(colon + 1).trim());
                if (key === "title" && value !== "") {
                    title = value;
                    hasFrontmatterTitle = true;
                }
                if (key === "tags" && value.startsWith("[") && value.endsWith("]")) {
                    const items = value
                        .slice(1, -1)
                        .split(",")
                        .map((item) => stripWrappingQuotes(item.trim()))
                        // `tags: []` splits into a single empty string; do not record it.
                        .filter((item) => item !== "");
                    tags.push(...items);
                }
            }
        }

        // --------------------------------------------------
        // no frontmatter title -> fall back to the first "# " heading
        // --------------------------------------------------
        if (!hasFrontmatterTitle) {
            // Search only the body so a "# ..." line inside the frontmatter is not mistaken for a heading.
            const body = fmMatch ? raw.slice(fmMatch[0].length) : raw;
            const h1 = body.match(/^# (.+)$/m);
            if (h1) {
                title = h1[1].trim();
            }
        }

        const finalMtime = mtime ?? statSync(filePath).mtime.toISOString();
        return { title, tags, sourceDir: root, relPath, mtime: finalMtime };
    }
    catch {
        return null;
    }
}
54
+ //# sourceMappingURL=parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,8BAA8B;AAC9B,sCAAsC;AACtC,kCAAkC;AAElC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAChB,KAAc;IAEd,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC7D,IAAI,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACtC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,qDAAqD;QACrD,cAAc;QACd,qDAAqD;QACrD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACnD,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC7B,IAAI,EAAE,GAAG,CAAC;oBAAE,SAAS;gBACrB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBACzD,IAAI,CAAC,KAAK,OAAO;oBAAE,KAAK,GAAG,CAAC,CAAC;gBAC7B,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzD,IAAI,CAAC,IAAI,CACP,GAAG,CAAC;yBACD,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;yBACZ,KAAK,CAAC,GAAG,CAAC;yBACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAC7C,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,qDAAqD;QACrD,oCAAoC;QACpC,qDAAqD;QACrD,IAAI,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC;YAC1D,MAAM,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAClC,IAAI,EAAE;gBAAE,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,IAAI,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAEnE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;IACtE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,36 @@
1
+ /** chunkType: markdown 特征推断 */
2
+ export declare function inferChunkType(text: string, heading: string): string;
3
+ /** contentClass: 文件路径推断 */
4
+ export declare function inferContentClass(relPath: string): string;
5
+ /** importance: 启发式打分 (0.1-1.0) */
6
+ export declare function inferImportance(text: string, heading: string): number;
7
+ /** temporalAnchor: 正则提取第一个 YYYY-MM-DD */
8
+ export declare function inferTemporalAnchor(text: string): string | undefined;
9
+ /** confidence: 基于文本长度的预设置信度 */
10
+ export declare function inferConfidence(text: string): number;
11
+ /** summary: 清洗后取前 30 字 */
12
+ export declare function inferSummary(text: string): string;
13
+ /** keywords: 英文标识符 + 中文高频词预提取 */
14
+ export declare function inferKeywords(text: string): string[];
15
+ /** 预处理器对文件全文的输出 */
16
+ export interface PreprocessedChunk {
17
+ /** 程序切分的块(按标题简单切,仅作兜底) */
18
+ heading: string;
19
+ level: number;
20
+ text: string;
21
+ /** 程序提取的元数据 */
22
+ chunkType: string;
23
+ contentClass: string;
24
+ importance: number;
25
+ temporalAnchor?: string;
26
+ confidence: number;
27
+ summary: string;
28
+ keywords: string[];
29
+ }
30
+ /**
31
+ * 对文件全文做预处理: AST 分块 + 提取每段元数据。
32
+ * v5.3: 复用 ast-chunker 替代 regex 逐行扫描。
33
+ * 这是兜底分块 — LLM 在文件级编译时可能输出不同的 segments。
34
+ */
35
+ export declare function preprocessFile(relPath: string, fullText: string, defaultTitle: string): Promise<PreprocessedChunk[]>;
36
+ //# sourceMappingURL=preprocessor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preprocessor.d.ts","sourceRoot":"","sources":["../src/preprocessor.ts"],"names":[],"mappings":"AAkBA,+BAA+B;AAC/B,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAsBpE;AAED,2BAA2B;AAC3B,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAkBzD;AAED,kCAAkC;AAClC,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd,MAAM,CAeR;AAED,yCAAyC;AACzC,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS,CAKpB;AAED,+BAA+B;AAC/B,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAIpD;AAED,0BAA0B;AAC1B,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASjD;AAED,iCAAiC;AACjC,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,GACX,MAAM,EAAE,CAmDV;AAMD,mBAAmB;AACnB,MAAM,WAAW,iBAAiB;IAChC,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,eAAe;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;GAIG;AACH,wBAAsB,cAAc,CAClC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,iBAAiB,EAAE,CAAC,CA+D9B"}
@@ -0,0 +1,209 @@
1
+ // preprocessor.ts — 程序化元数据提取 (v5.2)
2
+ //
3
+ // 原则: 程序做能做的一切,LLM 只做需要语义理解的活。
4
+ //
5
+ // 7 个字段由规则/启发式/正则自动提取:
6
+ // chunkType, contentClass, importance, temporalAnchor,
7
+ // confidence, summary, keywords
8
+ //
9
+ // LLM 只负责 4 个语义字段:
10
+ // topic, normalizedText, concepts, aliases
11
+ import { extractChunksAST } from "./ast-chunker.js";
12
+ // ============================================================
13
+ // 推断规则
14
+ // ============================================================
15
/**
 * Infers a chunk type from markdown features.
 *
 * Body signals are checked first (fenced code, open checkbox, ticked checkbox),
 * then heading keywords in a fixed order; otherwise text shorter than 50
 * characters is "reference" and anything else is "note".
 *
 * @param text    Chunk body.
 * @param heading Chunk heading; keyword rules only match when it starts with 1-4 `#`.
 * @returns One of "code", "todo", "log", "architecture", "decision",
 *          "reference", "question", "idea", "research", "note".
 */
export function inferChunkType(text, heading) {
    const bodyRules = [
        { pattern: /```[\s\S]*?```/, type: "code" },
        { pattern: /^\s*[-*]\s*\[ \]/m, type: "todo" },
        { pattern: /^\s*[-*]\s*\[x\]/im, type: "log" },
    ];
    for (const rule of bodyRules) {
        if (rule.pattern.test(text)) {
            return rule.type;
        }
    }

    const headingRules = [
        { pattern: /^#{1,4}\s*(架构|拓扑|结构|方案)/, type: "architecture" },
        { pattern: /^#{1,4}\s*(决定|决策|结论|决议)/, type: "decision" },
        { pattern: /^#{1,4}\s*(参考|链接|相关|资源|附录)/, type: "reference" },
        { pattern: /^#{1,4}\s*(问题|排查|故障|报错|异常)/, type: "question" },
        { pattern: /^#{1,4}\s*(日志|记录|日报|周报|流水)/, type: "log" },
        { pattern: /^#{1,4}\s*(想法|思路|灵感|idea)/i, type: "idea" },
        { pattern: /^#{1,4}\s*(研究|调研|分析|探索)/, type: "research" },
    ];
    for (const rule of headingRules) {
        if (rule.pattern.test(heading)) {
            return rule.type;
        }
    }

    return text.length < 50 ? "reference" : "note";
}
41
/**
 * Infers a content class from keywords in the file's relative path.
 *
 * Rules are tried in order and the first match wins; "reference" is the default.
 *
 * @param relPath Relative path of the file.
 * @returns "conversation", "event", "knowledge" or "reference".
 */
export function inferContentClass(relPath) {
    const pathRules = [
        { pattern: /chatgpt|聊天|对话|conversation/i, label: "conversation" },
        { pattern: /日报|更新|会议|流水|日志|记录|周报/i, label: "event" },
        { pattern: /知识点|规范|标准|原理|手册|指南|教程|总结/i, label: "knowledge" },
    ];
    const hit = pathRules.find((rule) => rule.pattern.test(relPath));
    return hit ? hit.label : "reference";
}
51
/**
 * Heuristic importance score, clamped to the range 0.1 - 1.0.
 *
 * Starts at 0.3 and applies, in order: +0.2 for more than 200 characters,
 * +0.1 for more than 500, +0.15 when a code fence is present, +0.1 when the
 * heading starts with 1-3 `#`, +0.1 for problem/fix keywords and -0.3 for
 * TODO-style keywords.
 *
 * @param text    Chunk body.
 * @param heading Chunk heading.
 */
export function inferImportance(text, heading) {
    const size = text.length;
    const adjustments = [
        { applies: size > 200, delta: 0.2 },
        { applies: size > 500, delta: 0.1 },
        { applies: /```/.test(text), delta: 0.15 },
        { applies: /^#{1,3}\s/.test(heading), delta: 0.1 },
        { applies: /错误|异常|故障|问题|解决|修复|排查/.test(text), delta: 0.1 },
        { applies: /TODO|待办|以后|FIXME|临时|暂存/i.test(text), delta: -0.3 },
    ];
    let total = 0.3;
    // Applied sequentially so the floating-point sum follows the same order of additions.
    for (const adjustment of adjustments) {
        if (adjustment.applies) {
            total += adjustment.delta;
        }
    }
    return Math.min(1, Math.max(0.1, total));
}
68
/**
 * Extracts the first plausible date from the text and returns it as YYYY-MM-DD.
 *
 * Accepts `20YY-M-D` with `-` or `/` separators. Month and day are zero-padded
 * in the result, and candidates whose month is outside 1-12 or whose day is
 * outside 1-31 are skipped in favour of the next candidate (for example a
 * version-like `2023-99-99`).
 *
 * @param text Text to scan.
 * @returns The normalised date string, or `undefined` when no candidate qualifies.
 */
export function inferTemporalAnchor(text) {
    const candidates = text.matchAll(/\b(20\d{2})[-/](\d{1,2})[-/](\d{1,2})\b/g);
    for (const [, year, month, day] of candidates) {
        const monthNumber = Number(month);
        const dayNumber = Number(day);
        if (monthNumber >= 1 && monthNumber <= 12 && dayNumber >= 1 && dayNumber <= 31) {
            return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
        }
    }
    return undefined;
}
73
/**
 * Preset confidence derived only from the text length:
 * under 20 characters 0.3, under 80 characters 0.6, otherwise 0.85.
 *
 * @param text Chunk body.
 */
export function inferConfidence(text) {
    const size = text.length;
    return size < 20 ? 0.3 : size < 80 ? 0.6 : 0.85;
}
81
/**
 * Builds a short summary: the cleaned text truncated to its first 30 characters.
 *
 * Cleaning removes heading markers, replaces fenced code with "[代码]", keeps
 * only the label of markdown links, strips emphasis/code markers and joins
 * lines with spaces. Truncation counts Unicode code points, so a character
 * stored as a surrogate pair (for example an emoji) is never cut in half.
 *
 * @param text Chunk body.
 * @returns At most 30 code points of cleaned text.
 */
export function inferSummary(text) {
    const cleaned = text
        .replace(/^#{1,6}\s+/gm, "")
        .replace(/```[\s\S]*?```/g, "[代码]")
        .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
        .replace(/\*\*|__|\*|_|`|~~/g, "")
        // Treat CRLF as a single line break so no stray "\r" ends up in the summary.
        .replace(/\r?\n/g, " ")
        .trim();
    // Array.from iterates by code point, unlike String.prototype.slice.
    return Array.from(cleaned).slice(0, 30).join("");
}
92
/** Stop words (all lowercase) that are never reported as keywords. Built once at module load. */
const KEYWORD_STOP_WORDS = new Set([
    "可以", "一个", "这个", "不是", "还是", "如果", "因为", "所以", "但是", "而且",
    "或者", "以及", "就是", "没有", "已经", "什么", "怎么", "这样", "那样", "时候",
    "问题", "需要", "通过", "进行", "使用", "用于", "可能", "应该", "然后",
    "the", "and", "for", "from", "with", "that", "this", "are", "not", "but", "has", "was",
]);

/**
 * Pre-extracts keywords: English identifiers (3+ letters/underscores) followed
 * by runs of 2-4 CJK characters, de-duplicated, stop words removed, first 8 kept.
 *
 * Stop-word filtering is case-insensitive, matching the case-insensitive way
 * English tokens are collected, so "The" and "AND" are filtered like "the" and "and".
 *
 * @param text Chunk body.
 * @returns Up to 8 keywords in order of first appearance (English tokens first).
 */
export function inferKeywords(text) {
    const english = text.match(/\b[a-z_]{3,}\b/gi) || [];
    const chinese = text.match(/[\u4e00-\u9fff]{2,4}/g) || [];
    const unique = [...new Set([...english, ...chinese])];
    return unique
        .filter((word) => !KEYWORD_STOP_WORDS.has(word.toLowerCase()))
        .slice(0, 8);
}
143
/**
 * Pre-processes a whole file: splits it into chunks and derives the
 * rule-based metadata of every chunk.
 *
 * The chunks returned by extractChunksAST() are used when there are any.
 * Otherwise the text is split line by line on headings of level 1-4; lines
 * before the first heading go into a level-0 section titled `defaultTitle`,
 * and a leading `---` ... `---` frontmatter block is removed from the first section.
 *
 * @param relPath      Relative path of the file (also drives contentClass).
 * @param fullText     Full text of the file.
 * @param defaultTitle Heading used for content that precedes any heading.
 * @returns One record per chunk with heading, level, text and the inferred
 *          chunkType, contentClass, importance, temporalAnchor, confidence,
 *          summary and keywords.
 */
export async function preprocessFile(relPath, fullText, defaultTitle) {
    // Shared record builder so both paths emit the same fields in the same order.
    const toChunk = (heading, level, text, typeHint) => ({
        heading,
        level,
        text,
        chunkType: typeHint || inferChunkType(text, heading),
        contentClass: inferContentClass(relPath),
        importance: inferImportance(text, heading),
        temporalAnchor: inferTemporalAnchor(text),
        confidence: inferConfidence(text),
        summary: inferSummary(text),
        keywords: inferKeywords(text),
    });

    // Preferred path: AST chunking.
    const astChunks = await extractChunksAST(fullText, relPath, defaultTitle);
    if (astChunks.length > 0) {
        return astChunks.map((c) => toChunk(c.heading, c.level, c.rawText, c.chunkTypeHint));
    }

    // Fallback: line-by-line scan for headings.
    // split() always yields at least one line, so `sections` is never empty after the loop.
    const sections = [];
    for (const line of fullText.split("\n")) {
        if (/^#{1,4}\s/.test(line)) {
            const heading = line.trim();
            sections.push({ heading, level: heading.match(/^#+/)[0].length, lines: [] });
            continue;
        }
        if (sections.length === 0) {
            sections.push({ heading: defaultTitle, level: 0, lines: [] });
        }
        sections[sections.length - 1].lines.push(line);
    }

    // Skip frontmatter: it can only sit at the very top of the first section.
    const first = sections[0];
    if (first.lines[0]?.trim() === "---") {
        // Array.prototype.findIndex has no start-index parameter (its second
        // argument is `thisArg`), so the opening fence is excluded via the index.
        const fmEnd = first.lines.findIndex((l, i) => i > 0 && l.trim() === "---");
        if (fmEnd > 0) {
            first.lines = first.lines.slice(fmEnd + 1);
        }
    }

    return sections.map((s) => toChunk(s.heading, s.level, s.lines.join("\n").trim()));
}
209
+ //# sourceMappingURL=preprocessor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preprocessor.js","sourceRoot":"","sources":["../src/preprocessor.ts"],"names":[],"mappings":"AAAA,oCAAoC;AACpC,EAAE;AACF,+BAA+B;AAC/B,EAAE;AACF,uBAAuB;AACvB,yDAAyD;AACzD,kCAAkC;AAClC,EAAE;AACF,mBAAmB;AACnB,6CAA6C;AAG7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,+DAA+D;AAC/D,OAAO;AACP,+DAA+D;AAE/D,+BAA+B;AAC/B,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAC/C,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAClD,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAClD,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QACzC,OAAO,cAAc,CAAC;IACxB,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QACzC,OAAO,UAAU,CAAC;IACpB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,WAAW,CAAC;IACrB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,UAAU,CAAC;IACpB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,KAAK,CAAC;IACf,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,MAAM,CAAC;IAChB,IACE,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QAEvC,OAAO,UAAU,CAAC;IACpB,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,WAAW,CAAC;IACzC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,IACE,6BAA6B,CAAC,IAAI,CAAC,OAAO,CAAC;QAE3C,OAAO,cAAc,CAAC;IACxB,IACE,uBAAuB,CAAC,IAAI,CAC1B,OAAO,CACR;QAED,OAAO,OAAO,CAAC;IACjB,IACE,2BAA2B,CAAC,IAAI,CAC9B,OAAO,CACR;QAED,OAAO,WAAW,CAAC;IACrB,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,kCAAkC;AAClC,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,OAAe;IAEf,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;QAAE,KAAK,IAAI,GAAG,CAAC;IACpC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;QAAE,KAAK,IAAI,GAAG,CAAC;IACpC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,KAAK,IAAI,IAAI,CAAC;IACpC,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,KAAK,IAAI,GAAG,CAAC;IAC5C,IACE,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC;QAEjC,KAAK,IAAI,GAAG,CAAC;IACf,IACE,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC;QAEpC,KAAK,IAAI,GAAG,CAAC;IACf,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,yCAAyC;AACzC,MAAM,UAAU,mBAAmB,CACjC,IAAY;IAEZ,MAAM,CAAC
,GAAG,IAAI,CAAC,KAAK,CAClB,qCAAqC,CACtC,CAAC;IACF,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAClD,CAAC;AAED,+BAA+B;AAC/B,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,GAAG,CAAC;IACjC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,GAAG,CAAC;IACjC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0BAA0B;AAC1B,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,OAAO,GAAG,IAAI;SACjB,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC;SAClC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC;SACvC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;SACjC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;SACnB,IAAI,EAAE,CAAC;IACV,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC9B,CAAC;AAED,iCAAiC;AACjC,MAAM,UAAU,aAAa,CAC3B,IAAY;IAEZ,MAAM,EAAE,GACN,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,EAAE,GACN,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,IAAI,EAAE,CAAC;IAC5C,MAAM,GAAG,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,QAAQ;IACR,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC;QACnB,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,KAAK;QACL,KAAK;QACL,MAAM;QACN,MAAM;QACN,MAAM;QACN,MAAM;QACN,KAAK;QACL,KAAK;QACL,KAAK;QACL,KAAK;QACL,KAAK;KACN,CAAC,CAAC;IACH,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACrD,CAAC;AAsBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,OAAe,EACf,QAAgB,EAChB,YAAoB;IAEpB,kBAAkB;IAClB,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,QAAQ,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAE1E,iBAAiB;IACjB,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3B,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI,EAAE,CAAC,CAAC,OAAO;YACf,SAAS,EAAE,CAAC,CAAC,aAAa,IAAI,cA
Ac,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC;YAClE,YAAY,EAAE,iBAAiB,CAAC,OAAO,CAAC;YACxC,UAAU,EAAE,eAAe,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC;YACjD,cAAc,EAAE,mBAAmB,CAAC,CAAC,CAAC,OAAO,CAAC;YAC9C,UAAU,EAAE,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC;YACtC,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC;YAChC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC;SACnC,CAAC,CAAC,CAAC;IACN,CAAC;IAED,6BAA6B;IAC7B,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAA0D,EAAE,CAAC;IAE3E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAClC,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YAC9C,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;aAAM,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAC9D,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,KAAK,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,KAAK,KAAK,EAAE,CAAC;QAC9E,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC,CAAC;QACxE,IAAI,KAAK,GAAG,CAAC;YAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACxB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,OAAO;YACL,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI;YACJ,SAAS,EAAE,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC;
YAC1C,YAAY,EAAE,iBAAiB,CAAC,OAAO,CAAC;YACxC,UAAU,EAAE,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC;YAC5C,cAAc,EAAE,mBAAmB,CAAC,IAAI,CAAC;YACzC,UAAU,EAAE,eAAe,CAAC,IAAI,CAAC;YACjC,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC;YAC3B,QAAQ,EAAE,aAAa,CAAC,IAAI,CAAC;SAC9B,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { SearchHit } from "./types.js";
2
+ /** 关键词搜索(同步,纯子串匹配 + 加权打分) */
3
+ export declare function keywordSearch(query: string): SearchHit[];
4
+ /** 向后兼容别名 */
5
+ export declare const search: typeof keywordSearch;
6
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAa,SAAS,EAAE,MAAM,YAAY,CAAC;AA0BvD,6BAA6B;AAC7B,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAAE,CAkExD;AAED,aAAa;AACb,eAAO,MAAM,MAAM,sBAAgB,CAAC"}
package/dist/search.js ADDED
@@ -0,0 +1,91 @@
1
+ // search.ts — keyword search (v5.0)
2
+ // Matches on title / path / tags / content → shows line-level context
3
+ import { getIndex } from "./store.js";
4
+ import { readFileSync } from "node:fs";
5
+ import { resolve as resolvePath } from "node:path";
6
+ import { getContent } from "./content-cache.js";
7
/**
 * Build a short, line-numbered excerpt (previous line, matching line, next
 * line) around one case-insensitive occurrence of `query` in `content`.
 *
 * @param {string} content   Full text to excerpt from.
 * @param {string} query     Needle; compared case-insensitively.
 * @param {number} [maxLen]  Maximum characters kept from each excerpted line.
 * @param {number} [fromIndex] Offset (in the lowercased text) at which the
 *   search for the occurrence starts. Defaults to 0, i.e. the first occurrence.
 * @returns {string} Up to three `L<n>: <text>` lines joined by "\n", or ""
 *   when there is no occurrence at or after `fromIndex`.
 */
function lineContext(content, query, maxLen = 100, fromIndex = 0) {
    const lower = content.toLowerCase();
    const pos = lower.indexOf(query.toLowerCase(), fromIndex);
    if (pos < 0)
        return "";
    // `pos` is an offset into `lower`, whose length can differ from `content`
    // (a few characters lowercase to more than one code unit). Newlines are
    // unaffected by lowercasing, so count them in `lower` to get the line.
    const lineNum = lower.slice(0, pos).split("\n").length; // 1-indexed
    const lines = content.split("\n");
    const prev = lineNum > 1 ? lines[lineNum - 2].trim() : "";
    const curr = lines[lineNum - 1].trim();
    const next = lineNum < lines.length ? lines[lineNum].trim() : "";
    const parts = [];
    // Blank neighbouring lines are omitted from the excerpt.
    if (prev)
        parts.push(`L${lineNum - 1}: ${prev.slice(0, maxLen)}`);
    parts.push(`L${lineNum}: ${curr.slice(0, maxLen)}`);
    if (next)
        parts.push(`L${lineNum + 1}: ${next.slice(0, maxLen)}`);
    return parts.join("\n");
}
/**
 * Keyword search (synchronous; plain substring matching with weighted scoring).
 *
 * Scoring per index entry: title match +10, path match +5, tag match +3,
 * first content match +1, plus +1 for each further content match that was
 * counted (at most 5 occurrences are counted per entry).
 *
 * @param {string} query Needle; compared case-insensitively.
 * @returns {Array} Hits with a positive score, sorted by descending score.
 *   An empty query yields no hits.
 */
export function keywordSearch(query) {
    const q = query.toLowerCase();
    // An empty needle is a substring of every string, so it would "match"
    // every entry with a meaningless score and snippet.
    if (q.length === 0)
        return [];
    const idx = getIndex();
    const hits = [];
    for (const [relPath, entry] of Object.entries(idx)) {
        let score = 0;
        const parts = [];
        // Title match
        if (entry.title.toLowerCase().includes(q)) {
            score += 10;
        }
        // Path match
        if (relPath.toLowerCase().includes(q)) {
            score += 5;
        }
        // Tag match
        if (entry.tags.some(t => t.toLowerCase().includes(q))) {
            score += 3;
        }
        // Content match: prefer the in-memory cache, fall back to disk on a miss.
        let content = getContent(relPath);
        if (!content) {
            try {
                content = readFileSync(resolvePath(entry.sourceDir, relPath), "utf-8");
            }
            catch {
                // Best effort: an unreadable file is still scored on title/path/tags.
                content = undefined;
            }
        }
        if (content) {
            const lower = content.toLowerCase();
            let count = 0, p = lower.indexOf(q);
            while (p >= 0 && count < 5) {
                count++;
                if (count === 1)
                    score += 1;
                // Excerpt the lines around THIS occurrence (not always the first one).
                const ctx = lineContext(content, query, undefined, p);
                // Skip excerpts whose leading text is already part of an earlier one.
                if (ctx && !parts.some(pp => pp.includes(ctx.slice(0, 30)))) {
                    parts.push(ctx);
                }
                p = lower.indexOf(q, p + 1);
            }
            // Bonus for repeated occurrences. Guarded so that zero content matches
            // add nothing instead of subtracting one from title/path/tag scores.
            if (count > 1) {
                score += Math.min(count - 1, 9);
            }
        }
        if (score > 0) {
            hits.push({
                relPath: entry.relPath,
                sourceDir: entry.sourceDir,
                title: entry.title,
                tags: entry.tags,
                snippet: parts.join("\n"),
                score,
            });
        }
    }
    return hits.sort((a, b) => b.score - a.score);
}
89
+ /** Backward-compatible alias for keywordSearch. */
90
+ export const search = keywordSearch; // alias
91
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,2BAA2B;AAC3B,0BAA0B;AAE1B,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAGhD,6BAA6B;AAC7B,SAAS,WAAW,CAAC,OAAe,EAAE,KAAa,EAAE,MAAM,GAAG,GAAG;IAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC9B,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC7B,IAAI,GAAG,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvB,aAAa;IACb,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY;IACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,IAAI,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,IAAI,GAAG,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAEjE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAClE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IACpD,IAAI,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAElE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,6BAA6B;AAC7B,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,MAAM,GAAG,GAAG,QAAQ,EAAE,CAAC;IACvB,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAgB,EAAE,CAAC;IAE7B,KAAK,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACnD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,OAAO;QACP,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1C,KAAK,IAAI,EAAE,CAAC;QACd,CAAC;QAED,OAAO;QACP,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC
,CAAC,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;QAED,OAAO;QACP,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACtD,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;QAED,0BAA0B;QAC1B,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,OAAO,GAAG,YAAY,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,CAAC;YACzE,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,GAAG,SAAS,CAAC;YACtB,CAAC;QACH,CAAC;QACD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;YAEpC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,IAAI,KAAK,KAAK,CAAC;oBAAE,KAAK,IAAI,CAAC,CAAC;gBAE5B,cAAc;gBACd,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBACxC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5D,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAClB,CAAC;gBACD,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC9B,CAAC;YAED,SAAS;YACT,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,IAAI,CAAC,IAAI,CAAC;gBACR,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,SAAS,EAAE,KAAK,CAAC,SAAS;gBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;gBACzB,KAAK;aACN,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAChD,CAAC;AAED,aAAa;AACb,MAAM,CAAC,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,QAAQ"}
@@ -0,0 +1,44 @@
1
+ import type { RawChunk, CompiledChunk, FileSegment } from "./types.js";
2
+ import type { PreprocessedChunk } from "./preprocessor.js";
3
+ /** Recommended number of chunks to process per batch (balances context size against efficiency). */
4
+ export declare const BATCH_SIZE = 25;
5
+ export declare const COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u4E0D\u662F\u603B\u7ED3\u5185\u5BB9\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u662F\uFF1A\n\u5C06\u4EBA\u7C7B\u968F\u624B\u8BB0\u5F55\u7684\u975E\u7ED3\u6784\u5316\u7B14\u8BB0\uFF0C\n\u8F6C\u6362\u4E3A\u9002\u5408\u673A\u5668\u8BED\u4E49\u7D22\u5F15\u3001\u6982\u5FF5\u68C0\u7D22\u3001\n\u77E5\u8BC6\u805A\u7C7B\u3001\u957F\u671F\u6F14\u5316\u7684\"\u8BA4\u77E5\u77E5\u8BC6\u5355\u5143\"\u3002\n\n\u6838\u5FC3\u539F\u5219\uFF1A\n1. \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\n2. \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n3. \u8865\u5168\u9690\u5F0F\u8868\u8FBE \u2014 \u8865\u5145\u7701\u7565\u7684\u4E3B\u8BED\u3001\u5C55\u5F00\u7F29\u5199\n4. \u7EDF\u4E00\u672F\u8BED \u2014 \u5C06\u540C\u4E49\u8868\u8FBE\u5F52\u4E00\uFF08\u5982 \"\u72B6\u6001\u6C61\u67D3\" \u2194 \"stale closure\"\uFF09\n5. \u63D0\u53D6\u6838\u5FC3\u6982\u5FF5 \u2014 \u8BC6\u522B\u6280\u672F\u5173\u952E\u8BCD\n6. \u4FDD\u6301\u5355\u4E3B\u9898 \u2014 \u4E00\u4E2A chunk \u53EA\u63CF\u8FF0\u4E00\u4E2A\u8BA4\u77E5\u4E3B\u9898\n7. \u8F93\u51FA\u7ED3\u6784\u5316 JSON \u2014 \u4E25\u683C\u9075\u5FAA schema\n\n\u7981\u6B62\uFF1A\n1. \u8FC7\u5EA6\u603B\u7ED3\n2. \u5220\u9664\u539F\u6587\n3. \u6539\u5199\u903B\u8F91\n4. \u4E3B\u89C2\u63A8\u65AD\n5. \u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\n\n\u4F60\u7684\u89D2\u8272\u662F\uFF1A\"\u8BED\u4E49\u6807\u51C6\u5316\u5668\"\uFF0C\u4E0D\u662F\"\u5185\u5BB9\u4F5C\u8005\"\u3002";
6
+ /** Build the compile prompt for one batch of chunks. */
7
+ export declare function buildCompilePrompt(chunks: RawChunk[]): string;
8
+ /**
9
+ * Build the optimal embedding input from a compiled ChunkInfo.
10
+ *
11
+ * Format recommended by the blueprint:
12
+ * [TOPIC] + [CONCEPTS] + [ALIASES] + [KEYWORDS] + [NORMALIZED] + [RAW]
13
+ *
14
+ * Rationale: reinforces implicit semantics so the vector model matches more reliably at retrieval time.
15
+ */
16
+ export declare function buildEmbeddingText(topic: string, normalizedText: string, concepts: string[], aliases: string[], keywords: string[], contentClass: string, temporalAnchor: string | undefined, rawText: string): string;
17
+ /** Try to extract a CompiledChunk array from an LLM response. */
18
+ export declare function parseCompiledResult(text: string): CompiledChunk[] | null;
19
+ /** File-level compile system prompt (asks the LLM to do only four things). */
20
+ export declare const FILE_COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u77E5\u8BC6\u5355\u5143\u3002\n\n\u4F60\u9700\u8981\u505A\u7684 4 \u4EF6\u4E8B:\n1. \u81EA\u884C\u5224\u65AD\u8BED\u4E49\u8FB9\u754C \u2014 \u5C06\u6587\u4EF6\u5206\u6210\u82E5\u5E72\u8FDE\u7EED\u7684\u8BED\u4E49\u7247\u6BB5\uFF08segments\uFF09\n2. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA topic\uFF08\u6838\u5FC3\u4E3B\u9898\uFF0C\u4E00\u53E5\u8BDD\uFF09\n3. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA normalizedText\uFF08\u89C4\u8303\u5316\u6587\u672C\uFF1A\u8865\u5168\u7701\u7565\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n4. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u63D0\u53D6 concepts\uFF08\u6280\u672F\u6982\u5FF5\uFF09\u548C aliases\uFF08\u540C\u4E49\u8868\u8FBE\uFF0C\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- \u8BED\u4E49\u8FB9\u754C = \u540C\u4E00\u8BA4\u77E5\u4E3B\u9898\u7684\u81EA\u7136\u6BB5\u6216\u8FDE\u7EED\u6BB5\u843D\n- \u5982\u679C\u6574\u4E2A\u6587\u4EF6\u662F\u5355\u4E00\u4E3B\u9898\uFF0C\u53EA\u8F93\u51FA 1 \u4E2A segment\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
21
+ /**
22
+ * Build the prompt for file-level compilation.
23
+ * @param relPath File path.
24
+ * @param fullText Full text of the file.
25
+ * @param preprocessed Preprocessor output (shown to the LLM for reference only).
26
+ */
27
+ export declare function buildFileCompilePrompt(relPath: string, fullText: string, preprocessed: PreprocessedChunk[]): string;
28
+ /** Extract a FileSegment array from an LLM response. */
29
+ export declare function parseFileSegments(text: string): FileSegment[] | null;
30
+ /** v5.4 file-level system prompt (minimal version). */
31
+ export declare const FILE_LLM_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u5143\u6570\u636E\uFF0C\u7528\u4E8E\u589E\u5F3A\u8BED\u4E49\u641C\u7D22\u3002\n\n\u4F60\u9700\u8981\u8F93\u51FA\u7684 4 \u4E2A\u5B57\u6BB5:\n1. topic \u2014 \u6838\u5FC3\u4E3B\u9898\uFF08\u4E00\u53E5\u8BDD\u6982\u62EC\u5168\u6587\uFF09\n2. normalizedText \u2014 \u89C4\u8303\u5316\u6587\u672C\uFF08\u8865\u5168\u7701\u7565\u4E3B\u8BED\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n3. concepts \u2014 \u6280\u672F\u6982\u5FF5\u5217\u8868\uFF083-8 \u4E2A\u6838\u5FC3\u6982\u5FF5\uFF09\n4. aliases \u2014 \u540C\u4E49\u8868\u8FBE\uFF08\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF0C2-5 \u7EC4\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\u3001\u7248\u672C\u53F7\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F\uFF0C\u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- concepts \u63D0\u53D6\u6280\u672F\u5173\u952E\u8BCD\uFF0C\u4E0D\u662F\u6458\u8981\n- aliases \u8986\u76D6\u4E2D\u82F1\u5BF9\u7167\u548C\u7F29\u5199\u5C55\u5F00\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
32
+ /**
33
+ * v5.4: build the simplified file-level compile prompt.
34
+ */
35
+ export declare function buildFileLLMPrompt(relPath: string, fullText: string): string;
36
+ /**
37
+ * v5.4: parse the file-level LLM response (a single object, not a segments array).
38
+ */
39
+ export declare function parseFileLLMResult(text: string): import("./types.js").FileLLMData | null;
40
+ /**
41
+ * v5.4: build the embedding text for the file-level LLM vector.
42
+ */
43
+ export declare function buildFileLLMEmbeddingText(data: import("./types.js").FileLLMData, relPath?: string, maxEmbedLen?: number): string;
44
+ //# sourceMappingURL=semantic-compiler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-compiler.d.ts","sourceRoot":"","sources":["../src/semantic-compiler.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACvE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAM3D,kCAAkC;AAClC,eAAO,MAAM,UAAU,KAAK,CAAC;AAM7B,eAAO,MAAM,qBAAqB,siDAyBT,CAAC;AAM1B,4BAA4B;AAC5B,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,CAkG7D;AAMD;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,EAAE,MAAM,EAAE,EACjB,QAAQ,EAAE,MAAM,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,cAAc,EAAE,MAAM,GAAG,SAAS,EAClC,OAAO,EAAE,MAAM,GACd,MAAM,CAWR;AAMD,qCAAqC;AACrC,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,GACX,aAAa,EAAE,GAAG,IAAI,CAiBxB;AAMD,0CAA0C;AAC1C,eAAO,MAAM,0BAA0B,koDAgBL,CAAC;AAEnC;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,iBAAiB,EAAE,GAChC,MAAM,CAwCR;AAED,iCAAiC;AACjC,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,EAAE,GAAG,IAAI,CAgBpE;AAMD,kCAAkC;AAClC,eAAO,MAAM,sBAAsB,u7CAgBD,CAAC;AAEnC;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,MAAM,CAmBR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GACX,OAAO,YAAY,EAAE,WAAW,GAAG,IAAI,CAyBzC;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,IAAI,EAAE,OAAO,YAAY,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,WAAW,SAAO,GAAG,MAAM,CAY9H"}