@llangtop/pwiki-core 0.3.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/dist/WikiEngine.d.ts +25 -37
  2. package/dist/WikiEngine.d.ts.map +1 -1
  3. package/dist/WikiEngine.js +157 -298
  4. package/dist/WikiEngine.js.map +1 -1
  5. package/dist/ast-chunker.d.ts +23 -0
  6. package/dist/ast-chunker.d.ts.map +1 -0
  7. package/dist/ast-chunker.js +434 -0
  8. package/dist/ast-chunker.js.map +1 -0
  9. package/dist/content-cache.d.ts +13 -0
  10. package/dist/content-cache.d.ts.map +1 -0
  11. package/dist/content-cache.js +33 -0
  12. package/dist/content-cache.js.map +1 -0
  13. package/dist/embedder.d.ts +38 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +267 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/file-manifest.d.ts +46 -0
  18. package/dist/file-manifest.d.ts.map +1 -0
  19. package/dist/file-manifest.js +121 -0
  20. package/dist/file-manifest.js.map +1 -0
  21. package/dist/index.d.ts +18 -8
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +25 -7
  24. package/dist/index.js.map +1 -1
  25. package/dist/indexer-compile.d.ts +20 -0
  26. package/dist/indexer-compile.d.ts.map +1 -0
  27. package/dist/indexer-compile.js +198 -0
  28. package/dist/indexer-compile.js.map +1 -0
  29. package/dist/indexer-embed.d.ts +21 -0
  30. package/dist/indexer-embed.d.ts.map +1 -0
  31. package/dist/indexer-embed.js +248 -0
  32. package/dist/indexer-embed.js.map +1 -0
  33. package/dist/indexer-scan.d.ts +4 -0
  34. package/dist/indexer-scan.d.ts.map +1 -0
  35. package/dist/indexer-scan.js +51 -0
  36. package/dist/indexer-scan.js.map +1 -0
  37. package/dist/indexer.d.ts +4 -0
  38. package/dist/indexer.d.ts.map +1 -0
  39. package/dist/indexer.js +7 -0
  40. package/dist/indexer.js.map +1 -0
  41. package/dist/model-registry.d.ts +32 -0
  42. package/dist/model-registry.d.ts.map +1 -0
  43. package/dist/model-registry.js +82 -0
  44. package/dist/model-registry.js.map +1 -0
  45. package/dist/parser.d.ts +9 -0
  46. package/dist/parser.d.ts.map +1 -0
  47. package/dist/parser.js +54 -0
  48. package/dist/parser.js.map +1 -0
  49. package/dist/preprocessor.d.ts +36 -0
  50. package/dist/preprocessor.d.ts.map +1 -0
  51. package/dist/preprocessor.js +209 -0
  52. package/dist/preprocessor.js.map +1 -0
  53. package/dist/search.d.ts +6 -0
  54. package/dist/search.d.ts.map +1 -0
  55. package/dist/search.js +91 -0
  56. package/dist/search.js.map +1 -0
  57. package/dist/semantic-compiler.d.ts +44 -0
  58. package/dist/semantic-compiler.d.ts.map +1 -0
  59. package/dist/semantic-compiler.js +376 -0
  60. package/dist/semantic-compiler.js.map +1 -0
  61. package/dist/semantic-search.d.ts +11 -0
  62. package/dist/semantic-search.d.ts.map +1 -0
  63. package/dist/semantic-search.js +217 -0
  64. package/dist/semantic-search.js.map +1 -0
  65. package/dist/store-settings.d.ts +32 -0
  66. package/dist/store-settings.d.ts.map +1 -0
  67. package/dist/store-settings.js +138 -0
  68. package/dist/store-settings.js.map +1 -0
  69. package/dist/store-vectors.d.ts +13 -0
  70. package/dist/store-vectors.d.ts.map +1 -0
  71. package/dist/store-vectors.js +101 -0
  72. package/dist/store-vectors.js.map +1 -0
  73. package/dist/store.d.ts +11 -0
  74. package/dist/store.d.ts.map +1 -0
  75. package/dist/store.js +28 -0
  76. package/dist/store.js.map +1 -0
  77. package/dist/types.d.ts +75 -92
  78. package/dist/types.d.ts.map +1 -1
  79. package/dist/types.js +1 -1
  80. package/dist/types.js.map +1 -1
  81. package/dist/wiki-paths.d.ts +3 -0
  82. package/dist/wiki-paths.d.ts.map +1 -0
  83. package/dist/wiki-paths.js +13 -0
  84. package/dist/wiki-paths.js.map +1 -0
  85. package/package.json +38 -38
  86. package/dist/compile/compiler.d.ts +0 -39
  87. package/dist/compile/compiler.d.ts.map +0 -1
  88. package/dist/compile/compiler.js +0 -227
  89. package/dist/compile/compiler.js.map +0 -1
  90. package/dist/compile/index.d.ts +0 -3
  91. package/dist/compile/index.d.ts.map +0 -1
  92. package/dist/compile/index.js +0 -2
  93. package/dist/compile/index.js.map +0 -1
  94. package/dist/embed/WikiEmbedder.d.ts +0 -28
  95. package/dist/embed/WikiEmbedder.d.ts.map +0 -1
  96. package/dist/embed/WikiEmbedder.js +0 -147
  97. package/dist/embed/WikiEmbedder.js.map +0 -1
  98. package/dist/embed/index.d.ts +0 -2
  99. package/dist/embed/index.d.ts.map +0 -1
  100. package/dist/embed/index.js +0 -2
  101. package/dist/embed/index.js.map +0 -1
  102. package/dist/llm/WikiLLM.d.ts +0 -24
  103. package/dist/llm/WikiLLM.d.ts.map +0 -1
  104. package/dist/llm/WikiLLM.js +0 -46
  105. package/dist/llm/WikiLLM.js.map +0 -1
  106. package/dist/llm/index.d.ts +0 -3
  107. package/dist/llm/index.d.ts.map +0 -1
  108. package/dist/llm/index.js +0 -2
  109. package/dist/llm/index.js.map +0 -1
  110. package/dist/models.d.ts +0 -5
  111. package/dist/models.d.ts.map +0 -1
  112. package/dist/models.js +0 -54
  113. package/dist/models.js.map +0 -1
  114. package/dist/search/WikiSearch.d.ts +0 -14
  115. package/dist/search/WikiSearch.d.ts.map +0 -1
  116. package/dist/search/WikiSearch.js +0 -223
  117. package/dist/search/WikiSearch.js.map +0 -1
  118. package/dist/search/index.d.ts +0 -2
  119. package/dist/search/index.d.ts.map +0 -1
  120. package/dist/search/index.js +0 -2
  121. package/dist/search/index.js.map +0 -1
  122. package/dist/store/WikiStore.d.ts +0 -47
  123. package/dist/store/WikiStore.d.ts.map +0 -1
  124. package/dist/store/WikiStore.js +0 -301
  125. package/dist/store/WikiStore.js.map +0 -1
  126. package/dist/store/index.d.ts +0 -2
  127. package/dist/store/index.d.ts.map +0 -1
  128. package/dist/store/index.js +0 -2
  129. package/dist/store/index.js.map +0 -1
  130. package/dist/util/fs.d.ts +0 -7
  131. package/dist/util/fs.d.ts.map +0 -1
  132. package/dist/util/fs.js +0 -36
  133. package/dist/util/fs.js.map +0 -1
  134. package/dist/util/index.d.ts +0 -3
  135. package/dist/util/index.d.ts.map +0 -1
  136. package/dist/util/index.js +0 -3
  137. package/dist/util/index.js.map +0 -1
  138. package/dist/util/paths.d.ts +0 -17
  139. package/dist/util/paths.d.ts.map +0 -1
  140. package/dist/util/paths.js +0 -31
  141. package/dist/util/paths.js.map +0 -1
package/package.json CHANGED
@@ -1,38 +1,38 @@
1
- {
2
- "name": "@llangtop/pwiki-core",
3
- "version": "0.3.4",
4
- "description": "Wiki knowledge base engine - keyword/semantic/hybrid search with local ONNX embeddings",
5
- "type": "module",
6
- "main": "./dist/index.js",
7
- "types": "./dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "types": "./dist/index.d.ts",
11
- "import": "./dist/index.js"
12
- }
13
- },
14
- "files": [
15
- "dist",
16
- "README.md"
17
- ],
18
- "scripts": {
19
- "build": "tsc",
20
- "prepublishOnly": "npm run build",
21
- "test": "vitest run"
22
- },
23
- "dependencies": {
24
- "@huggingface/transformers": "^3.0.0"
25
- },
26
- "devDependencies": {
27
- "typescript": "^5.5.0",
28
- "vitest": "^1.0.0"
29
- },
30
- "engines": {
31
- "node": ">=18"
32
- },
33
- "license": "MIT"
34
- }
35
-
36
-
37
-
38
-
1
+ {
2
+ "name": "@llangtop/pwiki-core",
3
+ "version": "0.4.5",
4
+ "description": "Wiki knowledge base engine - keyword/semantic/hybrid search with local ONNX embeddings",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist",
16
+ "README.md"
17
+ ],
18
+ "scripts": {
19
+ "build": "tsc",
20
+ "prepublishOnly": "npm run build",
21
+ "test": "vitest run"
22
+ },
23
+ "dependencies": {
24
+ "@huggingface/transformers": "^3.0.0",
25
+ "remark-parse": "^11.0.0",
26
+ "unified": "^11.0.0",
27
+ "unist-util-visit": "^5.0.0"
28
+ },
29
+ "devDependencies": {
30
+ "@types/mdast": "^4.0.0",
31
+ "typescript": "^5.5.0",
32
+ "vitest": "^1.0.0"
33
+ },
34
+ "engines": {
35
+ "node": "\u003e=18"
36
+ },
37
+ "license": "MIT"
38
+ }
@@ -1,39 +0,0 @@
1
- import type { WikiStore } from "../store/index.js";
2
- import type { ChunkMeta } from "../types.js";
3
- export declare function extractSummary(raw: string, maxLen?: number): string;
4
- export interface HeadingChunk {
5
- key: string;
6
- relPath: string;
7
- heading: string;
8
- level: number;
9
- text: string;
10
- }
11
- /** 按 heading 将 markdown 切分为块 */
12
- export declare function chunkByHeadings(content: string, relPath: string, maxEmbedLen?: number): HeadingChunk[];
13
- /** 推断 chunkType */
14
- export declare function inferChunkType(text: string, heading: string): string;
15
- /** 推断 contentClass */
16
- export declare function inferContentClass(text: string): string;
17
- /** 推断 importance (0-1) */
18
- export declare function inferImportance(text: string, heading: string): number;
19
- /** 生成关键词(简单分词) */
20
- export declare function extractKeywords(text: string): string[];
21
- /** 预处理一个块,生成程序可推断的 ChunkMeta 字段 */
22
- export declare function preprocess(text: string, heading: string, level: number): Partial<ChunkMeta>;
23
- export declare const COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\u4F60\u7684\u4EFB\u52A1\u4E0D\u662F\u603B\u7ED3\u5185\u5BB9\u3002\n\u4F60\u7684\u4EFB\u52A1\u662F\uFF1A\u5C06\u4EBA\u7C7B\u968F\u624B\u8BB0\u5F55\u7684\u975E\u7ED3\u6784\u5316\u7B14\u8BB0\uFF0C\u8F6C\u6362\u4E3A\u9002\u5408\u673A\u5668\u8BED\u4E49\u7D22\u5F15\u3001\u6982\u5FF5\u68C0\u7D22\u3001\u77E5\u8BC6\u805A\u7C7B\u3001\u957F\u671F\u6F14\u5316\u7684\"\u8BA4\u77E5\u77E5\u8BC6\u5355\u5143\"\u3002\n\u6838\u5FC3\u539F\u5219\uFF1A\n1. \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\n2. \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n3. \u8865\u5168\u9690\u5F0F\u8868\u8FBE \u2014 \u8865\u5145\u7701\u7565\u7684\u4E3B\u8BED\u3001\u5C55\u5F00\u7F29\u5199\n4. \u7EDF\u4E00\u672F\u8BED \u2014 \u5C06\u540C\u4E49\u8868\u8FBE\u5F52\u4E00\n5. \u63D0\u53D6\u6838\u5FC3\u6982\u5FF5 \u2014 \u8BC6\u522B\u6280\u672F\u5173\u952E\u8BCD\n6. \u4FDD\u6301\u5355\u4E3B\u9898 \u2014 \u4E00\u4E2A chunk \u53EA\u63CF\u8FF0\u4E00\u4E2A\u8BA4\u77E5\u4E3B\u9898\n7. \u8F93\u51FA\u7ED3\u6784\u5316 JSON \u2014 \u4E25\u683C\u9075\u5FAA schema\n\u8F93\u51FA JSON schema:\n{\n \"topic\": \"string \u2014 \u6838\u5FC3\u4E3B\u9898\uFF08\u4E00\u53E5\u8BDD\uFF09\",\n \"normalizedText\": \"string \u2014 \u89C4\u8303\u5316\u540E\u7684\u6587\u672C\",\n \"concepts\": [\"string \u2014 \u6838\u5FC3\u6982\u5FF5\"],\n \"aliases\": [\"string \u2014 \u540C\u4E49\u8868\u8FBE\uFF08\u4E2D\u82F1\u5BF9\u7167\u3001\u7F29\u5199\u5C55\u5F00\uFF09\"]\n}";
24
- export declare function buildCompilePrompt(relPath: string, rawText: string, heading: string): string;
25
- export declare function parseCompileResult(raw: string): {
26
- topic: string;
27
- normalizedText: string;
28
- concepts: string[];
29
- aliases: string[];
30
- } | null;
31
- export declare function buildEmbeddingText(rawText: string, meta: Partial<ChunkMeta>, maxLen?: number): string;
32
- export declare function getCompileStats(store: WikiStore): {
33
- total: number;
34
- compiled: number;
35
- uncompiled: string[];
36
- };
37
- /** 标记文件为已编译 */
38
- export declare function markCompiled(store: WikiStore, relPath: string): void;
39
- //# sourceMappingURL=compiler.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"compiler.d.ts","sourceRoot":"","sources":["../../src/compile/compiler.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAiB,MAAM,aAAa,CAAC;AAM5D,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,SAAM,GAAG,MAAM,CAchE;AAMD,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,gCAAgC;AAChC,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,SAAM,GAAG,YAAY,EAAE,CAgCnG;AAMD,mBAAmB;AACnB,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAapE;AAED,sBAAsB;AACtB,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKtD;AAED,0BAA0B;AAC1B,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAOrE;AAED,kBAAkB;AAClB,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CActD;AAED,mCAAmC;AACnC,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAS3F;AAMD,eAAO,MAAM,qBAAqB,2hDAiBhC,CAAC;AAEH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAQ5F;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,MAAM,EAAE,CAAA;CAAE,GAAG,IAAI,CAiBvI;AAMD,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,EACxB,MAAM,SAAM,GACX,MAAM,CAQR;AAMD,wBAAgB,eAAe,CAAC,KAAK,EAAE,SAAS,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAU3G;AAED,eAAe;AACf,wBAAgB,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI,CAWpE"}
@@ -1,227 +0,0 @@
1
- // compiler.ts — 语义编译管线:AST 分块 → 预处理 → LLM prompt 生成 → 结果存储
2
- // ============================================================================
3
- // 文件内容摘要提取
4
- // ============================================================================
5
- export function extractSummary(raw, maxLen = 200) {
6
- const fmEnd = raw.match(/^---\n[\s\S]*?\n---/);
7
- const body = fmEnd ? raw.slice(fmEnd[0].length).trim() : raw;
8
- return body
9
- .replace(/^#{1,6}\s+/gm, "")
10
- .replace(/\*\*|__|\*|_|`|~~/g, "")
11
- .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
12
- .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
13
- .replace(/^\s*[-*+]\s+/gm, "")
14
- .replace(/^\s*\d+\.\s+/gm, "")
15
- .replace(/\n{2,}/g, " ")
16
- .replace(/\n/g, " ")
17
- .trim()
18
- .slice(0, maxLen);
19
- }
20
- /** 按 heading 将 markdown 切分为块 */
21
- export function chunkByHeadings(content, relPath, maxEmbedLen = 800) {
22
- const lines = content.split("\n");
23
- const chunks = [];
24
- let currentHeading = "(top)";
25
- let currentLevel = 0;
26
- let currentLines = [];
27
- const flush = () => {
28
- const text = currentLines.join("\n").trim();
29
- if (!text)
30
- return;
31
- chunks.push({
32
- key: `${relPath}###${chunks.length}`,
33
- relPath,
34
- heading: currentHeading,
35
- level: currentLevel,
36
- text: text.slice(0, maxEmbedLen),
37
- });
38
- };
39
- for (const line of lines) {
40
- const m = line.match(/^(#{1,6})\s+(.+)$/);
41
- if (m) {
42
- flush();
43
- currentHeading = m[0];
44
- currentLevel = m[1].length;
45
- currentLines = [];
46
- }
47
- else {
48
- currentLines.push(line);
49
- }
50
- }
51
- flush();
52
- return chunks;
53
- }
54
- // ============================================================================
55
- // 预处理器 — 程序推断 7 个字段
56
- // ============================================================================
57
- /** 推断 chunkType */
58
- export function inferChunkType(text, heading) {
59
- if (/```[\s\S]*?```/.test(text))
60
- return "code";
61
- if (/^\s*[-*]\s*\[ \]/m.test(text))
62
- return "todo";
63
- if (/^\s*[-*]\s*\[x\]/im.test(text))
64
- return "log";
65
- if (/\b(架构|拓扑|结构|方案)\b/.test(heading))
66
- return "architecture";
67
- if (/\b(决定|决策|结论|决议)\b/.test(heading))
68
- return "decision";
69
- if (/\b(参考|相关|资源|附录|链接)\b/.test(heading))
70
- return "reference";
71
- if (/\b(问题|排查|故障|报错|异常)\b/.test(heading))
72
- return "question";
73
- if (/\b(日志|记录|日报|周报|流水)\b/.test(heading))
74
- return "log";
75
- if (/\b(想法|思路|灵感|idea)\b/i.test(heading))
76
- return "idea";
77
- if (/\b(研究|调研|分析|探索)\b/.test(heading))
78
- return "research";
79
- if (text.length < 50)
80
- return "reference";
81
- return "note";
82
- }
83
- /** 推断 contentClass */
84
- export function inferContentClass(text) {
85
- if (/\b(20\d{2}[-/]\d{1,2}[-/]\d{1,2})\b/.test(text))
86
- return "event";
87
- if (/\b(I|我|you|你|he|他|she|她|we|我们)\b/.test(text) && /\b(said|说|asked|问|told|告诉)\b/.test(text))
88
- return "conversation";
89
- if (/```[\s\S]{20,}?```/.test(text))
90
- return "reference";
91
- return "knowledge";
92
- }
93
- /** 推断 importance (0-1) */
94
- export function inferImportance(text, heading) {
95
- let score = 0.5;
96
- if (/\b(重要|关键|核心|必须|注意|⚠️|❗)\b/.test(text))
97
- score += 0.2;
98
- if (/\b(可选|补充|参考|备注)\b/.test(text))
99
- score -= 0.15;
100
- if (text.length > 500)
101
- score += 0.1;
102
- if (heading.match(/^#{1,2}\s/))
103
- score += 0.1;
104
- return Math.max(0, Math.min(1, score));
105
- }
106
- /** 生成关键词(简单分词) */
107
- export function extractKeywords(text) {
108
- const words = text
109
- .replace(/[^\w\u4e00-\u9fff\s-]/g, " ")
110
- .split(/\s+/)
111
- .filter(w => w.length >= 2);
112
- const freq = new Map();
113
- for (const w of words) {
114
- const k = w.toLowerCase();
115
- freq.set(k, (freq.get(k) ?? 0) + 1);
116
- }
117
- return [...freq.entries()]
118
- .sort((a, b) => b[1] - a[1])
119
- .slice(0, 10)
120
- .map(([w]) => w);
121
- }
122
- /** 预处理一个块,生成程序可推断的 ChunkMeta 字段 */
123
- export function preprocess(text, heading, level) {
124
- return {
125
- heading, level,
126
- chunkType: inferChunkType(text, heading),
127
- contentClass: inferContentClass(text),
128
- importance: inferImportance(text, heading),
129
- keywords: extractKeywords(text),
130
- summary: extractSummary(text, 150),
131
- };
132
- }
133
- // ============================================================================
134
- // LLM 编译 — prompt 构建 + 结果解析
135
- // ============================================================================
136
- export const COMPILE_SYSTEM_PROMPT = `你是一个"知识语义编译器"。
137
- 你的任务不是总结内容。
138
- 你的任务是:将人类随手记录的非结构化笔记,转换为适合机器语义索引、概念检索、知识聚类、长期演化的"认知知识单元"。
139
- 核心原则:
140
- 1. 保留原始信息 — 不删技术细节
141
- 2. 不改变原意 — 只规范化表达
142
- 3. 补全隐式表达 — 补充省略的主语、展开缩写
143
- 4. 统一术语 — 将同义表达归一
144
- 5. 提取核心概念 — 识别技术关键词
145
- 6. 保持单主题 — 一个 chunk 只描述一个认知主题
146
- 7. 输出结构化 JSON — 严格遵循 schema
147
- 输出 JSON schema:
148
- {
149
- "topic": "string — 核心主题(一句话)",
150
- "normalizedText": "string — 规范化后的文本",
151
- "concepts": ["string — 核心概念"],
152
- "aliases": ["string — 同义表达(中英对照、缩写展开)"]
153
- }`;
154
- export function buildCompilePrompt(relPath, rawText, heading) {
155
- return `## 文件: ${relPath}
156
- ## 段落标题: ${heading}
157
-
158
- ## 原始文本:
159
- ${rawText}
160
-
161
- 请输出 JSON(不要包裹在 markdown code block 中):`;
162
- }
163
- export function parseCompileResult(raw) {
164
- try {
165
- // 尝试直接解析
166
- let cleaned = raw.trim();
167
- // 去掉可能的 markdown code block
168
- cleaned = cleaned.replace(/^```(?:json)?\s*/m, "").replace(/\s*```$/m, "");
169
- const obj = JSON.parse(cleaned);
170
- if (obj.topic && obj.normalizedText && Array.isArray(obj.concepts)) {
171
- return {
172
- topic: String(obj.topic),
173
- normalizedText: String(obj.normalizedText),
174
- concepts: obj.concepts.map(String),
175
- aliases: Array.isArray(obj.aliases) ? obj.aliases.map(String) : [],
176
- };
177
- }
178
- }
179
- catch { /* ignore */ }
180
- return null;
181
- }
182
- // ============================================================================
183
- // 向量文本构建
184
- // ============================================================================
185
- export function buildEmbeddingText(rawText, meta, maxLen = 800) {
186
- const parts = [];
187
- if (meta.topic)
188
- parts.push(`[主题] ${meta.topic}`);
189
- if (meta.summary)
190
- parts.push(`[摘要] ${meta.summary}`);
191
- if (meta.concepts?.length)
192
- parts.push(`[概念] ${meta.concepts.join(", ")}`);
193
- if (meta.aliases?.length)
194
- parts.push(`[别名] ${meta.aliases.join(", ")}`);
195
- parts.push(rawText);
196
- return parts.join("\n").slice(0, maxLen);
197
- }
198
- // ============================================================================
199
- // 状态查询
200
- // ============================================================================
201
- export function getCompileStats(store) {
202
- const entries = store.entries;
203
- const compiled = [];
204
- const uncompiled = [];
205
- for (const relPath of Object.keys(entries)) {
206
- const mft = store.getManifestEntry(relPath);
207
- if (mft?.compiled)
208
- compiled.push(relPath);
209
- else
210
- uncompiled.push(relPath);
211
- }
212
- return { total: Object.keys(entries).length, compiled: compiled.length, uncompiled };
213
- }
214
- /** 标记文件为已编译 */
215
- export function markCompiled(store, relPath) {
216
- const existing = store.getManifestEntry(relPath);
217
- const entry = {
218
- md5: existing?.md5 ?? "",
219
- size: existing?.size ?? 0,
220
- chunks: existing?.chunks ?? 0,
221
- compiled: true,
222
- embedded: existing?.embedded ?? false,
223
- updatedAt: new Date().toISOString(),
224
- };
225
- store.setManifestEntry(relPath, entry);
226
- }
227
- //# sourceMappingURL=compiler.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"compiler.js","sourceRoot":"","sources":["../../src/compile/compiler.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAO3D,+EAA+E;AAC/E,WAAW;AACX,+EAA+E;AAE/E,MAAM,UAAU,cAAc,CAAC,GAAW,EAAE,MAAM,GAAG,GAAG;IACtD,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;IAC7D,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;SACjC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC;SACvC,OAAO,CAAC,yBAAyB,EAAE,IAAI,CAAC;SACxC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC;SAC7B,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC;SAC7B,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;SACnB,IAAI,EAAE;SACN,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AACtB,CAAC;AAcD,gCAAgC;AAChC,MAAM,UAAU,eAAe,CAAC,OAAe,EAAE,OAAe,EAAE,WAAW,GAAG,GAAG;IACjF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,MAAM,GAAmB,EAAE,CAAC;IAClC,IAAI,cAAc,GAAG,OAAO,CAAC;IAC7B,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,YAAY,GAAa,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,MAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,GAAG,OAAO,MAAM,MAAM,CAAC,MAAM,EAAE;YACpC,OAAO;YACP,OAAO,EAAE,cAAc;YACvB,KAAK,EAAE,YAAY;YACnB,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;SACjC,CAAC,CAAC;IACL,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC1C,IAAI,CAAC,EAAE,CAAC;YACN,KAAK,EAAE,CAAC;YACR,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACtB,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YAC3B,YAAY,GAAG,EAAE,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,KAAK,EAAE,CAAC;IACR,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,mBAAmB;AACnB,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAC/C,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAClD,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAClD,IAAI,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,cAAc,CAAC;IAC7D,IAAI,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,UAAU,CAAC;IACzD,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,WAAW,CAAC;IAC7D,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,UAAU,CAAC;IAC5D,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,KAAK,CAAC;IACvD,IAAI,sBAAsB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC;IACxD,IAAI,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,UAAU,CAAC;IACzD,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,WAAW,CAAC;IACzC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,sBAAsB;AACtB,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,IAAI,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IACrE,IAAI,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,8BAA8B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,cAAc,CAAC;IACtH,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,WAAW,CAAC;IACxD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,0BAA0B;AAC1B,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,OAAe;IAC3D,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,IAAI,2BAA2B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,KAAK,IAAI,GAAG,CAAC;IACzD,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,KAAK,IAAI,IAAI,CAAC;IAClD,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;QAAE,KAAK,IAAI,GAAG,CAAC;IACpC,IAAI,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC;QAAE,KAAK,IAAI,GAAG,CAAC;IAC7C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,kBAAkB;AAClB,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAG,IAAI;SACf,OAAO,CAAC,wBAAwB,EAAE,GAAG,CAAC;SACtC,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;QAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,CAAC,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;SACvB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AACrB,CAAC;AAED,mCAAmC;AACnC,MAAM,UAAU,UAAU,CAAC,IAAY,EAAE,OAAe,EAAE,KAAa;IACrE,OAAO;QACL,OAAO,EAAE,KAAK;QACd,SAAS,EAAE,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC;QACxC,YAAY,EAAE,iBAAiB,CAAC,IAAI,CAAC;QACrC,UAAU,EAAE,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC;QAC1C,QAAQ,EAAE,eAAe,CAAC,IAAI,CAAC;QAC/B,OAAO,EAAE,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC;KACnC,CAAC;AACJ,CAAC;AAED,+EAA+E;AAC/E,4BAA4B;AAC5B,+EAA+E;AAE/E,MAAM,CAAC,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;EAiBnC,CAAC;AAEH,MAAM,UAAU,kBAAkB,CAAC,OAAe,EAAE,OAAe,EAAE,OAAe;IAClF,OAAO,UAAU,OAAO;WACf,OAAO;;;EAGhB,OAAO;;uCAE8B,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,SAAS;QACT,IAAI,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACzB,4BAA4B;QAC5B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC3E,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAChC,IAAI,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,cAAc,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnE,OAAO;gBACL,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC;gBACxB,cAAc,EAAE,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC;gBAC1C,QAAQ,EAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC;gBAClC,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;aACnE,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,MAAM,UAAU,kBAAkB,CAChC,OAAe,EACf,IAAwB,EACxB,MAAM,GAAG,GAAG;IAEZ,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,IAAI,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;IACjD,IAAI,IAAI,CAAC,OAAO;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IACrD,IAAI,IAAI,CAAC,QAAQ,EAAE,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1E,IAAI,IAAI,CAAC,OAAO,EAAE,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AAC3C,CAAC;AAED,+EAA+E;AAC/E,OAAO;AACP,+EAA+E;AAE/E,MAAM,UAAU,eAAe,CAAC,KAAgB;IAC9C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;IAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3C,MAAM,GAAG,GAAG,KAAK,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAC5C,IAAI,GAAG,EAAE,QAAQ;YAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;;YACrC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC;AACvF,CAAC;AAED,eAAe;AACf,MAAM,UAAU,YAAY,CAAC,KAAgB,EAAE,OAAe;IAC5D,MAAM,QAAQ,GAAG,KAAK,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,KAAK,GAAkB;QAC3B,GAAG,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE;QACxB,IAAI,EAAE,QAAQ,EAAE,IAAI,IAAI,CAAC;QACzB,MAAM,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;QAC7B,QAAQ,EAAE,IAAI;QACd,QAAQ,EAAE,QAAQ,EAAE,QAAQ,IAAI,KAAK;QACrC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IACF,KAAK,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;AACzC,CAAC"}
@@ -1,3 +0,0 @@
1
- export { extractSummary, chunkByHeadings, preprocess, inferChunkType, inferContentClass, inferImportance, extractKeywords, COMPILE_SYSTEM_PROMPT, buildCompilePrompt, parseCompileResult, buildEmbeddingText, getCompileStats, markCompiled, } from "./compiler.js";
2
- export type { HeadingChunk } from "./compiler.js";
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/compile/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EAAE,eAAe,EAAE,UAAU,EAC3C,cAAc,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EACnE,qBAAqB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EACjF,eAAe,EAAE,YAAY,GAC9B,MAAM,eAAe,CAAC;AACvB,YAAY,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC"}
@@ -1,2 +0,0 @@
1
- export { extractSummary, chunkByHeadings, preprocess, inferChunkType, inferContentClass, inferImportance, extractKeywords, COMPILE_SYSTEM_PROMPT, buildCompilePrompt, parseCompileResult, buildEmbeddingText, getCompileStats, markCompiled, } from "./compiler.js";
2
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/compile/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EAAE,eAAe,EAAE,UAAU,EAC3C,cAAc,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EACnE,qBAAqB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EACjF,eAAe,EAAE,YAAY,GAC9B,MAAM,eAAe,CAAC"}
@@ -1,28 +0,0 @@
1
- import type { ModelInfo } from "../types.js";
2
- export declare class WikiEmbedder {
3
- private model;
4
- private pipeline;
5
- private initPromise;
6
- private _error;
7
- private _variant;
8
- private modelsDir;
9
- constructor(model: ModelInfo, modelsDir: string);
10
- get ready(): boolean;
11
- get error(): string | null;
12
- get modelInfo(): ModelInfo;
13
- get source(): string;
14
- init(): Promise<boolean>;
15
- embed(text: string): Promise<number[]>;
16
- embedBatch(texts: string[]): Promise<number[][]>;
17
- download(): Promise<{
18
- ok: boolean;
19
- msg: string;
20
- }>;
21
- switchModel(newModel: ModelInfo): void;
22
- private localDir;
23
- private hasLocal;
24
- private resolvePath;
25
- private doInit;
26
- private ensure;
27
- }
28
- //# sourceMappingURL=WikiEmbedder.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"WikiEmbedder.d.ts","sourceRoot":"","sources":["../../src/embed/WikiEmbedder.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,qBAAa,YAAY;IAQrB,OAAO,CAAC,KAAK;IAPf,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAiC;IACpD,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,SAAS,CAAS;gBAGhB,KAAK,EAAE,SAAS,EACxB,SAAS,EAAE,MAAM;IAOnB,IAAI,KAAK,YAA0C;IACnD,IAAI,KAAK,kBAA+B;IACxC,IAAI,SAAS,cAA0B;IAEvC,IAAI,MAAM,IAAI,MAAM,CAGnB;IAIK,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IASxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAMtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAYhD,QAAQ,IAAI,OAAO,CAAC;QAAE,EAAE,EAAE,OAAO,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IA6BvD,WAAW,CAAC,QAAQ,EAAE,SAAS,GAAG,IAAI;IAStC,OAAO,CAAC,QAAQ;IAKhB,OAAO,CAAC,QAAQ;IAWhB,OAAO,CAAC,WAAW;YAIL,MAAM;YA6BN,MAAM;CAMrB"}
@@ -1,147 +0,0 @@
1
- // WikiEmbedder.ts — ONNX 嵌入引擎
2
- import { existsSync, mkdirSync } from "node:fs";
3
- import { resolve } from "node:path";
4
- import { execSync } from "node:child_process";
5
- export class WikiEmbedder {
6
- model;
7
- pipeline = null;
8
- initPromise = null;
9
- _error = null;
10
- _variant = "unknown";
11
- modelsDir;
12
- constructor(model, modelsDir) {
13
- this.model = model;
14
- this.modelsDir = modelsDir;
15
- }
16
- // ---- 公开状态 ----
17
- get ready() { return this.pipeline !== null; }
18
- get error() { return this._error; }
19
- get modelInfo() { return this.model; }
20
- get source() {
21
- if (!this.pipeline)
22
- return "未加载";
23
- return this.hasLocal() ? `本地 (${this._variant.toUpperCase()})` : "远程 (HuggingFace)";
24
- }
25
- // ---- 初始化 ----
26
- async init() {
27
- if (this.pipeline)
28
- return true;
29
- if (this.initPromise)
30
- return this.initPromise;
31
- this.initPromise = this.doInit();
32
- return this.initPromise;
33
- }
34
- // ---- 嵌入 ----
35
- async embed(text) {
36
- const pipe = await this.ensure();
37
- const result = await pipe(text, { pooling: "mean", normalize: true });
38
- return Array.from(result.data);
39
- }
40
- async embedBatch(texts) {
41
- const pipe = await this.ensure();
42
- const out = [];
43
- for (const t of texts) {
44
- const r = await pipe(t, { pooling: "mean", normalize: true });
45
- out.push(Array.from(r.data));
46
- }
47
- return out;
48
- }
49
- // ---- 模型下载 ----
50
- async download() {
51
- const dirName = this.model.hfRepo.split("/").pop();
52
- const targetDir = resolve(this.modelsDir, dirName);
53
- const onnxDir = resolve(targetDir, "onnx");
54
- if (existsSync(resolve(targetDir, "config.json"))
55
- && existsSync(resolve(onnxDir, "model_quantized.onnx"))) {
56
- return { ok: true, msg: `模型已存在: ${dirName}` };
57
- }
58
- try {
59
- mkdirSync(onnxDir, { recursive: true });
60
- const base = `https://hf-mirror.com/${this.model.hfRepo}/resolve/main`;
61
- for (const [rel, out] of [
62
- ["config.json", resolve(targetDir, "config.json")],
63
- ["tokenizer_config.json", resolve(targetDir, "tokenizer_config.json")],
64
- ["tokenizer.json", resolve(targetDir, "tokenizer.json")],
65
- ["onnx/model_quantized.onnx", resolve(onnxDir, "model_quantized.onnx")],
66
- ]) {
67
- execSync(`curl -L -f -s -o "${out}" "${base}/${rel}"`, { timeout: 600_000 });
68
- }
69
- return { ok: true, msg: `已下载: ${dirName} (${fmtSize(this.model.int8Size)})` };
70
- }
71
- catch (e) {
72
- return { ok: false, msg: `下载失败: ${e?.message || String(e)}` };
73
- }
74
- }
75
- // ---- 切换模型 ----
76
- switchModel(newModel) {
77
- this.model = newModel;
78
- this.pipeline = null;
79
- this.initPromise = null;
80
- this._error = null;
81
- }
82
- // ---- 内部 ----
83
- localDir() {
84
- const name = this.model.hfRepo.split("/").pop() || this.model.hfRepo;
85
- return resolve(this.modelsDir, name);
86
- }
87
- hasLocal() {
88
- const dir = this.localDir();
89
- if (!existsSync(dir))
90
- return false;
91
- if (!existsSync(resolve(dir, "config.json")))
92
- return false;
93
- const onnx = resolve(dir, "onnx");
94
- return existsSync(onnx) && (existsSync(resolve(onnx, "model.onnx")) ||
95
- existsSync(resolve(onnx, "model_quantized.onnx")));
96
- }
97
- resolvePath() {
98
- return this.hasLocal() ? this.localDir() : this.model.hfRepo;
99
- }
100
- async doInit() {
101
- try {
102
- const { pipeline: mkPipeline, env } = await import("@huggingface/transformers");
103
- const isLocal = this.hasLocal();
104
- if (isLocal)
105
- env.allowLocalModels = true;
106
- const opts = {};
107
- if (isLocal) {
108
- const onnx = resolve(this.localDir(), "onnx");
109
- if (!existsSync(resolve(onnx, "model.onnx"))
110
- && existsSync(resolve(onnx, "model_quantized.onnx"))) {
111
- opts.model_file_name = "model_quantized";
112
- }
113
- opts.progress_callback = null;
114
- }
115
- this.pipeline = await mkPipeline("feature-extraction", this.resolvePath(), opts);
116
- this._variant = isLocal ? "int8" : "remote";
117
- this._error = null;
118
- return true;
119
- }
120
- catch (e) {
121
- this._error = e?.message || String(e);
122
- this.pipeline = null;
123
- return false;
124
- }
125
- finally {
126
- this.initPromise = null;
127
- }
128
- }
129
- async ensure() {
130
- if (this.pipeline)
131
- return this.pipeline;
132
- const ok = await this.init();
133
- if (!ok)
134
- throw new Error(`Embedder 未初始化: ${this._error || "未知错误"}`);
135
- return this.pipeline;
136
- }
137
- }
138
- function fmtSize(bytes) {
139
- if (bytes >= 1e9)
140
- return `${(bytes / 1e9).toFixed(1)} GB`;
141
- if (bytes >= 1e6)
142
- return `${(bytes / 1e6).toFixed(0)} MB`;
143
- if (bytes >= 1e3)
144
- return `${(bytes / 1e3).toFixed(0)} KB`;
145
- return `${bytes} B`;
146
- }
147
- //# sourceMappingURL=WikiEmbedder.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"WikiEmbedder.js","sourceRoot":"","sources":["../../src/embed/WikiEmbedder.ts"],"names":[],"mappings":"AAAA,8BAA8B;AAE9B,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAG9C,MAAM,OAAO,YAAY;IAQb;IAPF,QAAQ,GAAQ,IAAI,CAAC;IACrB,WAAW,GAA4B,IAAI,CAAC;IAC5C,MAAM,GAAkB,IAAI,CAAC;IAC7B,QAAQ,GAAW,SAAS,CAAC;IAC7B,SAAS,CAAS;IAE1B,YACU,KAAgB,EACxB,SAAiB;QADT,UAAK,GAAL,KAAK,CAAW;QAGxB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,iBAAiB;IAEjB,IAAI,KAAK,KAAU,OAAO,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC,CAAC;IACnD,IAAI,KAAK,KAAU,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACxC,IAAI,SAAS,KAAM,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAEvC,IAAI,MAAM;QACR,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,OAAO,KAAK,CAAC;QACjC,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,kBAAkB,CAAC;IACtF,CAAC;IAED,gBAAgB;IAEhB,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAC/B,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,WAAW,CAAC;QAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACjC,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,eAAe;IAEf,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACtE,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAa,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,GAAG,GAAe,EAAE,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAa,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,iBAAiB;IAEjB,KAAK,CAAC,QAAQ;QACZ,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC;QACpD,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAE3C,IAAI,UAAU,CAAC,OAAO,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;eAC1C,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC,EAAE,CAAC;YAC5D,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,UAAU,OAAO,EAAE,EAAE,CAAC;QAChD,CAAC;QAED,IAAI,CAAC;YACH,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACxC,MAAM,IAAI,GAAG,yBAAyB,IAAI,CAAC,KAAK,CAAC,MAAM,eAAe,CAAC;YACvE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI;gBACvB,CAAC,aAAa,EAAE,OAAO,CAAC,SAAS,EAAE,aAAa,CAAC,CAAqB;gBACtE,CAAC,uBAAuB,EAAE,OAAO,CAAC,SAAS,EAAE,uBAAuB,CAAC,CAAC;gBACtE,CAAC,gBAAgB,EAAE,OAAO,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;gBACxD,CAAC,2BAA2B,EAAE,OAAO,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;aACxE,EAAE,CAAC;gBACF,QAAQ,CAAC,qBAAqB,GAAG,MAAM,IAAI,IAAI,GAAG,GAAG,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;YAC/E,CAAC;YACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,QAAQ,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;QAChF,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,CAAC,EAAE,OAAO,IAAI,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAChE,CAAC;IACH,CAAC;IAED,iBAAiB;IAEjB,WAAW,CAAC,QAAmB;QAC7B,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;QACtB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,eAAe;IAEP,QAAQ;QACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;QACrE,OAAO,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACvC,CAAC;IAEO,QAAQ;QACd,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC5B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAC3D,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QAClC,OAAO,UAAU,CAAC,IAAI,CAAC,IAAI,CACzB,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;YACvC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,sBAAsB,CAAC,CAAC,CAClD,CAAC;IACJ,CAAC;IAEO,WAAW;QACjB,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAC/D,CAAC;IAEO,KAAK,CAAC,MAAM;QAClB,IAAI,CAAC;YACH,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;YAChF,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChC,IAAI,OAAO;gBAAE,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAEzC,MAAM,IAAI,GAAQ,EAAE,CAAC;YACrB,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,MAAM,CAAC,CAAC;gBAC9C,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;uBACrC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,sBAAsB,CAAC,CAAC,EAAE,CAAC;oBACzD,IAAI,CAAC,eAAe,GAAG,iBAAiB,CAAC;gBAC3C,CAAC;gBACD,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;YAChC,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,MAAM,UAAU,CAAC,oBAAoB,EAAE,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;YACjF,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;YAC5C,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;YACnB,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,OAAO,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;YACtC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;YACrB,OAAO,KAAK,CAAC;QACf,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,MAAM;QAClB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QACxC,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,IAAI,CAAC,MAAM,IAAI,MAAM,EAAE,CAAC,CAAC;QACpE,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;CACF;AAED,SAAS,OAAO,CAAC,KAAa;IAC5B,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;IAC1D,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;IAC1D,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;IAC1D,OAAO,GAAG,KAAK,IAAI,CAAC;AACtB,CAAC"}
@@ -1,2 +0,0 @@
1
- export { WikiEmbedder } from "./WikiEmbedder.js";
2
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embed/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,2 +0,0 @@
1
- export { WikiEmbedder } from "./WikiEmbedder.js";
2
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embed/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,24 +0,0 @@
1
- /** LLM 配置 */
2
- export interface LLMConfig {
3
- /** API base URL. Default: https://api.deepseek.com */
4
- apiBase?: string;
5
- /** API key. Default: env DEEPSEEK_API_KEY or OPENAI_API_KEY */
6
- apiKey?: string;
7
- /** Model name. Default: deepseek-v4-flash */
8
- model?: string;
9
- }
10
- export declare class WikiLLM {
11
- private apiBase;
12
- private apiKey;
13
- private model;
14
- constructor(config?: LLMConfig);
15
- get isConfigured(): boolean;
16
- get info(): {
17
- apiBase: string;
18
- model: string;
19
- hasKey: boolean;
20
- };
21
- /** 单次 JSON 模式调用 */
22
- chatJson(system: string, user: string): Promise<string>;
23
- }
24
- //# sourceMappingURL=WikiLLM.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"WikiLLM.d.ts","sourceRoot":"","sources":["../../src/llm/WikiLLM.ts"],"names":[],"mappings":"AAEA,aAAa;AACb,MAAM,WAAW,SAAS;IACxB,sDAAsD;IACtD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+DAA+D;IAC/D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAMD,qBAAa,OAAO;IAClB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,GAAE,SAAc;IAMlC,IAAI,YAAY,IAAI,OAAO,CAAmC;IAE9D,IAAI,IAAI;;;;MAAsF;IAE9F,mBAAmB;IACb,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CA8B9D"}