botrun-horse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +1 -0
  2. package/bin/bh.mjs +193 -0
  3. package/bin/commands/dag-cmd.mjs +74 -0
  4. package/bin/commands/db-cmd.mjs +73 -0
  5. package/bin/commands/doc.mjs +185 -0
  6. package/bin/commands/gemini.mjs +120 -0
  7. package/bin/commands/help.mjs +109 -0
  8. package/bin/commands/legal.mjs +174 -0
  9. package/bin/commands/nchc.mjs +212 -0
  10. package/bin/commands/openrouter.mjs +154 -0
  11. package/bin/commands/prompt.mjs +175 -0
  12. package/bin/commands/schema.mjs +258 -0
  13. package/bin/commands/search.mjs +46 -0
  14. package/bin/commands/writing.mjs +33 -0
  15. package/lib/core/adapters/base.mjs +52 -0
  16. package/lib/core/adapters/claude.mjs +13 -0
  17. package/lib/core/adapters/gemini-api.mjs +174 -0
  18. package/lib/core/adapters/gemini-shared.mjs +164 -0
  19. package/lib/core/adapters/gemini-vertex.mjs +232 -0
  20. package/lib/core/adapters/local.mjs +13 -0
  21. package/lib/core/adapters/nchc.mjs +236 -0
  22. package/lib/core/adapters/openai-shared.mjs +34 -0
  23. package/lib/core/adapters/openrouter.mjs +304 -0
  24. package/lib/core/ai-cache.mjs +277 -0
  25. package/lib/core/ai-router.mjs +217 -0
  26. package/lib/core/cli-utils.mjs +170 -0
  27. package/lib/core/dag.mjs +114 -0
  28. package/lib/core/db.mjs +412 -0
  29. package/lib/core/env.mjs +64 -0
  30. package/lib/core/llm.mjs +58 -0
  31. package/lib/core/paths.mjs +115 -0
  32. package/lib/core/proxy.mjs +46 -0
  33. package/lib/core/watermelon.mjs +9 -0
  34. package/lib/doc/index.mjs +419 -0
  35. package/lib/doc/office2text.mjs +234 -0
  36. package/lib/doc/pdf2text.mjs +133 -0
  37. package/lib/doc/split.mjs +132 -0
  38. package/lib/flows/draft-writing.mjs +29 -0
  39. package/lib/flows/gemini-ask.mjs +185 -0
  40. package/lib/flows/hatch-portal.mjs +13 -0
  41. package/lib/flows/legal-ask.mjs +325 -0
  42. package/lib/flows/openai-agent.mjs +167 -0
  43. package/lib/flows/opencode-agent.mjs +240 -0
  44. package/lib/flows/openrouter-ask.mjs +111 -0
  45. package/lib/flows/review-doc.mjs +18 -0
  46. package/lib/ocr/index.mjs +6 -0
  47. package/lib/portal/hatch.mjs +6 -0
  48. package/lib/portal/index.mjs +6 -0
  49. package/lib/prompt/prompt-search.mjs +55 -0
  50. package/lib/prompt/prompt-store.mjs +94 -0
  51. package/lib/prompt/prompts/zero-framework/coding.md +15 -0
  52. package/lib/prompt/prompts/zero-framework/search.md +12 -0
  53. package/lib/prompt/prompts/zero-framework/slice.md +11 -0
  54. package/lib/search/crawler.mjs +6 -0
  55. package/lib/search/index.mjs +7 -0
  56. package/lib/tools/fs-tools.mjs +268 -0
  57. package/lib/tools/index.mjs +27 -0
  58. package/lib/writing/generate.mjs +86 -0
  59. package/lib/writing/generators/nstc-generators.mjs +279 -0
  60. package/lib/writing/generators/nstc-top5.mjs +554 -0
  61. package/lib/writing/index.mjs +5 -0
  62. package/lib/writing/layouts/nstc-layout.mjs +249 -0
  63. package/lib/writing/renderer.mjs +61 -0
  64. package/package.json +35 -0
@@ -0,0 +1,277 @@
1
+ // lib/core/ai-cache.mjs — 通用 AI 問答快取 SQLite 層
2
+ //
3
+ // 設計原則:SOLID / DRY / KISS / DDD
4
+ // - 單一職責:只負責 SQLite 讀寫,不含路由或 LLM 業務邏輯
5
+ // - 冪等設計:重複儲存相同問題不拋例外
6
+ // - DDD Value Object:QaEntry(問題 + 標籤 + 兩步回答 + 來源)
7
+ // - 通用能力:不限定法律領域,任何領域皆可使用
8
+ //
9
+ // 使用方式:
10
+ // import { AiCache } from '../core/ai-cache.mjs';
11
+ // const cache = new AiCache(dbPath);
12
+ // cache.initSchema();
13
+ // cache.insertQa({ question, tags, step1Laws, step2Answer, sources });
14
+ // const candidates = cache.findByTags(['租賃', '驅逐']);
15
+ // cache.incrementHit(id);
16
+ //
17
+ // 需要 Node.js --experimental-sqlite flag:
18
+ // node --experimental-sqlite bin/bh.mjs ...
19
+
20
+ import { DatabaseSync } from 'node:sqlite';
21
+ import fs from 'fs';
22
+ import path from 'path';
23
+
24
+ /**
25
+ * AiCache — 通用 AI 問答快取存取層
26
+ *
27
+ * 表結構:
28
+ * qa_cache — 問答快取主表(問題、標籤、兩步回答、來源)
29
+ * router_log — 路由決策紀錄(供效能分析與除錯)
30
+ */
31
+ export class AiCache {
32
+ /**
33
+ * 開啟或建立 SQLite 資料庫
34
+ * @param {string} dbPath - 資料庫檔案路徑
35
+ */
36
+ constructor(dbPath) {
37
+ const dir = path.dirname(dbPath);
38
+ if (!fs.existsSync(dir)) {
39
+ fs.mkdirSync(dir, { recursive: true });
40
+ }
41
+ this.dbPath = dbPath;
42
+ this.db = new DatabaseSync(dbPath);
43
+ this.db.exec('PRAGMA journal_mode = WAL');
44
+ this.db.exec('PRAGMA foreign_keys = ON');
45
+ this.db.exec('PRAGMA synchronous = NORMAL');
46
+ }
47
+
48
+ /**
49
+ * 建立資料表(冪等,可重複呼叫)
50
+ */
51
+ initSchema() {
52
+ // ── qa_cache:問答快取主表 ──
53
+ this.db.exec(`
54
+ CREATE TABLE IF NOT EXISTS qa_cache (
55
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
56
+ question TEXT NOT NULL,
57
+ tags TEXT NOT NULL DEFAULT '[]',
58
+ step1_laws TEXT,
59
+ step2_answer TEXT,
60
+ sources TEXT NOT NULL DEFAULT '[]',
61
+ model_step1 TEXT,
62
+ model_step2 TEXT,
63
+ hit_count INTEGER NOT NULL DEFAULT 0,
64
+ created_at TEXT DEFAULT (datetime('now')),
65
+ updated_at TEXT DEFAULT (datetime('now'))
66
+ )
67
+ `);
68
+
69
+ // ── router_log:路由決策紀錄 ──
70
+ this.db.exec(`
71
+ CREATE TABLE IF NOT EXISTS router_log (
72
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
73
+ question TEXT NOT NULL,
74
+ decision TEXT NOT NULL,
75
+ cache_id INTEGER REFERENCES qa_cache(id),
76
+ confidence REAL,
77
+ tags TEXT NOT NULL DEFAULT '[]',
78
+ reason TEXT,
79
+ latency_ms INTEGER,
80
+ created_at TEXT DEFAULT (datetime('now'))
81
+ )
82
+ `);
83
+ }
84
+
85
+ // ─────────────────────────────────────────────────────────
86
+ // qa_cache:問答快取 CRUD
87
+ // ─────────────────────────────────────────────────────────
88
+
89
+ /**
90
+ * 儲存問答快取(每次都新增一筆,允許相同問題有多版本)
91
+ *
92
+ * @param {object} entry
93
+ * @param {string} entry.question - 使用者原始問題
94
+ * @param {string[]} entry.tags - 法律領域標籤陣列(由 AI Router 萃取)
95
+ * @param {string} [entry.step1Laws] - Step1 搜尋到的法條(JSON 字串)
96
+ * @param {string} [entry.step2Answer] - Step2 最終回答
97
+ * @param {Array} [entry.sources] - 引證來源陣列 [{uri, title}]
98
+ * @param {string} [entry.modelStep1] - Step1 使用的模型
99
+ * @param {string} [entry.modelStep2] - Step2 使用的模型
100
+ * @returns {number} 新建的快取 ID
101
+ */
102
+ insertQa({ question, tags = [], step1Laws = null, step2Answer = null, sources = [], modelStep1 = null, modelStep2 = null }) {
103
+ const result = this.db.prepare(`
104
+ INSERT INTO qa_cache (question, tags, step1_laws, step2_answer, sources, model_step1, model_step2)
105
+ VALUES (?, ?, ?, ?, ?, ?, ?)
106
+ `).run(
107
+ question,
108
+ JSON.stringify(tags),
109
+ step1Laws,
110
+ step2Answer,
111
+ JSON.stringify(sources),
112
+ modelStep1,
113
+ modelStep2,
114
+ );
115
+ return Number(result.lastInsertRowid);
116
+ }
117
+
118
+ /**
119
+ * 依標籤交集查詢快取候選(標籤重疊數 >= minOverlap 才回傳)
120
+ *
121
+ * 策略:JSON 欄位逐一比對,適合快取量 < 10,000 筆的場景。
122
+ * 大型快取應改用全文索引或向量搜尋。
123
+ *
124
+ * @param {string[]} tags - 查詢標籤陣列
125
+ * @param {number} [minOverlap=1] - 最少重疊標籤數
126
+ * @param {number} [limit=10] - 回傳筆數上限
127
+ * @returns {Array<QaEntry>} 候選快取條目(降冪排序:hit_count DESC, created_at DESC)
128
+ */
129
+ findByTags(tags, minOverlap = 1, limit = 10) {
130
+ if (!tags || tags.length === 0) return [];
131
+
132
+ // 取出所有有 step2_answer 的快取(已完整回答)
133
+ const rows = this.db.prepare(`
134
+ SELECT id, question, tags, step1_laws, step2_answer, sources,
135
+ model_step1, model_step2, hit_count, created_at
136
+ FROM qa_cache
137
+ WHERE step2_answer IS NOT NULL
138
+ ORDER BY hit_count DESC, created_at DESC
139
+ `).all();
140
+
141
+ // 計算標籤交集數量,過濾後排序
142
+ const candidates = rows
143
+ .map(row => {
144
+ let cachedTags = [];
145
+ try { cachedTags = JSON.parse(row.tags); } catch { /* 忽略 JSON 解析失敗 */ }
146
+
147
+ const overlap = tags.filter(t => cachedTags.includes(t)).length;
148
+ return { ...row, _overlap: overlap };
149
+ })
150
+ .filter(row => row._overlap >= minOverlap)
151
+ .sort((a, b) => b._overlap - a._overlap || b.hit_count - a.hit_count)
152
+ .slice(0, limit);
153
+
154
+ // 還原 JSON 欄位(DDD Value Object)
155
+ return candidates.map(row => ({
156
+ id: Number(row.id),
157
+ question: row.question,
158
+ tags: (() => { try { return JSON.parse(row.tags); } catch { return []; } })(),
159
+ step1Laws: row.step1_laws,
160
+ step2Answer: row.step2_answer,
161
+ sources: (() => { try { return JSON.parse(row.sources); } catch { return []; } })(),
162
+ modelStep1: row.model_step1,
163
+ modelStep2: row.model_step2,
164
+ hitCount: Number(row.hit_count),
165
+ overlap: row._overlap,
166
+ }));
167
+ }
168
+
169
+ /**
170
+ * 依 ID 取得快取條目
171
+ * @param {number} id
172
+ * @returns {QaEntry|undefined}
173
+ */
174
+ getById(id) {
175
+ const row = this.db.prepare('SELECT * FROM qa_cache WHERE id = ?').get(id);
176
+ if (!row) return undefined;
177
+ return {
178
+ id: Number(row.id),
179
+ question: row.question,
180
+ tags: (() => { try { return JSON.parse(row.tags); } catch { return []; } })(),
181
+ step1Laws: row.step1_laws,
182
+ step2Answer: row.step2_answer,
183
+ sources: (() => { try { return JSON.parse(row.sources); } catch { return []; } })(),
184
+ modelStep1: row.model_step1,
185
+ modelStep2: row.model_step2,
186
+ hitCount: Number(row.hit_count),
187
+ };
188
+ }
189
+
190
+ /**
191
+ * 累加快取命中次數(每次 cache hit 後呼叫)
192
+ * @param {number} id - 快取條目 ID
193
+ */
194
+ incrementHit(id) {
195
+ this.db.prepare(`
196
+ UPDATE qa_cache
197
+ SET hit_count = hit_count + 1, updated_at = datetime('now')
198
+ WHERE id = ?
199
+ `).run(id);
200
+ }
201
+
202
+ // ─────────────────────────────────────────────────────────
203
+ // router_log:路由決策紀錄
204
+ // ─────────────────────────────────────────────────────────
205
+
206
+ /**
207
+ * 記錄路由決策(供效能分析)
208
+ *
209
+ * @param {object} log
210
+ * @param {string} log.question - 使用者問題
211
+ * @param {string} log.decision - 'cache' | 'search'
212
+ * @param {number} [log.cacheId] - 命中的快取 ID(decision='cache' 時有值)
213
+ * @param {number} [log.confidence]- 信心分數(0.0 ~ 1.0)
214
+ * @param {string[]} [log.tags] - 問題標籤
215
+ * @param {string} [log.reason] - 決策原因說明
216
+ * @param {number} [log.latencyMs] - 路由決策耗時(毫秒)
217
+ */
218
+ logRouterDecision({ question, decision, cacheId = null, confidence = null, tags = [], reason = null, latencyMs = null }) {
219
+ this.db.prepare(`
220
+ INSERT INTO router_log (question, decision, cache_id, confidence, tags, reason, latency_ms)
221
+ VALUES (?, ?, ?, ?, ?, ?, ?)
222
+ `).run(question, decision, cacheId, confidence, JSON.stringify(tags), reason, latencyMs);
223
+ }
224
+
225
+ // ─────────────────────────────────────────────────────────
226
+ // 統計資訊
227
+ // ─────────────────────────────────────────────────────────
228
+
229
+ /**
230
+ * 取得快取統計
231
+ * @returns {object} 統計資訊
232
+ */
233
+ stats() {
234
+ const total = this.db.prepare('SELECT COUNT(*) AS count FROM qa_cache WHERE step2_answer IS NOT NULL').get();
235
+ const totalHits = this.db.prepare('SELECT COALESCE(SUM(hit_count), 0) AS total FROM qa_cache').get();
236
+ const routerLog = this.db.prepare(`
237
+ SELECT decision, COUNT(*) AS count FROM router_log GROUP BY decision
238
+ `).all();
239
+ const topHit = this.db.prepare(`
240
+ SELECT question, hit_count FROM qa_cache
241
+ WHERE step2_answer IS NOT NULL
242
+ ORDER BY hit_count DESC LIMIT 5
243
+ `).all();
244
+
245
+ return {
246
+ cachedQuestions: Number(total.count),
247
+ totalCacheHits: Number(totalHits.total),
248
+ routerDecisions: routerLog,
249
+ topHitQuestions: topHit,
250
+ };
251
+ }
252
+
253
+ /**
254
+ * 列出所有快取問題(用於除錯)
255
+ * @param {number} [limit=20]
256
+ */
257
+ listAll(limit = 20) {
258
+ return this.db.prepare(`
259
+ SELECT id, question, tags, hit_count, created_at
260
+ FROM qa_cache WHERE step2_answer IS NOT NULL
261
+ ORDER BY created_at DESC LIMIT ?
262
+ `).all(limit).map(row => ({
263
+ id: Number(row.id),
264
+ question: row.question,
265
+ tags: (() => { try { return JSON.parse(row.tags); } catch { return []; } })(),
266
+ hitCount: Number(row.hit_count),
267
+ createdAt: row.created_at,
268
+ }));
269
+ }
270
+
271
+ /**
272
+ * 關閉資料庫連線
273
+ */
274
+ close() {
275
+ this.db.close();
276
+ }
277
+ }
@@ -0,0 +1,217 @@
1
+ // lib/core/ai-router.mjs — 通用 AI 路由決策層
2
+ //
3
+ // 功能:用小模型(Mistral 14B)決定是否使用快取還是呼叫大模型
4
+ //
5
+ // 架構設計(SOLID/DDD):
6
+ // SRP — 只負責路由決策,不執行 LLM 查詢
7
+ // OCP — 可換不同小模型而不改介面
8
+ // DDD — RouterDecision Value Object
9
+ //
10
+ // 兩步路由策略(方案 B:標籤索引 + LLM 確認):
11
+ // 1. 萃取標籤:Mistral 14B 從問題中萃取法律領域標籤
12
+ // 2. 標籤篩選:SQLite 找到標籤重疊的快取候選
13
+ // 3. LLM 確認:Mistral 14B 確認候選是否語義相關
14
+ // 4. 決策輸出:RouterDecision { decision, cacheId, confidence, tags, reason }
15
+ //
16
+ // RouterDecision Value Object:
17
+ // { decision: 'cache'|'search', cacheId: number|null,
18
+ // confidence: float, tags: string[], reason: string }
19
+
20
+ import { createLLM } from './llm.mjs';
21
+
22
/**
 * Find the index of the `}` that closes the `{` at `start`.
 * Braces inside double-quoted strings (with backslash escapes) are ignored.
 * @param {string} text
 * @param {number} start - Index of an opening `{`.
 * @returns {number} Index of the matching `}`, or -1 if the braces never balance.
 */
function findBalancedObjectEnd(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === '{') depth += 1;
    else if (ch === '}') {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Tolerantly extract a JSON object from model output that may be wrapped in a
 * markdown code fence or surrounded by prose.
 *
 * Strategies, in order:
 *   1. parse the whole (trimmed) text;
 *   2. parse the contents of the first ``` / ```json fence;
 *   3. parse the first brace-balanced `{...}` span that is valid JSON.
 *
 * Only objects/arrays are returned; JSON primitives such as `42` or `"text"`
 * count as "no JSON found" so callers can safely read properties off the result.
 *
 * @param {string} text
 * @returns {object|null} The parsed value, or null when nothing usable is found.
 */
function extractJson(text) {
  const trimmed = typeof text === 'string' ? text.trim() : '';
  if (trimmed === '') return null;

  const parseObject = (candidate) => {
    try {
      const value = JSON.parse(candidate);
      return value !== null && typeof value === 'object' ? value : null;
    } catch {
      return null;
    }
  };

  // 1. The text is already pure JSON.
  const direct = parseObject(trimmed);
  if (direct) return direct;

  // 2. Strip a markdown code fence.
  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) {
    const inner = parseObject(fenced[1].trim());
    if (inner) return inner;
  }

  // 3. Scan for the first balanced {...} that parses. A greedy first-"{"-to-last-"}"
  //    match would fail whenever the text holds two objects or stray braces.
  for (let start = trimmed.indexOf('{'); start !== -1; start = trimmed.indexOf('{', start + 1)) {
    const end = findBalancedObjectEnd(trimmed, start);
    if (end === -1) continue;
    const found = parseObject(trimmed.slice(start, end + 1));
    if (found) return found;
  }
  return null;
}
44
+
45
+ // ── 提示詞常數(SOLID OCP:修改提示不改程式結構) ─────────────────────────
46
+
47
/**
 * Step-1 system prompt: ask the model to extract legal-domain tags from the
 * user's question and reply with JSON only, so the output is easy to parse.
 */
const EXTRACT_TAGS_SYSTEM = [
  '你是法律分類專家。你的任務是從使用者的問題中萃取關鍵的法律領域標籤。',
  '',
  '規則:',
  '1. 只輸出 JSON,不輸出其他文字',
  '2. 標籤必須是繁體中文、簡短(1-4 個字)',
  '3. 最多 5 個標籤',
  '4. 聚焦於法律概念,而非口語描述(例如「驅逐」而非「趕走」)',
  '',
  '輸出格式(嚴格遵守):',
  '{"tags": ["標籤1", "標籤2", "標籤3"]}',
].join('\n');
61
+
62
/**
 * Step-2 system prompt: ask the model whether an existing cached answer is
 * suitable for the new question, replying with a JSON verdict only.
 */
const CONFIRM_CACHE_SYSTEM = [
  '你是法律問答助理。你的任務是判斷既有的快取答案是否適合回答新的使用者問題。',
  '',
  '規則:',
  '1. 只輸出 JSON,不輸出其他文字',
  '2. 若快取答案涵蓋的法律概念與新問題高度重疊(≥ 80%),判定為適用',
  '3. 考量問題的法律情境是否相同,不只看表面用詞',
  '',
  '輸出格式(嚴格遵守):',
  '{"useCache": true|false, "cacheId": <數字或null>, "confidence": 0.0~1.0, "reason": "一句話說明"}',
].join('\n');
75
+
76
+ // ─────────────────────────────────────────────────────────────────────────────
77
+
78
/**
 * AiRouter — decides whether a question can be answered from the cache.
 *
 * A small "router" model first extracts tags from the question; cache entries
 * sharing enough tags become candidates; the router model then confirms
 * whether one of those candidates really fits. The result is a RouterDecision:
 *   { decision: 'cache'|'search', cacheId, confidence, tags, reason, latencyMs }
 *
 * The router never trusts the model blindly: a cache hit is only reported when
 * the model explicitly says so AND names one of the candidates it was shown.
 */
export class AiRouter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.provider='nchc'] - Provider of the router model.
   * @param {string} [opts.model='Ministral-3-14B-Instruct-2512'] - Router model ID.
   * @param {number} [opts.tagCacheThreshold=2] - Minimum tag overlap for a cache entry to become a candidate.
   * @param {number} [opts.confidenceThreshold=0.75] - Below this confidence a cache match is rejected.
   */
  constructor({
    provider = 'nchc',
    model = 'Ministral-3-14B-Instruct-2512',
    tagCacheThreshold = 2,
    confidenceThreshold = 0.75,
  } = {}) {
    this.provider = provider;
    this.model = model;
    this.tagCacheThreshold = tagCacheThreshold;
    this.confidenceThreshold = confidenceThreshold;
  }

  /**
   * Keep only non-empty string tags, trimmed and de-duplicated (first occurrence wins).
   * @param {Array} rawTags
   * @returns {string[]}
   */
  static #cleanTags(rawTags) {
    const unique = new Set();
    for (const tag of rawTags) {
      if (typeof tag !== 'string') continue;
      const trimmed = tag.trim();
      if (trimmed !== '') unique.add(trimmed);
    }
    return [...unique];
  }

  /**
   * Read the text of a model response, tolerating a missing or non-string `text`.
   * @param {{text?: unknown}|null|undefined} result
   * @returns {string}
   */
  static #textOf(result) {
    return typeof result?.text === 'string' ? result.text : '';
  }

  /**
   * Create the router-model adapter (shared by both LLM steps).
   * @returns {Promise<object>} Adapter exposing `generateContent({ prompt, systemInstruction })`.
   */
  async _createRouterLlm() {
    return createLLM({ provider: this.provider, model: this.model, temperature: 0.1 });
  }

  /**
   * Step 1: extract tags from the question.
   *
   * @param {string} question - User question.
   * @returns {Promise<string[]>} Cleaned tag list; [] when the model output is unusable.
   */
  async extractTags(question) {
    const llm = await this._createRouterLlm();
    const result = await llm.generateContent({
      prompt: `使用者問題:${question}`,
      systemInstruction: EXTRACT_TAGS_SYSTEM,
    });
    const text = AiRouter.#textOf(result);

    const parsed = extractJson(text);
    if (parsed && Array.isArray(parsed.tags)) return AiRouter.#cleanTags(parsed.tags);

    // Last resort: pull the tags array out with a regex when the JSON is malformed.
    const match = text.match(/"tags"\s*:\s*\[([^\]]+)\]/);
    if (match) {
      return AiRouter.#cleanTags(match[1].split(',').map((t) => t.replace(/["'\s]/g, '')));
    }
    return [];
  }

  /**
   * Step 2: ask the router model whether one of the candidates answers the question.
   *
   * Only the first three candidates are shown, and only their question and
   * tags (not the full answers) to keep the prompt short.
   *
   * `useCache` is true only when the model returned a literal true (or the
   * string "true") together with the id of one of the shown candidates.
   * Confidence is clamped to the 0..1 range.
   *
   * @param {string} question - User question.
   * @param {Array<{id: number, question: string, tags: string[]}>} candidates - From AiCache.findByTags.
   * @returns {Promise<{useCache: boolean, cacheId: number|null, confidence: number, reason: string}>}
   */
  async confirmCacheMatch(question, candidates) {
    if (!candidates || candidates.length === 0) {
      return { useCache: false, cacheId: null, confidence: 0, reason: '無快取候選' };
    }

    const shown = candidates.slice(0, 3);
    const candidateList = shown
      .map((c, i) => {
        const tagText = (Array.isArray(c.tags) ? c.tags : []).join('、');
        return `[${i + 1}] id=${c.id} 問題:「${c.question}」 標籤:${tagText}`;
      })
      .join('\n');

    const llm = await this._createRouterLlm();
    const result = await llm.generateContent({
      systemInstruction: CONFIRM_CACHE_SYSTEM,
      prompt: `新問題:「${question}」\n\n快取候選:\n${candidateList}`,
    });
    const text = AiRouter.#textOf(result);

    const parsed = extractJson(text);
    if (!parsed || typeof parsed.useCache === 'undefined') {
      // Output could not be parsed at all: be conservative and skip the cache.
      return { useCache: false, cacheId: null, confidence: 0, reason: `路由模型輸出解析失敗(原文:${text.substring(0, 100)}),保守策略不用快取` };
    }

    // Boolean("false") is true, so accept only an explicit true.
    const wantsCache = parsed.useCache === true || parsed.useCache === 'true';

    const rawId = parsed.cacheId;
    const id = rawId === null || rawId === undefined || rawId === '' ? Number.NaN : Number(rawId);
    const isKnownId = shown.some((c) => Number(c.id) === id);

    const confidence = Math.min(1, Math.max(0, Number(parsed.confidence) || 0));

    if (wantsCache && !isKnownId) {
      // The model picked an id it was never shown (or none): that answer cannot be served.
      return {
        useCache: false,
        cacheId: null,
        confidence,
        reason: `路由模型回傳的 cacheId(${String(rawId)})不在候選清單中,保守策略不用快取`,
      };
    }

    return {
      useCache: wantsCache,
      cacheId: isKnownId ? id : null,
      confidence,
      reason: String(parsed.reason || ''),
    };
  }

  /**
   * Full routing decision: extract tags, look up candidates, confirm with the model.
   *
   * @param {string} question - User question.
   * @param {{findByTags: Function}} cache - Cache access layer (injected), e.g. an AiCache.
   * @returns {Promise<{decision: 'cache'|'search', cacheId: number|null, confidence: number,
   *   tags: string[], reason: string, latencyMs: number}>}
   */
  async route(question, cache) {
    const t0 = performance.now();

    // Step 1: tags
    const tags = await this.extractTags(question);

    // Step 2: candidates by tag overlap
    const candidates = cache.findByTags(tags, this.tagCacheThreshold);

    // Step 3: model confirmation, only when there is something to confirm
    let matchResult = { useCache: false, cacheId: null, confidence: 0, reason: '無標籤重疊候選' };
    if (candidates.length > 0) {
      matchResult = await this.confirmCacheMatch(question, candidates);
    }

    const latencyMs = Math.round(performance.now() - t0);

    // Step 4: a match below the confidence threshold is not trusted
    if (matchResult.useCache && matchResult.confidence < this.confidenceThreshold) {
      matchResult = {
        ...matchResult,
        useCache: false,
        reason: `信心分數 ${matchResult.confidence} 低於閾值 ${this.confidenceThreshold},保守策略呼叫大模型`,
      };
    }

    return {
      decision: matchResult.useCache ? 'cache' : 'search',
      cacheId: matchResult.useCache ? matchResult.cacheId : null,
      confidence: matchResult.confidence,
      tags,
      reason: matchResult.reason,
      latencyMs,
    };
  }
}
@@ -0,0 +1,170 @@
1
+ // lib/core/cli-utils.mjs — CLI 共用工具
2
+ // 從原 src/cli.mjs 抽出的通用函式
3
+
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+
7
const VERSION = '2.0.0';

// Document file extensions accepted by the doc-family commands:
// PDF, word-processor, presentation, spreadsheet and plain-text formats.
const DOC_EXTS = new Set([
  '.pdf',
  '.docx',
  '.doc',
  '.odt',
  '.pptx',
  '.ppt',
  '.odp',
  '.xlsx',
  '.xls',
  '.ods',
  '.txt',
  '.md',
  '.rst',
]);
11
+
12
/**
 * Parse CLI arguments.
 *
 * The first two arguments after `node script` are reported as `command` and
 * `subcommand`. For command groups that take a subcommand, flag/positional
 * parsing starts after the subcommand; for every other command it starts
 * right after the command (so the second argument is parsed as well).
 * `--name=value` becomes a string flag, a bare `--name` becomes `true`, and
 * anything not starting with `--` is a positional.
 *
 * @param {string[]} argv - process.argv
 * @returns {{ command: string|undefined, subcommand: string|undefined, flags: object, positionals: string[] }}
 */
export function parseArgs(argv) {
  const [command, subcommand] = argv.slice(2);

  // Command groups whose second argument is a subcommand and must be skipped.
  const groupsWithSubcommand = ['dag', 'db', 'doc', 'writing', 'search', 'portal', 'ocr', 'gemini', 'prompt'];
  const skip = groupsWithSubcommand.includes(command) ? 2 : 1;

  const flags = {};
  const positionals = [];
  argv.slice(2 + skip).forEach((token) => {
    if (!token.startsWith('--')) {
      positionals.push(token);
      return;
    }
    const eqIndex = token.indexOf('=');
    if (eqIndex === -1) {
      flags[token.slice(2)] = true;
    } else {
      flags[token.slice(2, eqIndex)] = token.slice(eqIndex + 1);
    }
  });

  return { command, subcommand, flags, positionals };
}
42
+
43
/**
 * Read all of stdin when it is not a TTY.
 * @returns {Promise<string|null>} The trimmed input, or null when stdin is a TTY.
 */
export async function readStdin() {
  const { stdin } = process;
  if (stdin.isTTY) {
    return null;
  }

  const parts = [];
  for await (const part of stdin) {
    parts.push(part);
  }
  const whole = Buffer.concat(parts);
  return whole.toString().trim();
}
50
+
51
/**
 * Resolve the list of input files from one of three sources, in priority order:
 *   1. `--dir=<path>`: every file in that directory with a supported document
 *      extension, sorted by name;
 *   2. stdin (a lone `-` positional, or no positionals with piped input):
 *      NUL- or newline-separated paths, keeping only supported document extensions;
 *   3. positional arguments, taken as-is (no extension filter).
 *
 * All three sources return absolute paths.
 *
 * @param {string[]} positionals - Positional CLI arguments.
 * @param {object} flags - Parsed CLI flags; only `dir` is read here.
 * @returns {Promise<string[]>} Absolute file paths (possibly empty).
 * @throws {TypeError} When `--dir` is given without a value.
 */
export async function resolveFiles(positionals, flags) {
  const isDoc = (file) => DOC_EXTS.has(path.extname(file).toLowerCase());

  if (flags.dir) {
    // A bare `--dir` is parsed as `true`; fail with a message that names the flag.
    if (typeof flags.dir !== 'string') {
      throw new TypeError('--dir requires a directory path, e.g. --dir=./docs');
    }
    const dir = path.resolve(flags.dir);
    return fs.readdirSync(dir).filter(isDoc).sort().map((name) => path.join(dir, name));
  }

  const useStdin = (positionals.length === 1 && positionals[0] === '-') ||
    (positionals.length === 0 && !process.stdin.isTTY);
  if (useStdin) {
    const stdin = await readStdin();
    if (!stdin) return [];
    // Accept both NUL-separated (--print0 producers) and newline-separated lists.
    const separator = stdin.includes('\0') ? /\0/ : /\n/;
    return stdin
      .split(separator)
      .map((line) => line.trim())
      .filter((line) => line && isDoc(line))
      .map((line) => path.resolve(line)); // absolute, like the other two sources
  }

  return positionals.map((p) => path.resolve(p));
}
77
+
78
/**
 * Write file paths to stdout.
 * With `flags.print0`, paths are written in a single chunk, each terminated by
 * NUL (for `xargs -0` / GNU parallel); otherwise one path per line.
 *
 * @param {string[]} paths
 * @param {object} flags - Only `print0` is read.
 */
export function printPaths(paths, flags) {
  if (!flags.print0) {
    paths.forEach((p) => {
      process.stdout.write(p + '\n');
    });
    return;
  }

  const terminator = paths.length ? '\0' : '';
  process.stdout.write(paths.join('\0') + terminator);
}
90
+
91
/**
 * Write a progress message to stderr unless `flags.quiet` is set.
 * `flags` may be omitted, in which case the message is always written.
 *
 * @param {string} message
 * @param {object} [flags] - Only `quiet` is read.
 */
export function logProgress(message, flags) {
  const silenced = Boolean(flags?.quiet);
  if (silenced) {
    return;
  }
  process.stderr.write(message + '\n');
}
98
+
99
/**
 * Run `fn` over `items` in consecutive batches of at most `concurrency` items.
 * Each batch is awaited in full (Promise.allSettled) before the next starts.
 *
 * `concurrency` is coerced to a whole number, so a CLI flag string such as
 * '4' works. Values that are not >= 1 (0, negatives, NaN, undefined) fall back
 * to 1 instead of looping forever or silently skipping every item.
 *
 * @param {Array} items - Inputs to process.
 * @param {number|string} concurrency - Maximum number of items per batch.
 * @param {Function} fn - Called as `fn(item, indexInBatch, batch)`; may be sync or async.
 *   A synchronous throw is captured as a rejection rather than aborting the run.
 * @returns {Promise<Array<PromiseSettledResult<unknown>>>} One settled result per item, in input order.
 */
export async function parallelBatch(items, concurrency, fn) {
  const requested = Math.floor(Number(concurrency));
  const batchSize = requested >= 1 ? requested : 1;

  const results = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    // Wrapping in an async arrow turns a synchronous throw into a rejected promise.
    const settled = await Promise.allSettled(batch.map(async (...args) => fn(...args)));
    for (const outcome of settled) results.push(outcome);
  }
  return results;
}
109
+
110
/**
 * Start a stopwatch.
 * @returns {() => string} Function returning the seconds elapsed since `timer()`
 *   was called, formatted with two decimals (e.g. "1.23").
 */
export function timer() {
  const startedAt = performance.now();

  return function elapsedSeconds() {
    const elapsedMs = performance.now() - startedAt;
    return (elapsedMs / 1000).toFixed(2);
  };
}
115
+
116
/**
 * Build the structured JSON success envelope.
 *
 * Shape:
 *   { ok: true, data, meta: { tool, version, command, duration_ms, elapsed_seconds, timestamp } }
 *
 * `elapsed_seconds` is kept alongside `duration_ms` for backward compatibility.
 *
 * @param {string} command - Command name (e.g. 'doc ingest').
 * @param {object} data - Structured data returned by the command.
 * @param {string} elapsed - Elapsed seconds as a string (as produced by timer()).
 * @returns {string} Pretty-printed JSON followed by a newline.
 */
export function jsonOut(command, data, elapsed) {
  const seconds = parseFloat(elapsed);

  const envelope = {
    ok: true,
    data,
    meta: {
      tool: 'bh',
      version: VERSION,
      command,
      duration_ms: Math.round(seconds * 1000),
      elapsed_seconds: seconds,
      timestamp: new Date().toISOString(),
    },
  };

  return `${JSON.stringify(envelope, null, 2)}\n`;
}
139
+
140
/**
 * Build the structured JSON error envelope.
 *
 * Shape:
 *   { ok: false, error, code, meta: { tool, version, command, timestamp } }
 *
 * @param {string} command - Command name.
 * @param {string} message - Error message.
 * @param {number} [code=1] - Error code.
 * @returns {string} Single-line JSON followed by a newline.
 */
export function jsonError(command, message, code = 1) {
  const envelope = {
    ok: false,
    error: message,
    code,
    meta: {
      tool: 'bh',
      version: VERSION,
      command,
      timestamp: new Date().toISOString(),
    },
  };

  return `${JSON.stringify(envelope)}\n`;
}
160
+
161
/**
 * Write an error to stderr: the JSON error envelope when `format` is 'json',
 * otherwise the plain message followed by a newline.
 *
 * @param {string} command - Command name (used only in JSON mode).
 * @param {string} message - Error message.
 * @param {string} [format] - Output format; only 'json' is treated specially.
 */
export function emitError(command, message, format) {
  const output = format === 'json' ? jsonError(command, message) : `${message}\n`;
  process.stderr.write(output);
}
169
+
170
+ export { VERSION, DOC_EXTS };