botrun-horse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +1 -0
  2. package/bin/bh.mjs +193 -0
  3. package/bin/commands/dag-cmd.mjs +74 -0
  4. package/bin/commands/db-cmd.mjs +73 -0
  5. package/bin/commands/doc.mjs +185 -0
  6. package/bin/commands/gemini.mjs +120 -0
  7. package/bin/commands/help.mjs +109 -0
  8. package/bin/commands/legal.mjs +174 -0
  9. package/bin/commands/nchc.mjs +212 -0
  10. package/bin/commands/openrouter.mjs +154 -0
  11. package/bin/commands/prompt.mjs +175 -0
  12. package/bin/commands/schema.mjs +258 -0
  13. package/bin/commands/search.mjs +46 -0
  14. package/bin/commands/writing.mjs +33 -0
  15. package/lib/core/adapters/base.mjs +52 -0
  16. package/lib/core/adapters/claude.mjs +13 -0
  17. package/lib/core/adapters/gemini-api.mjs +174 -0
  18. package/lib/core/adapters/gemini-shared.mjs +164 -0
  19. package/lib/core/adapters/gemini-vertex.mjs +232 -0
  20. package/lib/core/adapters/local.mjs +13 -0
  21. package/lib/core/adapters/nchc.mjs +236 -0
  22. package/lib/core/adapters/openai-shared.mjs +34 -0
  23. package/lib/core/adapters/openrouter.mjs +304 -0
  24. package/lib/core/ai-cache.mjs +277 -0
  25. package/lib/core/ai-router.mjs +217 -0
  26. package/lib/core/cli-utils.mjs +170 -0
  27. package/lib/core/dag.mjs +114 -0
  28. package/lib/core/db.mjs +412 -0
  29. package/lib/core/env.mjs +64 -0
  30. package/lib/core/llm.mjs +58 -0
  31. package/lib/core/paths.mjs +115 -0
  32. package/lib/core/proxy.mjs +46 -0
  33. package/lib/core/watermelon.mjs +9 -0
  34. package/lib/doc/index.mjs +419 -0
  35. package/lib/doc/office2text.mjs +234 -0
  36. package/lib/doc/pdf2text.mjs +133 -0
  37. package/lib/doc/split.mjs +132 -0
  38. package/lib/flows/draft-writing.mjs +29 -0
  39. package/lib/flows/gemini-ask.mjs +185 -0
  40. package/lib/flows/hatch-portal.mjs +13 -0
  41. package/lib/flows/legal-ask.mjs +325 -0
  42. package/lib/flows/openai-agent.mjs +167 -0
  43. package/lib/flows/opencode-agent.mjs +240 -0
  44. package/lib/flows/openrouter-ask.mjs +111 -0
  45. package/lib/flows/review-doc.mjs +18 -0
  46. package/lib/ocr/index.mjs +6 -0
  47. package/lib/portal/hatch.mjs +6 -0
  48. package/lib/portal/index.mjs +6 -0
  49. package/lib/prompt/prompt-search.mjs +55 -0
  50. package/lib/prompt/prompt-store.mjs +94 -0
  51. package/lib/prompt/prompts/zero-framework/coding.md +15 -0
  52. package/lib/prompt/prompts/zero-framework/search.md +12 -0
  53. package/lib/prompt/prompts/zero-framework/slice.md +11 -0
  54. package/lib/search/crawler.mjs +6 -0
  55. package/lib/search/index.mjs +7 -0
  56. package/lib/tools/fs-tools.mjs +268 -0
  57. package/lib/tools/index.mjs +27 -0
  58. package/lib/writing/generate.mjs +86 -0
  59. package/lib/writing/generators/nstc-generators.mjs +279 -0
  60. package/lib/writing/generators/nstc-top5.mjs +554 -0
  61. package/lib/writing/index.mjs +5 -0
  62. package/lib/writing/layouts/nstc-layout.mjs +249 -0
  63. package/lib/writing/renderer.mjs +61 -0
  64. package/package.json +35 -0
@@ -0,0 +1,114 @@
1
+ // lib/core/dag.mjs — 通用 DAG 依賴追蹤器
2
+ // 管理任務的狀態、依賴關係、平行調度
3
+ // 支援多專案:透過 project 參數決定路徑
4
+
5
+ import fs from 'fs';
6
+ import path from 'path';
7
+ import * as paths from './paths.mjs';
8
+
9
/**
 * Load a DAG definition file and flatten its categories into one document list.
 *
 * Each returned document is a shallow copy of its definition with an added
 * `category` property holding the name of the category it was listed under.
 *
 * @param {string} [project='nstc'] - Project name; used to look up the default
 *   definition path when `dagPath` is not given.
 * @param {string} [dagPath] - Explicit DAG file path (overrides the default).
 * @returns {{ meta: object, documents: object[] }} The parsed file (`meta`) and
 *   the flattened documents.
 * @throws {TypeError} If the parsed file has no `categories` array.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
export function loadDag(project = 'nstc', dagPath = null) {
  const filePath = dagPath || paths.dagDefinitionPath(project);
  const raw = JSON.parse(fs.readFileSync(filePath, 'utf-8'));

  // Fail with a message that names the file instead of an opaque
  // "Cannot read properties of undefined (reading 'flatMap')".
  if (!Array.isArray(raw?.categories)) {
    throw new TypeError(`DAG 定義檔格式錯誤:"categories" 必須是陣列 (${filePath})`);
  }

  // A category that declares no documents contributes nothing.
  const documents = raw.categories.flatMap(category =>
    (category.documents ?? []).map(doc => ({ ...doc, category: category.name }))
  );
  return { meta: raw, documents };
}
22
+
23
/**
 * Build the initial state map for a list of DAG documents.
 *
 * Every document becomes one entry keyed by its id, starting in the
 * 'pending' status with no timestamps, output path or error.
 *
 * @param {object[]} documents - Documents as returned by loadDag().
 * @returns {Object<string, object>} State map keyed by document id.
 */
export function initState(documents) {
  return documents.reduce((state, doc) => {
    const { id, type, title, category, deps } = doc;
    state[id] = {
      id,
      type,
      title,
      category,
      deps: deps || [],
      status: 'pending',
      startedAt: null,
      finishedAt: null,
      outputPath: null,
      error: null,
    };
    return state;
  }, {});
}
41
+
42
/**
 * Persist the DAG state map as pretty-printed JSON (2-space indent).
 * The parent directory of the state file is created when missing.
 *
 * @param {object} state - State map to write.
 * @param {string} [project='nstc'] - Project whose state file is written.
 */
export function saveState(state, project = 'nstc') {
  const target = paths.dagStatePath(project);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(target, serialized);
}
47
+
48
/**
 * Load a previously saved DAG state map.
 *
 * The file is read directly rather than probed with existsSync() first, so a
 * file removed between the check and the read cannot surface as an unexpected
 * error.
 *
 * @param {string} [project='nstc'] - Project whose state file is read.
 * @returns {object|null} The parsed state, or null when no state file exists.
 * @throws {Error} If the file exists but cannot be read or is not valid JSON.
 */
export function loadState(project = 'nstc') {
  const stateFile = paths.dagStatePath(project);

  let raw;
  try {
    raw = fs.readFileSync(stateFile, 'utf-8');
  } catch (err) {
    // A missing file (or a missing/non-directory path component) means "no saved state".
    if (err?.code === 'ENOENT' || err?.code === 'ENOTDIR') return null;
    throw err;
  }
  return JSON.parse(raw);
}
55
+
56
/**
 * List the tasks that can start now: status 'pending' and every dependency
 * already 'done'. A dependency id that is not present in the state map counts
 * as not done, so the task stays blocked.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @returns {object[]} Ready tasks, in the state map's enumeration order.
 */
export function getReady(state) {
  const ready = [];
  for (const task of Object.values(state)) {
    if (task.status !== 'pending') continue;
    const blocked = task.deps.some(depId => state[depId]?.status !== 'done');
    if (!blocked) ready.push(task);
  }
  return ready;
}
62
+
63
/**
 * Mark a task as running and stamp its start time (ISO 8601, UTC).
 * Mutates `state` in place.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 */
export function markRunning(state, id) {
  const task = state[id];
  task.status = 'running';
  task.startedAt = new Date().toISOString();
}
67
+
68
/**
 * Mark a task as done, stamp its finish time (ISO 8601, UTC) and record
 * where its output was written. Mutates `state` in place.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 * @param {string} outputPath - Value stored as the task's outputPath.
 */
export function markDone(state, id, outputPath) {
  Object.assign(state[id], {
    status: 'done',
    finishedAt: new Date().toISOString(),
    outputPath,
  });
}
73
+
74
/**
 * Mark a task as failed, stamp its finish time (ISO 8601, UTC) and store the
 * failure reason as a string. Mutates `state` in place.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 * @param {*} error - Failure reason; stored as String(error).
 */
export function markFailed(state, id, error) {
  const task = state[id];
  const finishedAt = new Date().toISOString();
  task.status = 'failed';
  task.finishedAt = finishedAt;
  task.error = String(error);
}
79
+
80
/**
 * Summarize the DAG state as per-status counts plus a completion percentage.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @returns {{ total: number, done: number, running: number, pending: number,
 *   ready: number, failed: number, pct: number }} Counts per status, the number
 *   of tasks that can start now (`ready`, a subset of `pending`), and `pct`,
 *   the share of done tasks rounded down to a whole percent (0 for an empty DAG).
 */
export function getStatusSummary(state) {
  const tasks = Object.values(state);
  const counts = { done: 0, running: 0, pending: 0, failed: 0 };

  // One pass over the tasks; statuses outside the four known ones only count toward `total`.
  for (const { status } of tasks) {
    if (Object.hasOwn(counts, status)) counts[status] += 1;
  }

  const total = tasks.length;
  const ready = getReady(state).length;
  // Guard the empty DAG: 0 / 0 would otherwise yield NaN.
  const pct = total === 0 ? 0 : Math.floor(counts.done / total * 100);

  return {
    total,
    done: counts.done,
    running: counts.running,
    pending: counts.pending,
    ready,
    failed: counts.failed,
    pct,
  };
}
91
+
92
/**
 * Render the DAG state as a multi-line text report: a totals line, a
 * 40-character progress bar, and one "done/total" line per category.
 *
 * @param {Object<string, object>} state - State map keyed by task id.
 * @returns {string} Report lines joined with '\n'.
 */
export function formatStatus(state) {
  const BAR_WIDTH = 40;
  const s = getStatusSummary(state);

  // An empty DAG can produce a non-finite percentage; show it as 0% with an empty bar.
  const pct = Number.isFinite(s.pct) ? s.pct : 0;
  // 2.5 percentage points per bar cell (100 / 40); clamp so repeat() never gets a negative count.
  const filled = Math.min(BAR_WIDTH, Math.max(0, Math.floor(pct / 2.5)));
  const bar = '█'.repeat(filled) + '░'.repeat(BAR_WIDTH - filled);

  const lines = [
    `========== DAG 狀態 ==========`,
    `總計: ${s.total} | 完成: ${s.done} | 執行中: ${s.running} | 就緒: ${s.ready} | 等待中: ${s.pending} | 失敗: ${s.failed}`,
    `進度: [${bar}] ${pct}%`,
    `==============================`,
  ];

  // Per-category tally. A prototype-less object keeps category names such as
  // "constructor" or "__proto__" from colliding with Object.prototype members.
  const byCategory = Object.create(null);
  for (const task of Object.values(state)) {
    const tally = (byCategory[task.category] ??= { done: 0, total: 0 });
    tally.total += 1;
    if (task.status === 'done') tally.done += 1;
  }
  for (const [cat, info] of Object.entries(byCategory)) {
    lines.push(` ${cat}: ${info.done}/${info.total}`);
  }

  return lines.join('\n');
}
@@ -0,0 +1,412 @@
1
+ // lib/core/db.mjs — 通用文件 SQLite 存儲模組
2
+ //
3
+ // 設計原則:SOLID / DRY / KISS
4
+ // - 單一職責:只負責 SQLite 讀寫,不含業務邏輯
5
+ // - 冪等設計:INSERT OR IGNORE,重複入庫不拋例外
6
+ // - 斷點續作:ingestion_log 記錄每個檔案的匯入狀態
7
+ // - AI/LLM 友善:_meta 表詳細說明 schema 用法
8
+ //
9
+ // 執行時需加 --experimental-sqlite flag
10
+ // node --experimental-sqlite bin/bh.mjs ...
11
+ //
12
+ // 相依:Node.js 22+ 內建 node:sqlite (DatabaseSync)
13
+
14
+ import { DatabaseSync } from 'node:sqlite';
15
+ import fs from 'fs';
16
+ import path from 'path';
17
+
18
+ // META_USAGE_GUIDE: full usage notes for this database, written for AI/LLM citation. initSchema() stores this text in the _meta table under the 'usage_guide' key and getUsageGuide() reads it back; the trailing .trim() strips the template literal's leading and trailing newlines. The text is runtime data — edit it only when the schema itself changes.
19
+ const META_USAGE_GUIDE = `
20
+ 此 SQLite 資料庫由 botrun-horse 自動建立,用於儲存 PDF/Office 文件的逐頁文字內容。
21
+
22
+ ## 核心資料表
23
+
24
+ ### documents — 文件 metadata
25
+ id INTEGER 主鍵(用於 JOIN pages)
26
+ source_path TEXT 原始檔案完整路徑(唯一)
27
+ filename TEXT 原始檔案名稱
28
+ doc_type TEXT 文件類型(公文/手冊/法規/報告/論文等)
29
+ title TEXT 文件標題
30
+ total_pages INTEGER 總頁數
31
+ file_size INTEGER 檔案大小(bytes)
32
+ created_at TEXT 入庫時間(UTC)
33
+
34
+ ### pages — 逐頁文字(引證核心)
35
+ id INTEGER 主鍵
36
+ doc_id INTEGER → documents.id
37
+ page_number INTEGER 頁碼(從 1 開始)
38
+ page_text TEXT 該頁全文文字(AI/LLM 使用此欄位)
39
+ char_count INTEGER 字元數
40
+ source_path TEXT 原始檔案完整路徑(引證用)
41
+ source_file TEXT 原始檔案名稱(引證用)
42
+ source_page INTEGER 原始頁碼(引證用,與 page_number 相同)
43
+ split_pdf TEXT 單頁獨立 PDF 路徑(可 NULL,僅在執行 doc split 後有值)
44
+
45
+ ### pages_fts — FTS5 trigram 全文檢索虛擬表
46
+ 搜尋語法: SELECT ... FROM pages_fts WHERE pages_fts MATCH '關鍵字'
47
+ 特性: trigram tokenizer,天然支援繁體中文/日文/韓文(≥3 字元子字串搜尋)
48
+
49
+ ### ingestion_log — 匯入紀錄(斷點續作)
50
+ source_path TEXT 原始檔案完整路徑(唯一鍵)
51
+ doc_id INTEGER 對應 documents.id(NULL 表示尚未完成)
52
+ status TEXT done | failed
53
+ pages INTEGER 成功匯入的頁數
54
+ ingested_at TEXT 完成時間(UTC)
55
+ error_msg TEXT 失敗原因(status=failed 時有值)
56
+
57
+ ## AI/LLM 建議查詢模式
58
+
59
+ ### 1. 全文搜尋(關鍵字引證)
60
+ SELECT p.source_file, p.source_page, d.title, d.doc_type,
61
+ snippet(pages_fts, 0, '【', '】', '...', 64) AS context
62
+ FROM pages_fts
63
+ JOIN pages p ON p.id = pages_fts.rowid
64
+ JOIN documents d ON d.id = p.doc_id
65
+ WHERE pages_fts MATCH '搜尋關鍵字'
66
+ ORDER BY rank LIMIT 10;
67
+
68
+ ### 2. 取得特定文件的所有頁面
69
+ SELECT page_number, page_text, source_file, source_page
70
+ FROM pages WHERE doc_id = ? ORDER BY page_number;
71
+
72
+ ### 3. 引證格式(LLM 回答時建議附上)
73
+ 「根據《{title}》第 {source_page} 頁({source_file}):{context}」
74
+
75
+ ### 4. 統計資訊
76
+ SELECT d.doc_type, COUNT(*) AS 文件數, SUM(d.total_pages) AS 總頁數
77
+ FROM documents d GROUP BY d.doc_type;
78
+
79
+ ## 後續疊加建議
80
+ - 新增 tags 表:關聯 documents,支援多標籤分類
81
+ - 新增 summaries 表:儲存 AI 生成的頁面/章節摘要
82
+ - 新增 embeddings 表:儲存向量嵌入,支援語義搜尋
83
+ - 使用 SQLite 的 JSON 欄位擴充任意 metadata(documents.extra_json)
84
+ `.trim();
85
+
86
+ /**
87
+ * DocStore — 通用文件 SQLite 存儲層
88
+ *
89
+ * 單一職責:存取 SQLite,不含業務邏輯。
90
+ * 所有寫入操作皆使用 INSERT OR IGNORE,保證冪等性(可重複執行)。
91
+ */
92
+ export class DocStore {
93
+ /**
94
+ * 開啟或建立 SQLite 資料庫
95
+ * @param {string} dbPath - 資料庫檔案路徑
96
+ */
97
+ constructor(dbPath = './output/pdf_docs.db') {
98
+ const dir = path.dirname(dbPath);
99
+ if (!fs.existsSync(dir)) {
100
+ fs.mkdirSync(dir, { recursive: true });
101
+ }
102
+
103
+ this.dbPath = dbPath;
104
+ this.db = new DatabaseSync(dbPath);
105
+
106
+ // WAL 模式:提升並發寫入效能(讀寫可同時進行)
107
+ this.db.exec('PRAGMA journal_mode = WAL');
108
+ // 外鍵約束
109
+ this.db.exec('PRAGMA foreign_keys = ON');
110
+ // 提升寫入效能(匯入大量頁面時明顯加速)
111
+ this.db.exec('PRAGMA synchronous = NORMAL');
112
+ }
113
+
114
+ /**
115
+ * 建立所有資料表、FTS 虛擬表、觸發器與 _meta 說明
116
+ * 使用 IF NOT EXISTS,可安全重複呼叫(冪等)
117
+ */
118
+ initSchema() {
119
+ // ── _meta:AI/LLM 友善的 schema 說明 ──
120
+ this.db.exec(`
121
+ CREATE TABLE IF NOT EXISTS _meta (
122
+ key TEXT PRIMARY KEY,
123
+ value TEXT NOT NULL
124
+ )
125
+ `);
126
+
127
+ // 插入使用說明(OR REPLACE:每次 initSchema 更新最新說明)
128
+ this.db.prepare(`
129
+ INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
130
+ `).run('schema_version', '2');
131
+ this.db.prepare(`
132
+ INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
133
+ `).run('usage_guide', META_USAGE_GUIDE);
134
+ this.db.prepare(`
135
+ INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
136
+ `).run('created_by', 'botrun-horse');
137
+ this.db.prepare(`
138
+ INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
139
+ `).run('updated_at', new Date().toISOString());
140
+
141
+ // ── documents:文件 metadata 總表 ──
142
+ this.db.exec(`
143
+ CREATE TABLE IF NOT EXISTS documents (
144
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
145
+ source_path TEXT NOT NULL UNIQUE,
146
+ filename TEXT NOT NULL,
147
+ doc_type TEXT,
148
+ title TEXT,
149
+ total_pages INTEGER,
150
+ file_size INTEGER,
151
+ created_at TEXT DEFAULT (datetime('now'))
152
+ )
153
+ `);
154
+
155
+ // ── pages:逐頁文字(引證鏈核心)──
156
+ this.db.exec(`
157
+ CREATE TABLE IF NOT EXISTS pages (
158
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
159
+ doc_id INTEGER NOT NULL REFERENCES documents(id),
160
+ page_number INTEGER NOT NULL,
161
+ page_text TEXT,
162
+ char_count INTEGER,
163
+ source_path TEXT NOT NULL,
164
+ source_file TEXT NOT NULL,
165
+ source_page INTEGER NOT NULL,
166
+ split_pdf TEXT,
167
+ UNIQUE(doc_id, page_number)
168
+ )
169
+ `);
170
+
171
+ // ── pages_fts:FTS5 trigram 全文檢索 ──
172
+ // trigram tokenizer:三字元組比對,天然支援 CJK(繁中/日/韓)
173
+ this.db.exec(`
174
+ CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5(
175
+ page_text,
176
+ content='pages',
177
+ content_rowid='id',
178
+ tokenize='trigram case_sensitive 0 remove_diacritics 0'
179
+ )
180
+ `);
181
+
182
+ // ── FTS 自動同步觸發器 ──
183
+ this.db.exec(`
184
+ CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
185
+ INSERT INTO pages_fts(rowid, page_text) VALUES (new.id, new.page_text);
186
+ END
187
+ `);
188
+ this.db.exec(`
189
+ CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
190
+ INSERT INTO pages_fts(pages_fts, rowid, page_text) VALUES('delete', old.id, old.page_text);
191
+ END
192
+ `);
193
+ this.db.exec(`
194
+ CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
195
+ INSERT INTO pages_fts(pages_fts, rowid, page_text) VALUES('delete', old.id, old.page_text);
196
+ INSERT INTO pages_fts(rowid, page_text) VALUES (new.id, new.page_text);
197
+ END
198
+ `);
199
+
200
+ // ── ingestion_log:匯入紀錄(斷點續作)──
201
+ this.db.exec(`
202
+ CREATE TABLE IF NOT EXISTS ingestion_log (
203
+ source_path TEXT PRIMARY KEY,
204
+ doc_id INTEGER REFERENCES documents(id),
205
+ status TEXT NOT NULL DEFAULT 'done',
206
+ pages INTEGER,
207
+ ingested_at TEXT DEFAULT (datetime('now')),
208
+ error_msg TEXT
209
+ )
210
+ `);
211
+ }
212
+
213
+ // ─────────────────────────────────────────────────────────
214
+ // ingestion_log:斷點續作
215
+ // ─────────────────────────────────────────────────────────
216
+
217
+ /**
218
+ * 檢查檔案是否已成功匯入(斷點續作用)
219
+ * @param {string} sourcePath - 原始檔案完整路徑
220
+ * @returns {boolean} true = 已匯入,可跳過
221
+ */
222
+ isIngested(sourcePath) {
223
+ const row = this.db.prepare(
224
+ `SELECT status FROM ingestion_log WHERE source_path = ?`
225
+ ).get(sourcePath);
226
+ return row?.status === 'done';
227
+ }
228
+
229
+ /**
230
+ * 記錄匯入成功
231
+ * @param {string} sourcePath
232
+ * @param {number} docId
233
+ * @param {number} pages
234
+ */
235
+ logIngested(sourcePath, docId, pages) {
236
+ this.db.prepare(`
237
+ INSERT OR REPLACE INTO ingestion_log (source_path, doc_id, status, pages, ingested_at)
238
+ VALUES (?, ?, 'done', ?, datetime('now'))
239
+ `).run(sourcePath, docId, pages);
240
+ }
241
+
242
+ /**
243
+ * 記錄匯入失敗
244
+ * @param {string} sourcePath
245
+ * @param {string} errorMsg
246
+ */
247
+ logFailed(sourcePath, errorMsg) {
248
+ this.db.prepare(`
249
+ INSERT OR REPLACE INTO ingestion_log (source_path, status, error_msg, ingested_at)
250
+ VALUES (?, 'failed', ?, datetime('now'))
251
+ `).run(sourcePath, errorMsg);
252
+ }
253
+
254
+ // ─────────────────────────────────────────────────────────
255
+ // documents:文件 metadata CRUD
256
+ // ─────────────────────────────────────────────────────────
257
+
258
+ /**
259
+ * 新增文件 metadata(冪等:已存在時回傳現有 ID)
260
+ *
261
+ * @param {Object} doc
262
+ * @param {string} doc.sourcePath - 來源檔案路徑(唯一鍵)
263
+ * @param {string} doc.filename - 檔案名稱
264
+ * @param {string} [doc.docType] - 文件類型
265
+ * @param {string} [doc.title] - 文件標題
266
+ * @param {number} [doc.totalPages] - 總頁數
267
+ * @param {number} [doc.fileSize] - 檔案大小(bytes)
268
+ * @returns {number} doc_id(新建或現有)
269
+ */
270
+ insertDocument({ sourcePath, filename, docType = null, title = null, totalPages = null, fileSize = null }) {
271
+ // OR IGNORE:source_path 重複時靜默忽略
272
+ this.db.prepare(`
273
+ INSERT OR IGNORE INTO documents (source_path, filename, doc_type, title, total_pages, file_size)
274
+ VALUES (?, ?, ?, ?, ?, ?)
275
+ `).run(sourcePath, filename, docType, title, totalPages, fileSize);
276
+
277
+ // 回傳現有或剛插入的 ID
278
+ const row = this.db.prepare(`SELECT id FROM documents WHERE source_path = ?`).get(sourcePath);
279
+ return Number(row.id);
280
+ }
281
+
282
+ /**
283
+ * 新增單頁文字內容(冪等:(doc_id, page_number) 重複時靜默忽略)
284
+ *
285
+ * @param {Object} page
286
+ * @param {number} page.docId - 所屬文件 ID
287
+ * @param {number} page.pageNumber - 頁碼
288
+ * @param {string} [page.pageText] - 頁面文字內容
289
+ * @param {string} page.sourcePath - 來源檔案完整路徑(引證用)
290
+ * @param {string} page.sourceFile - 來源檔案名稱(引證用)
291
+ * @param {number} page.sourcePage - 來源頁碼(引證用)
292
+ * @param {string} [page.splitPdf] - 拆分後的單頁 PDF 路徑
293
+ * @returns {number} page_id(新建或現有)
294
+ */
295
+ insertPage({ docId, pageNumber, pageText = null, sourcePath, sourceFile, sourcePage, splitPdf = null }) {
296
+ const charCount = pageText ? pageText.length : 0;
297
+ // OR IGNORE:(doc_id, page_number) 重複時靜默忽略
298
+ this.db.prepare(`
299
+ INSERT OR IGNORE INTO pages
300
+ (doc_id, page_number, page_text, char_count, source_path, source_file, source_page, split_pdf)
301
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
302
+ `).run(docId, pageNumber, pageText, charCount, sourcePath, sourceFile, sourcePage, splitPdf);
303
+
304
+ const row = this.db.prepare(
305
+ `SELECT id FROM pages WHERE doc_id = ? AND page_number = ?`
306
+ ).get(docId, pageNumber);
307
+ return Number(row.id);
308
+ }
309
+
310
+ // ─────────────────────────────────────────────────────────
311
+ // 查詢
312
+ // ─────────────────────────────────────────────────────────
313
+
314
+ /**
315
+ * FTS5 trigram 全文檢索
316
+ * 自動將查詢字串拆成三字元組,天然支援 CJK 子字串搜尋(≥3 字元)
317
+ *
318
+ * @param {string} query - 搜尋關鍵字(≥3 字元)
319
+ * @param {number} [limit] - 最多回傳筆數(預設不限)
320
+ * @returns {Array<Object>} 搜尋結果(含引證資訊與 snippet)
321
+ */
322
+ search(query, limit = null) {
323
+ if (!query || query.trim().length === 0) return [];
324
+
325
+ let sql = `
326
+ SELECT
327
+ p.doc_id,
328
+ p.id AS page_id,
329
+ p.page_number,
330
+ p.source_path,
331
+ p.source_file,
332
+ p.source_page,
333
+ p.split_pdf,
334
+ snippet(pages_fts, 0, '【', '】', '...', 64) AS snippet,
335
+ d.doc_type,
336
+ d.title
337
+ FROM pages_fts
338
+ JOIN pages p ON p.id = pages_fts.rowid
339
+ JOIN documents d ON d.id = p.doc_id
340
+ WHERE pages_fts MATCH ?
341
+ ORDER BY rank
342
+ `;
343
+ if (limit && limit > 0) sql += ` LIMIT ${parseInt(limit)}`;
344
+
345
+ return this.db.prepare(sql).all(query);
346
+ }
347
+
348
+ /**
349
+ * 依 ID 取得單一文件 metadata
350
+ * @param {number} id - 文件 ID
351
+ * @returns {Object|undefined}
352
+ */
353
+ getDocument(id) {
354
+ return this.db.prepare('SELECT * FROM documents WHERE id = ?').get(id);
355
+ }
356
+
357
+ /**
358
+ * 取得指定文件的所有頁面(依頁碼排序)
359
+ * @param {number} docId - 文件 ID
360
+ * @returns {Array<Object>}
361
+ */
362
+ getPages(docId) {
363
+ return this.db.prepare('SELECT * FROM pages WHERE doc_id = ? ORDER BY page_number').all(docId);
364
+ }
365
+
366
+ /**
367
+ * 取得資料庫統計資訊
368
+ * @returns {{ documents, pages, totalChars, byType, ingestionLog }}
369
+ */
370
+ stats() {
371
+ const docCount = this.db.prepare('SELECT COUNT(*) AS count FROM documents').get();
372
+ const pageCount = this.db.prepare('SELECT COUNT(*) AS count FROM pages').get();
373
+ const totalChars = this.db.prepare('SELECT COALESCE(SUM(char_count), 0) AS total FROM pages').get();
374
+
375
+ const byType = this.db.prepare(`
376
+ SELECT doc_type, COUNT(*) AS count
377
+ FROM documents
378
+ GROUP BY doc_type
379
+ ORDER BY count DESC
380
+ `).all();
381
+
382
+ const ingestionLog = this.db.prepare(`
383
+ SELECT status, COUNT(*) AS count
384
+ FROM ingestion_log
385
+ GROUP BY status
386
+ `).all();
387
+
388
+ return {
389
+ documents: docCount.count,
390
+ pages: pageCount.count,
391
+ totalChars: totalChars.total,
392
+ byType,
393
+ ingestionLog,
394
+ };
395
+ }
396
+
397
+ /**
398
+ * 取得 _meta 表中的 schema 使用說明(供 AI/LLM 參考)
399
+ * @returns {string} 使用說明 Markdown
400
+ */
401
+ getUsageGuide() {
402
+ const row = this.db.prepare(`SELECT value FROM _meta WHERE key = 'usage_guide'`).get();
403
+ return row?.value || '';
404
+ }
405
+
406
+ /**
407
+ * 關閉資料庫連線
408
+ */
409
+ close() {
410
+ this.db.close();
411
+ }
412
+ }
@@ -0,0 +1,64 @@
1
+ // lib/core/env.mjs — 零依賴 .env 載入器
2
+ //
3
+ // 設計原則:KISS / DRY
4
+ // - 不引入 dotenv npm 套件,純 Node.js fs 讀取
5
+ // - 預設覆蓋已有環境變數(.env 是專案級設定,優先於 shell 全域 export)
6
+ // - 支援 # 註解、空行、雙引號值、等號左右無空格
7
+ //
8
+ // 使用方式(必須在所有 import 之前):
9
+ // import { loadEnv } from '../core/env.mjs';
10
+ // loadEnv(); // 自動從專案根目錄讀取 .env
11
+
12
+ import fs from 'fs';
13
+ import path from 'path';
14
+ import { fileURLToPath } from 'url';
15
+
16
+ const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this module, derived from import.meta.url
17
+ const PROJECT_ROOT = path.resolve(__dirname, '../..'); // two directory levels above this module's directory; loadEnv() looks for .env here by default
18
+
19
/**
 * Load a .env file into process.env.
 *
 * Supported syntax: blank lines, `#` comment lines, `KEY=VALUE`,
 * `export KEY=VALUE`, and values wrapped in one matching pair of double or
 * single quotes. No whitespace is allowed around `=`. Lines that do not match
 * are skipped.
 *
 * @param {string} [envPath] - Path of the .env file (default: `.env` in the project root).
 * @param {object} [opts]
 * @param {boolean} [opts.override=true] - When true, values from the file
 *   replace variables that are already set; when false, existing variables win.
 * @returns {number} Number of variables written to process.env (0 when the
 *   file cannot be read).
 */
export function loadEnv(envPath, opts = {}) {
  const filePath = envPath || path.join(PROJECT_ROOT, '.env');
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch {
    return 0; // A missing or unreadable .env is optional configuration: skip silently.
  }

  // Resolved once, not per line; `opts?.` also tolerates an explicit null.
  const override = opts?.override ?? true;

  let count = 0;
  for (const line of content.split('\n')) {
    const trimmed = line.trim();
    // Skip blank lines and comments.
    if (!trimmed || trimmed.startsWith('#')) continue;

    // Parse KEY=VALUE (also accepts `export KEY=VALUE`).
    const match = trimmed.match(/^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$/);
    if (!match) continue;

    const key = match[1];
    let value = match[2].trim();

    // Strip one pair of wrapping quotes. The length check keeps a value that is
    // a single quote character from being treated as an (empty) quoted pair.
    const isWrapped = value.length >= 2 && (
      (value.startsWith('"') && value.endsWith('"')) ||
      (value.startsWith("'") && value.endsWith("'"))
    );
    if (isWrapped) {
      value = value.slice(1, -1);
    }

    // With override disabled, variables already present in the environment win.
    if (!override && process.env[key] !== undefined) continue;

    process.env[key] = value;
    count++;
  }

  return count;
}
@@ -0,0 +1,58 @@
1
+ // lib/core/llm.mjs — LLM 統一介面 (Factory Pattern)
2
+ // 遵循 SOLID OCP:新增 provider 不需修改既有程式碼
3
+ //
4
+ // Provider 選擇策略:
5
+ // 1. 明確指定 provider 參數
6
+ // 2. 若 provider='gemini-auto'(或未指定)→ 自動偵測:
7
+ //    - GOOGLE_CLOUD_PROJECT is set → gemini (Vertex AI); only this env var is checked here, ADC credentials are not verified
8
+ // - 有 GEMINI_API_KEY → gemini-api(Direct API)
9
+
10
/**
 * Pick a Gemini provider from the environment.
 *
 * Only the presence of the environment variables is checked; no credentials
 * are validated. GOOGLE_CLOUD_PROJECT takes precedence over GEMINI_API_KEY.
 *
 * @returns {'gemini' | 'gemini-api' | null} 'gemini' (Vertex AI) when
 *   GOOGLE_CLOUD_PROJECT is non-empty, 'gemini-api' when GEMINI_API_KEY is
 *   non-empty, otherwise null.
 */
function detectGeminiProvider() {
  const { GOOGLE_CLOUD_PROJECT: vertexProject, GEMINI_API_KEY: apiKey } = process.env;
  if (vertexProject) return 'gemini';
  return apiKey ? 'gemini-api' : null;
}
19
+
20
/**
 * Create an LLM adapter for the requested provider.
 *
 * When `provider` is omitted or 'gemini-auto', the provider is chosen by
 * detectGeminiProvider(), falling back to 'gemini' when nothing is detected.
 * Adapter modules are imported lazily, so only the selected one is loaded.
 *
 * @param {object} config - { provider, ...adapterOpts }
 * @param {string} [config.provider='gemini-auto'] - LLM provider name.
 * @returns {Promise<object>} Adapter instance constructed with the remaining options.
 * @throws {Error} (as a rejection) when the provider name is not supported.
 */
export async function createLLM(config = {}) {
  const { provider: requested, ...adapterOpts } = config;

  // Auto-detection.
  let provider = requested;
  if (!requested || requested === 'gemini-auto') {
    provider = detectGeminiProvider() || 'gemini';
  }

  // Provider name → lazy loader resolving to the adapter class.
  const loaders = new Map([
    ['gemini', async () => (await import('./adapters/gemini-vertex.mjs')).GeminiVertexAdapter],
    ['gemini-api', async () => (await import('./adapters/gemini-api.mjs')).GeminiApiAdapter],
    ['claude', async () => (await import('./adapters/claude.mjs')).ClaudeAdapter],
    ['nchc', async () => (await import('./adapters/nchc.mjs')).NchcAdapter],
    ['openrouter', async () => (await import('./adapters/openrouter.mjs')).OpenRouterAdapter],
  ]);

  const loadAdapterClass = loaders.get(provider);
  if (!loadAdapterClass) {
    throw new Error(`LLM provider "${provider}" 尚未實作。可用: gemini, gemini-api, claude, nchc, openrouter`);
  }

  const AdapterClass = await loadAdapterClass();
  return new AdapterClass(adapterOpts);
}