botrun-horse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/bin/bh.mjs +193 -0
- package/bin/commands/dag-cmd.mjs +74 -0
- package/bin/commands/db-cmd.mjs +73 -0
- package/bin/commands/doc.mjs +185 -0
- package/bin/commands/gemini.mjs +120 -0
- package/bin/commands/help.mjs +109 -0
- package/bin/commands/legal.mjs +174 -0
- package/bin/commands/nchc.mjs +212 -0
- package/bin/commands/openrouter.mjs +154 -0
- package/bin/commands/prompt.mjs +175 -0
- package/bin/commands/schema.mjs +258 -0
- package/bin/commands/search.mjs +46 -0
- package/bin/commands/writing.mjs +33 -0
- package/lib/core/adapters/base.mjs +52 -0
- package/lib/core/adapters/claude.mjs +13 -0
- package/lib/core/adapters/gemini-api.mjs +174 -0
- package/lib/core/adapters/gemini-shared.mjs +164 -0
- package/lib/core/adapters/gemini-vertex.mjs +232 -0
- package/lib/core/adapters/local.mjs +13 -0
- package/lib/core/adapters/nchc.mjs +236 -0
- package/lib/core/adapters/openai-shared.mjs +34 -0
- package/lib/core/adapters/openrouter.mjs +304 -0
- package/lib/core/ai-cache.mjs +277 -0
- package/lib/core/ai-router.mjs +217 -0
- package/lib/core/cli-utils.mjs +170 -0
- package/lib/core/dag.mjs +114 -0
- package/lib/core/db.mjs +412 -0
- package/lib/core/env.mjs +64 -0
- package/lib/core/llm.mjs +58 -0
- package/lib/core/paths.mjs +115 -0
- package/lib/core/proxy.mjs +46 -0
- package/lib/core/watermelon.mjs +9 -0
- package/lib/doc/index.mjs +419 -0
- package/lib/doc/office2text.mjs +234 -0
- package/lib/doc/pdf2text.mjs +133 -0
- package/lib/doc/split.mjs +132 -0
- package/lib/flows/draft-writing.mjs +29 -0
- package/lib/flows/gemini-ask.mjs +185 -0
- package/lib/flows/hatch-portal.mjs +13 -0
- package/lib/flows/legal-ask.mjs +325 -0
- package/lib/flows/openai-agent.mjs +167 -0
- package/lib/flows/opencode-agent.mjs +240 -0
- package/lib/flows/openrouter-ask.mjs +111 -0
- package/lib/flows/review-doc.mjs +18 -0
- package/lib/ocr/index.mjs +6 -0
- package/lib/portal/hatch.mjs +6 -0
- package/lib/portal/index.mjs +6 -0
- package/lib/prompt/prompt-search.mjs +55 -0
- package/lib/prompt/prompt-store.mjs +94 -0
- package/lib/prompt/prompts/zero-framework/coding.md +15 -0
- package/lib/prompt/prompts/zero-framework/search.md +12 -0
- package/lib/prompt/prompts/zero-framework/slice.md +11 -0
- package/lib/search/crawler.mjs +6 -0
- package/lib/search/index.mjs +7 -0
- package/lib/tools/fs-tools.mjs +268 -0
- package/lib/tools/index.mjs +27 -0
- package/lib/writing/generate.mjs +86 -0
- package/lib/writing/generators/nstc-generators.mjs +279 -0
- package/lib/writing/generators/nstc-top5.mjs +554 -0
- package/lib/writing/index.mjs +5 -0
- package/lib/writing/layouts/nstc-layout.mjs +249 -0
- package/lib/writing/renderer.mjs +61 -0
- package/package.json +35 -0
package/lib/core/dag.mjs
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// lib/core/dag.mjs — 通用 DAG 依賴追蹤器
|
|
2
|
+
// 管理任務的狀態、依賴關係、平行調度
|
|
3
|
+
// 支援多專案:透過 project 參數決定路徑
|
|
4
|
+
|
|
5
|
+
import fs from 'fs';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import * as paths from './paths.mjs';
|
|
8
|
+
|
|
9
|
+
/**
 * Load a DAG definition file and flatten its categories into one document list.
 *
 * The JSON file is read synchronously. Every document found under a category
 * is copied and tagged with that category's `name`.
 *
 * @param {string} [project='nstc'] - Project name, used to look up the default
 *   definition path via `paths.dagDefinitionPath` when `dagPath` is not given.
 * @param {string} [dagPath] - Explicit definition file path; overrides the
 *   project default when truthy.
 * @returns {{ meta: object, documents: object[] }} `meta` is the parsed file
 *   as-is; `documents` is the flattened, category-tagged list.
 */
export function loadDag(project = 'nstc', dagPath = null) {
  const definitionFile = dagPath ? dagPath : paths.dagDefinitionPath(project);
  const meta = JSON.parse(fs.readFileSync(definitionFile, 'utf-8'));

  const documents = [];
  for (const category of meta.categories) {
    for (const doc of category.documents) {
      // The category name is written last, so it wins over any `category`
      // key already present on the document.
      documents.push({ ...doc, category: category.name });
    }
  }

  return { meta, documents };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Build the initial run-state table for a list of DAG documents.
 *
 * Each document becomes one entry keyed by its `id`, starting in the
 * `'pending'` status with all timing/output/error fields set to `null`.
 *
 * @param {Iterable<object>} documents - Documents carrying `id`, `type`,
 *   `title`, `category` and optionally `deps`.
 * @returns {Object<string, object>} State table keyed by document id.
 */
export function initState(documents) {
  const table = {};
  for (const { id, type, title, category, deps } of documents) {
    table[id] = {
      id,
      type,
      title,
      category,
      // A missing dependency list means "no dependencies".
      deps: deps || [],
      status: 'pending',
      startedAt: null,
      finishedAt: null,
      outputPath: null,
      error: null,
    };
  }
  return table;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Persist the DAG state table as pretty-printed JSON.
 *
 * The target path comes from `paths.dagStatePath(project)`; its parent
 * directory is created first when missing.
 *
 * @param {object} state - State table to serialize.
 * @param {string} [project='nstc'] - Project name used to resolve the state file.
 */
export function saveState(state, project = 'nstc') {
  const target = paths.dagStatePath(project);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(target, JSON.stringify(state, null, 2));
}
|
|
47
|
+
|
|
48
|
+
/**
 * Read a previously saved DAG state table.
 *
 * @param {string} [project='nstc'] - Project name used to resolve the state
 *   file via `paths.dagStatePath`.
 * @returns {object|null} The parsed state, or `null` when no state file exists.
 */
export function loadState(project = 'nstc') {
  const source = paths.dagStatePath(project);
  return fs.existsSync(source)
    ? JSON.parse(fs.readFileSync(source, 'utf-8'))
    : null;
}
|
|
55
|
+
|
|
56
|
+
/**
 * List the tasks that can be started now.
 *
 * A task is ready when it is still `'pending'` and every id in its `deps`
 * refers to a task whose status is `'done'`. A dependency id that is absent
 * from the table never counts as done.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @returns {object[]} Ready tasks, in the table's enumeration order.
 */
export function getReady(state) {
  const isDone = (depId) => state[depId]?.status === 'done';

  const runnable = [];
  for (const task of Object.values(state)) {
    if (task.status === 'pending' && task.deps.every(isDone)) {
      runnable.push(task);
    }
  }
  return runnable;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Flag a task as running and stamp its start time (ISO-8601, UTC).
 * Mutates the entry in place.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 */
export function markRunning(state, id) {
  const task = state[id];
  task.status = 'running';
  task.startedAt = new Date().toISOString();
}
|
|
67
|
+
|
|
68
|
+
/**
 * Flag a task as done, stamp its finish time (ISO-8601, UTC) and record
 * where its output was written. Mutates the entry in place.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 * @param {string} outputPath - Location of the produced output.
 */
export function markDone(state, id, outputPath) {
  Object.assign(state[id], {
    status: 'done',
    finishedAt: new Date().toISOString(),
    outputPath,
  });
}
|
|
73
|
+
|
|
74
|
+
/**
 * Flag a task as failed, stamp its finish time (ISO-8601, UTC) and store the
 * failure reason as a string. Mutates the entry in place.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @param {string} id - Id of the task to update; must exist in `state`.
 * @param {*} error - Failure cause; converted with `String()`.
 */
export function markFailed(state, id, error) {
  Object.assign(state[id], {
    status: 'failed',
    finishedAt: new Date().toISOString(),
    error: String(error),
  });
}
|
|
79
|
+
|
|
80
|
+
/**
 * Summarize a DAG state table as per-status counts and a completion percentage.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @returns {{ total: number, done: number, running: number, pending: number,
 *   ready: number, failed: number, pct: number }} `ready` is the number of
 *   tasks returned by `getReady`; `pct` is the floored percentage of done
 *   tasks, and 0 for an empty table.
 */
export function getStatusSummary(state) {
  const tasks = Object.values(state);

  // Single pass over the tasks; statuses outside these four are ignored.
  const counts = { done: 0, running: 0, pending: 0, failed: 0 };
  for (const { status } of tasks) {
    if (Object.hasOwn(counts, status)) counts[status] += 1;
  }

  const total = tasks.length;
  // Guard the empty table: 0 / 0 would otherwise yield NaN.
  const pct = total === 0 ? 0 : Math.floor((counts.done / total) * 100);

  return {
    total,
    done: counts.done,
    running: counts.running,
    pending: counts.pending,
    ready: getReady(state).length,
    failed: counts.failed,
    pct,
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Render a human-readable status report for a DAG state table.
 *
 * The report has a four-line header (title, counters, 40-cell progress bar,
 * rule) followed by one `done/total` line per category, in the order the
 * categories are first seen.
 *
 * @param {Object<string, object>} state - State table keyed by task id.
 * @returns {string} Multi-line report joined with `\n`.
 */
export function formatStatus(state) {
  const BAR_WIDTH = 40;
  const s = getStatusSummary(state);

  // An empty table can produce a non-finite percentage; show it as 0%.
  const pct = Number.isFinite(s.pct) ? s.pct : 0;
  // 2.5 percentage points per cell (100 / 40), clamped to the bar width.
  const filled = Math.min(BAR_WIDTH, Math.max(0, Math.floor(pct / 2.5)));
  const bar = '█'.repeat(filled) + '░'.repeat(BAR_WIDTH - filled);

  const lines = [
    `========== DAG 狀態 ==========`,
    `總計: ${s.total} | 完成: ${s.done} | 執行中: ${s.running} | 就緒: ${s.ready} | 等待中: ${s.pending} | 失敗: ${s.failed}`,
    `進度: [${bar}] ${pct}%`,
    `==============================`,
  ];

  // Per-category tally. A Map is used so that category names which collide
  // with Object.prototype members (e.g. "constructor") are counted correctly.
  const byCategory = new Map();
  for (const task of Object.values(state)) {
    let tally = byCategory.get(task.category);
    if (!tally) {
      tally = { done: 0, total: 0 };
      byCategory.set(task.category, tally);
    }
    tally.total += 1;
    if (task.status === 'done') tally.done += 1;
  }
  for (const [category, tally] of byCategory) {
    lines.push(` ${category}: ${tally.done}/${tally.total}`);
  }

  return lines.join('\n');
}
|
package/lib/core/db.mjs
ADDED
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
// lib/core/db.mjs — 通用文件 SQLite 存儲模組
|
|
2
|
+
//
|
|
3
|
+
// 設計原則:SOLID / DRY / KISS
|
|
4
|
+
// - 單一職責:只負責 SQLite 讀寫,不含業務邏輯
|
|
5
|
+
// - 冪等設計:INSERT OR IGNORE,重複入庫不拋例外
|
|
6
|
+
// - 斷點續作:ingestion_log 記錄每個檔案的匯入狀態
|
|
7
|
+
// - AI/LLM 友善:_meta 表詳細說明 schema 用法
|
|
8
|
+
//
|
|
9
|
+
// 執行時需加 --experimental-sqlite flag
|
|
10
|
+
// node --experimental-sqlite bin/bh.mjs ...
|
|
11
|
+
//
|
|
12
|
+
// 相依:Node.js 22+ 內建 node:sqlite (DatabaseSync)
|
|
13
|
+
|
|
14
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
15
|
+
import fs from 'fs';
|
|
16
|
+
import path from 'path';
|
|
17
|
+
|
|
18
|
+
// ── Full usage guide stored in the _meta table (for AI/LLM citation) ──
// DocStore.initSchema() writes this text under the `usage_guide` key and
// DocStore.getUsageGuide() reads it back. The text is runtime data: edit it
// only when the schema it describes changes.
const META_USAGE_GUIDE = `
此 SQLite 資料庫由 botrun-horse 自動建立,用於儲存 PDF/Office 文件的逐頁文字內容。

## 核心資料表

### documents — 文件 metadata
id INTEGER 主鍵(用於 JOIN pages)
source_path TEXT 原始檔案完整路徑(唯一)
filename TEXT 原始檔案名稱
doc_type TEXT 文件類型(公文/手冊/法規/報告/論文等)
title TEXT 文件標題
total_pages INTEGER 總頁數
file_size INTEGER 檔案大小(bytes)
created_at TEXT 入庫時間(UTC)

### pages — 逐頁文字(引證核心)
id INTEGER 主鍵
doc_id INTEGER → documents.id
page_number INTEGER 頁碼(從 1 開始)
page_text TEXT 該頁全文文字(AI/LLM 使用此欄位)
char_count INTEGER 字元數
source_path TEXT 原始檔案完整路徑(引證用)
source_file TEXT 原始檔案名稱(引證用)
source_page INTEGER 原始頁碼(引證用,與 page_number 相同)
split_pdf TEXT 單頁獨立 PDF 路徑(可 NULL,僅在執行 doc split 後有值)

### pages_fts — FTS5 trigram 全文檢索虛擬表
搜尋語法: SELECT ... FROM pages_fts WHERE pages_fts MATCH '關鍵字'
特性: trigram tokenizer,天然支援繁體中文/日文/韓文(≥3 字元子字串搜尋)

### ingestion_log — 匯入紀錄(斷點續作)
source_path TEXT 原始檔案完整路徑(唯一鍵)
doc_id INTEGER 對應 documents.id(NULL 表示尚未完成)
status TEXT done | failed
pages INTEGER 成功匯入的頁數
ingested_at TEXT 完成時間(UTC)
error_msg TEXT 失敗原因(status=failed 時有值)

## AI/LLM 建議查詢模式

### 1. 全文搜尋(關鍵字引證)
SELECT p.source_file, p.source_page, d.title, d.doc_type,
snippet(pages_fts, 0, '【', '】', '...', 64) AS context
FROM pages_fts
JOIN pages p ON p.id = pages_fts.rowid
JOIN documents d ON d.id = p.doc_id
WHERE pages_fts MATCH '搜尋關鍵字'
ORDER BY rank LIMIT 10;

### 2. 取得特定文件的所有頁面
SELECT page_number, page_text, source_file, source_page
FROM pages WHERE doc_id = ? ORDER BY page_number;

### 3. 引證格式(LLM 回答時建議附上)
「根據《{title}》第 {source_page} 頁({source_file}):{context}」

### 4. 統計資訊
SELECT d.doc_type, COUNT(*) AS 文件數, SUM(d.total_pages) AS 總頁數
FROM documents d GROUP BY d.doc_type;

## 後續疊加建議
- 新增 tags 表:關聯 documents,支援多標籤分類
- 新增 summaries 表:儲存 AI 生成的頁面/章節摘要
- 新增 embeddings 表:儲存向量嵌入,支援語義搜尋
- 使用 SQLite 的 JSON 欄位擴充任意 metadata(documents.extra_json)
`.trim();
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* DocStore — 通用文件 SQLite 存儲層
|
|
88
|
+
*
|
|
89
|
+
* 單一職責:存取 SQLite,不含業務邏輯。
|
|
90
|
+
* 所有寫入操作皆使用 INSERT OR IGNORE,保證冪等性(可重複執行)。
|
|
91
|
+
*/
|
|
92
|
+
export class DocStore {
|
|
93
|
+
/**
|
|
94
|
+
* 開啟或建立 SQLite 資料庫
|
|
95
|
+
* @param {string} dbPath - 資料庫檔案路徑
|
|
96
|
+
*/
|
|
97
|
+
constructor(dbPath = './output/pdf_docs.db') {
|
|
98
|
+
const dir = path.dirname(dbPath);
|
|
99
|
+
if (!fs.existsSync(dir)) {
|
|
100
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
this.dbPath = dbPath;
|
|
104
|
+
this.db = new DatabaseSync(dbPath);
|
|
105
|
+
|
|
106
|
+
// WAL 模式:提升並發寫入效能(讀寫可同時進行)
|
|
107
|
+
this.db.exec('PRAGMA journal_mode = WAL');
|
|
108
|
+
// 外鍵約束
|
|
109
|
+
this.db.exec('PRAGMA foreign_keys = ON');
|
|
110
|
+
// 提升寫入效能(匯入大量頁面時明顯加速)
|
|
111
|
+
this.db.exec('PRAGMA synchronous = NORMAL');
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* 建立所有資料表、FTS 虛擬表、觸發器與 _meta 說明
|
|
116
|
+
* 使用 IF NOT EXISTS,可安全重複呼叫(冪等)
|
|
117
|
+
*/
|
|
118
|
+
initSchema() {
|
|
119
|
+
// ── _meta:AI/LLM 友善的 schema 說明 ──
|
|
120
|
+
this.db.exec(`
|
|
121
|
+
CREATE TABLE IF NOT EXISTS _meta (
|
|
122
|
+
key TEXT PRIMARY KEY,
|
|
123
|
+
value TEXT NOT NULL
|
|
124
|
+
)
|
|
125
|
+
`);
|
|
126
|
+
|
|
127
|
+
// 插入使用說明(OR REPLACE:每次 initSchema 更新最新說明)
|
|
128
|
+
this.db.prepare(`
|
|
129
|
+
INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
|
|
130
|
+
`).run('schema_version', '2');
|
|
131
|
+
this.db.prepare(`
|
|
132
|
+
INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
|
|
133
|
+
`).run('usage_guide', META_USAGE_GUIDE);
|
|
134
|
+
this.db.prepare(`
|
|
135
|
+
INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
|
|
136
|
+
`).run('created_by', 'botrun-horse');
|
|
137
|
+
this.db.prepare(`
|
|
138
|
+
INSERT OR REPLACE INTO _meta (key, value) VALUES (?, ?)
|
|
139
|
+
`).run('updated_at', new Date().toISOString());
|
|
140
|
+
|
|
141
|
+
// ── documents:文件 metadata 總表 ──
|
|
142
|
+
this.db.exec(`
|
|
143
|
+
CREATE TABLE IF NOT EXISTS documents (
|
|
144
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
145
|
+
source_path TEXT NOT NULL UNIQUE,
|
|
146
|
+
filename TEXT NOT NULL,
|
|
147
|
+
doc_type TEXT,
|
|
148
|
+
title TEXT,
|
|
149
|
+
total_pages INTEGER,
|
|
150
|
+
file_size INTEGER,
|
|
151
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
152
|
+
)
|
|
153
|
+
`);
|
|
154
|
+
|
|
155
|
+
// ── pages:逐頁文字(引證鏈核心)──
|
|
156
|
+
this.db.exec(`
|
|
157
|
+
CREATE TABLE IF NOT EXISTS pages (
|
|
158
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
159
|
+
doc_id INTEGER NOT NULL REFERENCES documents(id),
|
|
160
|
+
page_number INTEGER NOT NULL,
|
|
161
|
+
page_text TEXT,
|
|
162
|
+
char_count INTEGER,
|
|
163
|
+
source_path TEXT NOT NULL,
|
|
164
|
+
source_file TEXT NOT NULL,
|
|
165
|
+
source_page INTEGER NOT NULL,
|
|
166
|
+
split_pdf TEXT,
|
|
167
|
+
UNIQUE(doc_id, page_number)
|
|
168
|
+
)
|
|
169
|
+
`);
|
|
170
|
+
|
|
171
|
+
// ── pages_fts:FTS5 trigram 全文檢索 ──
|
|
172
|
+
// trigram tokenizer:三字元組比對,天然支援 CJK(繁中/日/韓)
|
|
173
|
+
this.db.exec(`
|
|
174
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5(
|
|
175
|
+
page_text,
|
|
176
|
+
content='pages',
|
|
177
|
+
content_rowid='id',
|
|
178
|
+
tokenize='trigram case_sensitive 0 remove_diacritics 0'
|
|
179
|
+
)
|
|
180
|
+
`);
|
|
181
|
+
|
|
182
|
+
// ── FTS 自動同步觸發器 ──
|
|
183
|
+
this.db.exec(`
|
|
184
|
+
CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
|
|
185
|
+
INSERT INTO pages_fts(rowid, page_text) VALUES (new.id, new.page_text);
|
|
186
|
+
END
|
|
187
|
+
`);
|
|
188
|
+
this.db.exec(`
|
|
189
|
+
CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
|
|
190
|
+
INSERT INTO pages_fts(pages_fts, rowid, page_text) VALUES('delete', old.id, old.page_text);
|
|
191
|
+
END
|
|
192
|
+
`);
|
|
193
|
+
this.db.exec(`
|
|
194
|
+
CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
|
|
195
|
+
INSERT INTO pages_fts(pages_fts, rowid, page_text) VALUES('delete', old.id, old.page_text);
|
|
196
|
+
INSERT INTO pages_fts(rowid, page_text) VALUES (new.id, new.page_text);
|
|
197
|
+
END
|
|
198
|
+
`);
|
|
199
|
+
|
|
200
|
+
// ── ingestion_log:匯入紀錄(斷點續作)──
|
|
201
|
+
this.db.exec(`
|
|
202
|
+
CREATE TABLE IF NOT EXISTS ingestion_log (
|
|
203
|
+
source_path TEXT PRIMARY KEY,
|
|
204
|
+
doc_id INTEGER REFERENCES documents(id),
|
|
205
|
+
status TEXT NOT NULL DEFAULT 'done',
|
|
206
|
+
pages INTEGER,
|
|
207
|
+
ingested_at TEXT DEFAULT (datetime('now')),
|
|
208
|
+
error_msg TEXT
|
|
209
|
+
)
|
|
210
|
+
`);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// ─────────────────────────────────────────────────────────
|
|
214
|
+
// ingestion_log:斷點續作
|
|
215
|
+
// ─────────────────────────────────────────────────────────
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* 檢查檔案是否已成功匯入(斷點續作用)
|
|
219
|
+
* @param {string} sourcePath - 原始檔案完整路徑
|
|
220
|
+
* @returns {boolean} true = 已匯入,可跳過
|
|
221
|
+
*/
|
|
222
|
+
isIngested(sourcePath) {
|
|
223
|
+
const row = this.db.prepare(
|
|
224
|
+
`SELECT status FROM ingestion_log WHERE source_path = ?`
|
|
225
|
+
).get(sourcePath);
|
|
226
|
+
return row?.status === 'done';
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
/**
|
|
230
|
+
* 記錄匯入成功
|
|
231
|
+
* @param {string} sourcePath
|
|
232
|
+
* @param {number} docId
|
|
233
|
+
* @param {number} pages
|
|
234
|
+
*/
|
|
235
|
+
logIngested(sourcePath, docId, pages) {
|
|
236
|
+
this.db.prepare(`
|
|
237
|
+
INSERT OR REPLACE INTO ingestion_log (source_path, doc_id, status, pages, ingested_at)
|
|
238
|
+
VALUES (?, ?, 'done', ?, datetime('now'))
|
|
239
|
+
`).run(sourcePath, docId, pages);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* 記錄匯入失敗
|
|
244
|
+
* @param {string} sourcePath
|
|
245
|
+
* @param {string} errorMsg
|
|
246
|
+
*/
|
|
247
|
+
logFailed(sourcePath, errorMsg) {
|
|
248
|
+
this.db.prepare(`
|
|
249
|
+
INSERT OR REPLACE INTO ingestion_log (source_path, status, error_msg, ingested_at)
|
|
250
|
+
VALUES (?, 'failed', ?, datetime('now'))
|
|
251
|
+
`).run(sourcePath, errorMsg);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// ─────────────────────────────────────────────────────────
|
|
255
|
+
// documents:文件 metadata CRUD
|
|
256
|
+
// ─────────────────────────────────────────────────────────
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* 新增文件 metadata(冪等:已存在時回傳現有 ID)
|
|
260
|
+
*
|
|
261
|
+
* @param {Object} doc
|
|
262
|
+
* @param {string} doc.sourcePath - 來源檔案路徑(唯一鍵)
|
|
263
|
+
* @param {string} doc.filename - 檔案名稱
|
|
264
|
+
* @param {string} [doc.docType] - 文件類型
|
|
265
|
+
* @param {string} [doc.title] - 文件標題
|
|
266
|
+
* @param {number} [doc.totalPages] - 總頁數
|
|
267
|
+
* @param {number} [doc.fileSize] - 檔案大小(bytes)
|
|
268
|
+
* @returns {number} doc_id(新建或現有)
|
|
269
|
+
*/
|
|
270
|
+
insertDocument({ sourcePath, filename, docType = null, title = null, totalPages = null, fileSize = null }) {
|
|
271
|
+
// OR IGNORE:source_path 重複時靜默忽略
|
|
272
|
+
this.db.prepare(`
|
|
273
|
+
INSERT OR IGNORE INTO documents (source_path, filename, doc_type, title, total_pages, file_size)
|
|
274
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
275
|
+
`).run(sourcePath, filename, docType, title, totalPages, fileSize);
|
|
276
|
+
|
|
277
|
+
// 回傳現有或剛插入的 ID
|
|
278
|
+
const row = this.db.prepare(`SELECT id FROM documents WHERE source_path = ?`).get(sourcePath);
|
|
279
|
+
return Number(row.id);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* 新增單頁文字內容(冪等:(doc_id, page_number) 重複時靜默忽略)
|
|
284
|
+
*
|
|
285
|
+
* @param {Object} page
|
|
286
|
+
* @param {number} page.docId - 所屬文件 ID
|
|
287
|
+
* @param {number} page.pageNumber - 頁碼
|
|
288
|
+
* @param {string} [page.pageText] - 頁面文字內容
|
|
289
|
+
* @param {string} page.sourcePath - 來源檔案完整路徑(引證用)
|
|
290
|
+
* @param {string} page.sourceFile - 來源檔案名稱(引證用)
|
|
291
|
+
* @param {number} page.sourcePage - 來源頁碼(引證用)
|
|
292
|
+
* @param {string} [page.splitPdf] - 拆分後的單頁 PDF 路徑
|
|
293
|
+
* @returns {number} page_id(新建或現有)
|
|
294
|
+
*/
|
|
295
|
+
insertPage({ docId, pageNumber, pageText = null, sourcePath, sourceFile, sourcePage, splitPdf = null }) {
|
|
296
|
+
const charCount = pageText ? pageText.length : 0;
|
|
297
|
+
// OR IGNORE:(doc_id, page_number) 重複時靜默忽略
|
|
298
|
+
this.db.prepare(`
|
|
299
|
+
INSERT OR IGNORE INTO pages
|
|
300
|
+
(doc_id, page_number, page_text, char_count, source_path, source_file, source_page, split_pdf)
|
|
301
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
302
|
+
`).run(docId, pageNumber, pageText, charCount, sourcePath, sourceFile, sourcePage, splitPdf);
|
|
303
|
+
|
|
304
|
+
const row = this.db.prepare(
|
|
305
|
+
`SELECT id FROM pages WHERE doc_id = ? AND page_number = ?`
|
|
306
|
+
).get(docId, pageNumber);
|
|
307
|
+
return Number(row.id);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// ─────────────────────────────────────────────────────────
|
|
311
|
+
// 查詢
|
|
312
|
+
// ─────────────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* FTS5 trigram 全文檢索
|
|
316
|
+
* 自動將查詢字串拆成三字元組,天然支援 CJK 子字串搜尋(≥3 字元)
|
|
317
|
+
*
|
|
318
|
+
* @param {string} query - 搜尋關鍵字(≥3 字元)
|
|
319
|
+
* @param {number} [limit] - 最多回傳筆數(預設不限)
|
|
320
|
+
* @returns {Array<Object>} 搜尋結果(含引證資訊與 snippet)
|
|
321
|
+
*/
|
|
322
|
+
search(query, limit = null) {
|
|
323
|
+
if (!query || query.trim().length === 0) return [];
|
|
324
|
+
|
|
325
|
+
let sql = `
|
|
326
|
+
SELECT
|
|
327
|
+
p.doc_id,
|
|
328
|
+
p.id AS page_id,
|
|
329
|
+
p.page_number,
|
|
330
|
+
p.source_path,
|
|
331
|
+
p.source_file,
|
|
332
|
+
p.source_page,
|
|
333
|
+
p.split_pdf,
|
|
334
|
+
snippet(pages_fts, 0, '【', '】', '...', 64) AS snippet,
|
|
335
|
+
d.doc_type,
|
|
336
|
+
d.title
|
|
337
|
+
FROM pages_fts
|
|
338
|
+
JOIN pages p ON p.id = pages_fts.rowid
|
|
339
|
+
JOIN documents d ON d.id = p.doc_id
|
|
340
|
+
WHERE pages_fts MATCH ?
|
|
341
|
+
ORDER BY rank
|
|
342
|
+
`;
|
|
343
|
+
if (limit && limit > 0) sql += ` LIMIT ${parseInt(limit)}`;
|
|
344
|
+
|
|
345
|
+
return this.db.prepare(sql).all(query);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* 依 ID 取得單一文件 metadata
|
|
350
|
+
* @param {number} id - 文件 ID
|
|
351
|
+
* @returns {Object|undefined}
|
|
352
|
+
*/
|
|
353
|
+
getDocument(id) {
|
|
354
|
+
return this.db.prepare('SELECT * FROM documents WHERE id = ?').get(id);
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
/**
|
|
358
|
+
* 取得指定文件的所有頁面(依頁碼排序)
|
|
359
|
+
* @param {number} docId - 文件 ID
|
|
360
|
+
* @returns {Array<Object>}
|
|
361
|
+
*/
|
|
362
|
+
getPages(docId) {
|
|
363
|
+
return this.db.prepare('SELECT * FROM pages WHERE doc_id = ? ORDER BY page_number').all(docId);
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* 取得資料庫統計資訊
|
|
368
|
+
* @returns {{ documents, pages, totalChars, byType, ingestionLog }}
|
|
369
|
+
*/
|
|
370
|
+
stats() {
|
|
371
|
+
const docCount = this.db.prepare('SELECT COUNT(*) AS count FROM documents').get();
|
|
372
|
+
const pageCount = this.db.prepare('SELECT COUNT(*) AS count FROM pages').get();
|
|
373
|
+
const totalChars = this.db.prepare('SELECT COALESCE(SUM(char_count), 0) AS total FROM pages').get();
|
|
374
|
+
|
|
375
|
+
const byType = this.db.prepare(`
|
|
376
|
+
SELECT doc_type, COUNT(*) AS count
|
|
377
|
+
FROM documents
|
|
378
|
+
GROUP BY doc_type
|
|
379
|
+
ORDER BY count DESC
|
|
380
|
+
`).all();
|
|
381
|
+
|
|
382
|
+
const ingestionLog = this.db.prepare(`
|
|
383
|
+
SELECT status, COUNT(*) AS count
|
|
384
|
+
FROM ingestion_log
|
|
385
|
+
GROUP BY status
|
|
386
|
+
`).all();
|
|
387
|
+
|
|
388
|
+
return {
|
|
389
|
+
documents: docCount.count,
|
|
390
|
+
pages: pageCount.count,
|
|
391
|
+
totalChars: totalChars.total,
|
|
392
|
+
byType,
|
|
393
|
+
ingestionLog,
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
/**
|
|
398
|
+
* 取得 _meta 表中的 schema 使用說明(供 AI/LLM 參考)
|
|
399
|
+
* @returns {string} 使用說明 Markdown
|
|
400
|
+
*/
|
|
401
|
+
getUsageGuide() {
|
|
402
|
+
const row = this.db.prepare(`SELECT value FROM _meta WHERE key = 'usage_guide'`).get();
|
|
403
|
+
return row?.value || '';
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/**
|
|
407
|
+
* 關閉資料庫連線
|
|
408
|
+
*/
|
|
409
|
+
close() {
|
|
410
|
+
this.db.close();
|
|
411
|
+
}
|
|
412
|
+
}
|
package/lib/core/env.mjs
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// lib/core/env.mjs — 零依賴 .env 載入器
|
|
2
|
+
//
|
|
3
|
+
// 設計原則:KISS / DRY
|
|
4
|
+
// - 不引入 dotenv npm 套件,純 Node.js fs 讀取
|
|
5
|
+
// - 預設覆蓋已有環境變數(.env 是專案級設定,優先於 shell 全域 export)
|
|
6
|
+
// - 支援 # 註解、空行、雙引號值、等號左右無空格
|
|
7
|
+
//
|
|
8
|
+
// 使用方式(必須在所有 import 之前):
|
|
9
|
+
// import { loadEnv } from '../core/env.mjs';
|
|
10
|
+
// loadEnv(); // 自動從專案根目錄讀取 .env
|
|
11
|
+
|
|
12
|
+
import fs from 'fs';
|
|
13
|
+
import path from 'path';
|
|
14
|
+
import { fileURLToPath } from 'url';
|
|
15
|
+
|
|
16
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Two directories above this module; the default `.env` is looked up here.
const PROJECT_ROOT = path.resolve(__dirname, '../..');

/**
 * Load a `.env` file into `process.env`.
 *
 * Supported syntax: `KEY=VALUE` and `export KEY=VALUE`, blank lines, lines
 * starting with `#`, and values wrapped in matching double or single quotes.
 * Lines that do not match are skipped. Text after the `=` is taken verbatim
 * apart from trimming and quote removal (no inline comments, no escapes).
 *
 * @param {string} [envPath] - Path of the `.env` file; defaults to `.env`
 *   under the project root.
 * @param {object} [opts]
 * @param {boolean} [opts.override=true] - When `true`, values from the file
 *   replace variables that are already set; when `false`, existing variables
 *   are kept.
 * @returns {number} Number of variables written to `process.env`; 0 when the
 *   file could not be read.
 */
export function loadEnv(envPath, opts = {}) {
  const filePath = envPath || path.join(PROJECT_ROOT, '.env');

  let content;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch {
    // Best effort: a missing (or otherwise unreadable) file loads nothing.
    return 0;
  }

  // Loop-invariant, so resolved once; `opts` may be null.
  const override = opts?.override ?? true;
  const assignment = /^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$/;

  let count = 0;
  for (const line of content.split('\n')) {
    // trim() also removes the trailing \r of CRLF files.
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const match = assignment.exec(trimmed);
    if (!match) continue;

    const key = match[1];
    let value = match[2].trim();

    // Strip one pair of matching wrapping quotes. The length check keeps a
    // value that is a single quote character from being emptied.
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }

    if (!override && process.env[key] !== undefined) continue;

    process.env[key] = value;
    count++;
  }

  return count;
}
|
package/lib/core/llm.mjs
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// lib/core/llm.mjs — LLM 統一介面 (Factory Pattern)
|
|
2
|
+
// 遵循 SOLID OCP:新增 provider 不需修改既有程式碼
|
|
3
|
+
//
|
|
4
|
+
// Provider 選擇策略:
|
|
5
|
+
// 1. 明確指定 provider 參數
|
|
6
|
+
// 2. 若 provider='gemini-auto'(或未指定)→ 自動偵測:
|
|
7
|
+
// - 有 GOOGLE_CLOUD_PROJECT + ADC → gemini(Vertex AI)
|
|
8
|
+
// - 有 GEMINI_API_KEY → gemini-api(Direct API)
|
|
9
|
+
|
|
10
|
+
/**
 * Pick a Gemini provider from the environment.
 *
 * `GOOGLE_CLOUD_PROJECT` takes precedence over `GEMINI_API_KEY`. Only the
 * presence of these variables is checked here.
 *
 * @returns {'gemini' | 'gemini-api' | null} `'gemini'` when
 *   `GOOGLE_CLOUD_PROJECT` is set, `'gemini-api'` when only `GEMINI_API_KEY`
 *   is set, otherwise `null`.
 */
function detectGeminiProvider() {
  const { GOOGLE_CLOUD_PROJECT, GEMINI_API_KEY } = process.env;
  if (GOOGLE_CLOUD_PROJECT) {
    return 'gemini';
  }
  return GEMINI_API_KEY ? 'gemini-api' : null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Create an LLM adapter for the requested provider.
 *
 * When `provider` is missing or `'gemini-auto'`, the provider is chosen by
 * `detectGeminiProvider()`, falling back to `'gemini'`. The adapter module is
 * imported lazily, so only the selected provider's code is loaded.
 *
 * @param {object} [config] - `{ provider, ...adapterOpts }`.
 * @param {string} [config.provider='gemini-auto'] - One of `gemini`,
 *   `gemini-api`, `claude`, `nchc`, `openrouter`, or `gemini-auto`.
 * @returns {Promise<object>} Adapter instance constructed with the remaining
 *   config fields.
 * @throws {Error} When the provider name is not one of the supported values.
 */
export async function createLLM(config = {}) {
  const { provider: requested, ...adapterOpts } = config;

  const wantsAuto = !requested || requested === 'gemini-auto';
  const provider = wantsAuto ? (detectGeminiProvider() || 'gemini') : requested;

  // Each loader resolves to the adapter class of its provider.
  const loaders = new Map([
    ['gemini', async () => (await import('./adapters/gemini-vertex.mjs')).GeminiVertexAdapter],
    ['gemini-api', async () => (await import('./adapters/gemini-api.mjs')).GeminiApiAdapter],
    ['claude', async () => (await import('./adapters/claude.mjs')).ClaudeAdapter],
    ['nchc', async () => (await import('./adapters/nchc.mjs')).NchcAdapter],
    ['openrouter', async () => (await import('./adapters/openrouter.mjs')).OpenRouterAdapter],
  ]);

  const loadAdapter = loaders.get(provider);
  if (!loadAdapter) {
    throw new Error(`LLM provider "${provider}" 尚未實作。可用: gemini, gemini-api, claude, nchc, openrouter`);
  }

  const Adapter = await loadAdapter();
  return new Adapter(adapterOpts);
}
|