npm - botrun-horse - Versions diffs - 1.0.0 - Mend

botrun-horse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/README.md +1 -0
package/bin/bh.mjs +193 -0
package/bin/commands/dag-cmd.mjs +74 -0
package/bin/commands/db-cmd.mjs +73 -0
package/bin/commands/doc.mjs +185 -0
package/bin/commands/gemini.mjs +120 -0
package/bin/commands/help.mjs +109 -0
package/bin/commands/legal.mjs +174 -0
package/bin/commands/nchc.mjs +212 -0
package/bin/commands/openrouter.mjs +154 -0
package/bin/commands/prompt.mjs +175 -0
package/bin/commands/schema.mjs +258 -0
package/bin/commands/search.mjs +46 -0
package/bin/commands/writing.mjs +33 -0
package/lib/core/adapters/base.mjs +52 -0
package/lib/core/adapters/claude.mjs +13 -0
package/lib/core/adapters/gemini-api.mjs +174 -0
package/lib/core/adapters/gemini-shared.mjs +164 -0
package/lib/core/adapters/gemini-vertex.mjs +232 -0
package/lib/core/adapters/local.mjs +13 -0
package/lib/core/adapters/nchc.mjs +236 -0
package/lib/core/adapters/openai-shared.mjs +34 -0
package/lib/core/adapters/openrouter.mjs +304 -0
package/lib/core/ai-cache.mjs +277 -0
package/lib/core/ai-router.mjs +217 -0
package/lib/core/cli-utils.mjs +170 -0
package/lib/core/dag.mjs +114 -0
package/lib/core/db.mjs +412 -0
package/lib/core/env.mjs +64 -0
package/lib/core/llm.mjs +58 -0
package/lib/core/paths.mjs +115 -0
package/lib/core/proxy.mjs +46 -0
package/lib/core/watermelon.mjs +9 -0
package/lib/doc/index.mjs +419 -0
package/lib/doc/office2text.mjs +234 -0
package/lib/doc/pdf2text.mjs +133 -0
package/lib/doc/split.mjs +132 -0
package/lib/flows/draft-writing.mjs +29 -0
package/lib/flows/gemini-ask.mjs +185 -0
package/lib/flows/hatch-portal.mjs +13 -0
package/lib/flows/legal-ask.mjs +325 -0
package/lib/flows/openai-agent.mjs +167 -0
package/lib/flows/opencode-agent.mjs +240 -0
package/lib/flows/openrouter-ask.mjs +111 -0
package/lib/flows/review-doc.mjs +18 -0
package/lib/ocr/index.mjs +6 -0
package/lib/portal/hatch.mjs +6 -0
package/lib/portal/index.mjs +6 -0
package/lib/prompt/prompt-search.mjs +55 -0
package/lib/prompt/prompt-store.mjs +94 -0
package/lib/prompt/prompts/zero-framework/coding.md +15 -0
package/lib/prompt/prompts/zero-framework/search.md +12 -0
package/lib/prompt/prompts/zero-framework/slice.md +11 -0
package/lib/search/crawler.mjs +6 -0
package/lib/search/index.mjs +7 -0
package/lib/tools/fs-tools.mjs +268 -0
package/lib/tools/index.mjs +27 -0
package/lib/writing/generate.mjs +86 -0
package/lib/writing/generators/nstc-generators.mjs +279 -0
package/lib/writing/generators/nstc-top5.mjs +554 -0
package/lib/writing/index.mjs +5 -0
package/lib/writing/layouts/nstc-layout.mjs +249 -0
package/lib/writing/renderer.mjs +61 -0
package/package.json +35 -0

package/lib/doc/office2text.mjs ADDED Viewed

@@ -0,0 +1,234 @@
+// lib/doc/office2text.mjs — Office 全系列文件轉文字
+//
+// 核心設計（方案 E 混合最佳化）：
+//   LibreOffice --headless --convert-to txt:Text 直接輸出純文字，
+//   跳過「先轉 PDF 再用 pdftotext」的中間步驟，更快、更省資源。
+//
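+//   Page splitting: LibreOffice cannot emit text page by page, so Writer and
+//   Impress output is split on the form-feed character (\f, ASCII 12).
+//   Calc spreadsheets take a different route: they are converted to PDF first
+//   and the text is then extracted per PDF page via pdf2text.mjs.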
+//
+// 支援格式：
+//   Writer:  .doc .docx .odt .rtf .odm
+//   Impress: .ppt .pptx .odp
+//   Calc:    .xls .xlsx .ods .csv
+//
+// 平台需求：libreoffice 或 soffice 在 PATH 中
+//   Ubuntu/Debian: apt install libreoffice
+//   macOS: brew install --cask libreoffice
+//   Docker: ghcr.io/jlesage/libreoffice
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { mkdtemp, rm, readdir, readFile } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+import { toolBin } from '../core/paths.mjs';
+const execFileAsync = promisify(execFile); // Promise-returning execFile so child processes can be awaited
+/** Supported Office file extensions (lowercase, leading dot included). */
+const OFFICE_EXTENSIONS = new Set([
+  '.doc', '.docx', '.odt', '.rtf', '.odm', '.fodt',
+  '.ppt', '.pptx', '.odp', '.fodp',
+  '.xls', '.xlsx', '.ods', '.fods', '.csv',
+]);
+/** Plain-text file extensions (lowercase, leading dot included). */
+const TEXT_EXTENSIONS = new Set(['.txt', '.text', '.md', '.markdown', '.rst', '.log']);
+/**
+ * 判斷檔案是否為支援的 Office 格式
+ *
+ * @param {string} filePath - 檔案路徑
+ * @returns {boolean}
+ */
+export function isOfficeFile(filePath) {
+  return OFFICE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
+}
+/**
+ * 判斷檔案是否為純文字格式（.txt / .md / .rst 等）
+ *
+ * @param {string} filePath - 檔案路徑
+ * @returns {boolean}
+ */
+export function isTextFile(filePath) {
+  return TEXT_EXTENSIONS.has(path.extname(filePath).toLowerCase());
+}
+/**
+ * 將純文字檔案轉為逐頁文字陣列
+ *
+ * 分頁策略：
+ *   - 若包含換頁符號（\f）→ 依 \f 分頁
+ *   - 否則以每 N 行為一頁（預設 80 行/頁，近似 A4 文字頁）
+ *
+ * @param {string} filePath  - 純文字檔案路徑
+ * @param {number} linesPerPage - 每頁行數（預設 80）
+ * @returns {Promise<Array<{page: number, text: string}>>}
+ */
+export async function extractTextFilePages(filePath, linesPerPage = 80) {
+  const { readFile } = await import('node:fs/promises');
+  const content = await readFile(path.resolve(filePath), 'utf-8');
+  // 優先使用換頁符號分頁
+  if (content.includes('\f')) {
+    const rawPages = content.split('\f');
+    return rawPages
+      .map((text, idx) => ({ page: idx + 1, text: text.trim() }))
+      .filter(p => p.text.length > 0);
+  }
+  // 否則以行數分頁
+  const lines = content.split('\n');
+  if (lines.length <= linesPerPage) {
+    return [{ page: 1, text: content.trim() }];
+  }
+  const pages = [];
+  for (let i = 0; i < lines.length; i += linesPerPage) {
+    const pageText = lines.slice(i, i + linesPerPage).join('\n').trim();
+    if (pageText.length > 0) {
+      pages.push({ page: Math.floor(i / linesPerPage) + 1, text: pageText });
+    }
+  }
+  return pages.length > 0 ? pages : [{ page: 1, text: content.trim() }];
+}
+/**
+ * 取得 libreoffice 執行檔路徑
+ * 依序嘗試 libreoffice、soffice（舊版 LibreOffice 可執行檔名稱）
+ *
+ * @returns {string} libreoffice 執行檔路徑
+ */
+function getLoBin() {
+  // 先嘗試 libreoffice，再嘗試 soffice
+  const lo = toolBin('libreoffice');
+  if (lo !== 'libreoffice') return lo;
+  return toolBin('soffice', 'libreoffice');
+}
+/**
+ * 呼叫 LibreOffice headless 轉換單一文件
+ *
+ * @param {string} absPath  - 輸入檔案絕對路徑
+ * @param {string} format   - 目標格式（'txt:Text' | 'pdf'）
+ * @param {string} outDir   - 輸出目錄
+ * @param {number} timeout  - 逾時毫秒（預設 120 秒）
+ */
+async function loConvert(absPath, format, outDir, timeout = 120000) {
+  const lo = getLoBin();
+  await execFileAsync(lo, [
+    '--headless',
+    '--convert-to', format,
+    '--outdir', outDir,
+    absPath,
+  ], {
+    timeout,
+    maxBuffer: 32 * 1024 * 1024, // 32MB stdout buffer
+  });
+}
+/**
+ * 將 Office 文件轉為純文字（單一字串）
+ *
+ * @param {string} filePath - Office 文件路徑
+ * @returns {Promise<string>} 文字內容
+ */
+export async function extractOfficeText(filePath) {
+  const absPath = path.resolve(filePath);
+  const tmpDir = await mkdtemp(path.join(os.tmpdir(), 'bh-office-'));
+  try {
+    await loConvert(absPath, 'txt:Text', tmpDir);
+    // LibreOffice 輸出檔名 = 原始檔名（去副檔名）+ .txt
+    const baseName = path.basename(absPath, path.extname(absPath));
+    const txtPath = path.join(tmpDir, `${baseName}.txt`);
+    const content = await readFile(txtPath, 'utf-8');
+    return content;
+  } finally {
+    await rm(tmpDir, { recursive: true, force: true });
+  }
+}
+/**
+ * 將 Office 文件轉為逐頁文字陣列
+ *
+ * 分頁策略：
+ *   - Writer/Impress: LibreOffice 在輸出文字中插入換頁符號（\f，ASCII 12）
+ *   - Calc: 先轉 PDF，再用 pdftotext 按頁萃取（保留 sheet 分頁）
+ *
+ * @param {string} filePath - Office 文件路徑
+ * @returns {Promise<Array<{page: number, text: string}>>} 逐頁文字陣列
+ */
+export async function extractOfficePages(filePath) {
+  const absPath = path.resolve(filePath);
+  const ext = path.extname(absPath).toLowerCase();
+  // Calc 類型：先轉 PDF，再用 pdftotext 按頁萃取
+  const calcExts = new Set(['.xls', '.xlsx', '.ods', '.fods', '.csv']);
+  if (calcExts.has(ext)) {
+    return extractCalcPages(absPath);
+  }
+  // Writer / Impress 類型：直接轉 txt，用換頁符號分割
+  return extractWriterImpressPages(absPath);
+}
+/**
+ * Writer/Impress 逐頁萃取
+ * LibreOffice 輸出的 txt 中，頁面邊界為 \f（換頁符號，ASCII 12）
+ */
+async function extractWriterImpressPages(absPath) {
+  const tmpDir = await mkdtemp(path.join(os.tmpdir(), 'bh-office-'));
+  try {
+    await loConvert(absPath, 'txt:Text', tmpDir);
+    const baseName = path.basename(absPath, path.extname(absPath));
+    const txtPath = path.join(tmpDir, `${baseName}.txt`);
+    const content = await readFile(txtPath, 'utf-8');
+    // 以換頁符號（\f）分割頁面
+    const rawPages = content.split('\f');
+    // 清理頁面文字（去除首尾空白，過濾空頁）
+    const pages = rawPages
+      .map((text, idx) => ({ page: idx + 1, text: text.trim() }))
+      .filter(p => p.text.length > 0);
+    // 若無換頁符號（如純文字 .txt），整份文件視為第 1 頁
+    if (pages.length === 0) {
+      return [{ page: 1, text: content.trim() }];
+    }
+    return pages;
+  } finally {
+    await rm(tmpDir, { recursive: true, force: true });
+  }
+}
+/**
+ * Calc (試算表) 逐頁萃取
+ * 先轉 PDF（LibreOffice 會保留頁碼），再用 pdftotext 按頁萃取
+ */
+async function extractCalcPages(absPath) {
+  const tmpDir = await mkdtemp(path.join(os.tmpdir(), 'bh-office-'));
+  try {
+    await loConvert(absPath, 'pdf', tmpDir);
+    const baseName = path.basename(absPath, path.extname(absPath));
+    const pdfPath = path.join(tmpDir, `${baseName}.pdf`);
+    // 動態匯入 pdf2text，避免循環依賴
+    const { extractAllPages } = await import('./pdf2text.mjs');
+    const pages = await extractAllPages(pdfPath);
+    return pages;
+  } finally {
+    await rm(tmpDir, { recursive: true, force: true });
+  }
+}

package/lib/doc/pdf2text.mjs ADDED Viewed

@@ -0,0 +1,133 @@
+// lib/doc/pdf2text.mjs — PDF 轉純文字模組
+//
+// 核心設計（方案 E 混合最佳化）：
+//   直接從原始 PDF 按頁萃取，不需要先拆頁成獨立 PDF 檔案。
+//   pdftotext -f {page} -l {page} -layout -enc UTF-8 {input} -
+//   stdout 輸出，支援 Unix pipe。
+//
+// 平行策略：
+//   - 單一 PDF 的所有頁面以 Promise.all 平行萃取（I/O bound）
+//   - 多個 PDF 的批次平行由上層（ingestBatch）控制 concurrency
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { toolBin } from '../core/paths.mjs';
+const execFileAsync = promisify(execFile); // Promise-returning execFile so child processes can be awaited
+/**
+ * 取得 PDF 總頁數
+ * 使用 pdfinfo 解析 "Pages: N" 輸出
+ *
+ * @param {string} inputPath - PDF 檔案路徑
+ * @returns {Promise<number>} 總頁數
+ */
+export async function getPageCount(inputPath) {
+  const pdfinfo = toolBin('pdfinfo');
+  const { stdout } = await execFileAsync(pdfinfo, [inputPath]);
+  const match = stdout.match(/^Pages:\s+(\d+)/m);
+  if (!match) {
+    throw new Error(`無法從 pdfinfo 輸出中解析總頁數：${inputPath}`);
+  }
+  return parseInt(match[1], 10);
+}
+/**
+ * 萃取指定頁面的文字內容
+ *
+ * 直接從原始 PDF 萃取，不需要先 split：
+ *   pdftotext -f {page} -l {page} -layout -enc UTF-8 {input} -
+ *
+ * -layout    保留原始排版佈局（段落、縮排）
+ * -enc UTF-8 確保繁體中文、日文、韓文等 CJK 字符正確輸出
+ *
+ * @param {string} inputPath - PDF 檔案路徑
+ * @param {number} pageNum   - 頁碼（從 1 開始）
+ * @returns {Promise<{page: number, text: string}>} 該頁的文字資料
+ */
+export async function extractPage(inputPath, pageNum) {
+  const pdftotext = toolBin('pdftotext');
+  const args = [
+    '-f', String(pageNum),   // 起始頁
+    '-l', String(pageNum),   // 結束頁（與起始頁相同 = 單頁）
+    '-layout',               // 保留原始排版佈局
+    '-enc', 'UTF-8',         // 強制 UTF-8 輸出（繁體中文必要）
+    '-nopgbrk',              // 不在頁尾加分頁符號（AI 友善）
+    inputPath,               // 輸入 PDF 檔案
+    '-',                     // 輸出到 stdout
+  ];
+  const { stdout } = await execFileAsync(pdftotext, args, {
+    maxBuffer: 64 * 1024 * 1024, // 64MB，支援大型頁面
+  });
+  return {
+    page: pageNum,
+    text: stdout,
+  };
+}
+/**
+ * 萃取 PDF 所有頁面的文字內容（平行執行）
+ *
+ * 所有頁面同時發出 pdftotext 子程序（I/O bound，平行效益高）。
+ * 若頁數極多（>50頁），建議呼叫方改用 extractPagesBatched 控制上限。
+ *
+ * @param {string} inputPath - PDF 檔案路徑
+ * @returns {Promise<Array<{page: number, text: string}>>} 各頁文字資料陣列
+ */
+export async function extractAllPages(inputPath) {
+  const totalPages = await getPageCount(inputPath);
+  const promises = [];
+  for (let i = 1; i <= totalPages; i++) {
+    promises.push(extractPage(inputPath, i));
+  }
+  const results = await Promise.all(promises);
+  return results;
+}
+/**
+ * 萃取 PDF 所有頁面文字（可設定最大並發數）
+ *
+ * 適用於頁數很多的大型 PDF，避免同時開啟過多子程序。
+ * 類似 GNU parallel 的 --jobs N 設計。
+ *
+ * @param {string} inputPath   - PDF 檔案路徑
+ * @param {number} concurrency - 最大並發頁面數（預設 10）
+ * @returns {Promise<Array<{page: number, text: string}>>} 各頁文字資料陣列
+ */
+export async function extractAllPagesConcurrent(inputPath, concurrency = 10) {
+  const totalPages = await getPageCount(inputPath);
+  const results = new Array(totalPages);
+  // Semaphore 實作（類 GNU parallel -j N）
+  let running = 0;
+  let next = 0;
+  await new Promise((resolve, reject) => {
+    function schedule() {
+      while (running < concurrency && next < totalPages) {
+        const pageNum = next + 1;
+        const idx = next;
+        next++;
+        running++;
+        extractPage(inputPath, pageNum)
+          .then(r => {
+            results[idx] = r;
+            running--;
+            if (next < totalPages) {
+              schedule();
+            } else if (running === 0) {
+              resolve();
+            }
+          })
+          .catch(err => reject(err));
+      }
+      if (next >= totalPages && running === 0) resolve();
+    }
+    schedule();
+  });
+  return results;
+}

package/lib/doc/split.mjs ADDED Viewed

@@ -0,0 +1,132 @@
+// lib/doc/split.mjs — PDF 拆頁模組
+//
+// 核心設計（方案 E）：
+//   「轉文字」不需要先拆頁 — pdftotext -f/-l 直接按頁萃取。
+//   本模組的拆頁功能僅在需要「頁面級別獨立 PDF 檔案」時使用，
+//   例如：提供給 LLM 做圖像辨識、或作為頁面精準引證的附件。
+//
+// 工具：qpdf（跨平台，比 pdfseparate 更能精確控制輸出檔名格式）
+// 平行策略：所有頁面同時以 Promise.all 平行拆分（I/O bound）
+// 命名格式：{原始檔名}_page_{三位數頁碼}.pdf（如 report_page_001.pdf）
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { mkdir } from 'node:fs/promises';
+import path from 'node:path';
+import { toolBin } from '../core/paths.mjs';
+const execFileAsync = promisify(execFile); // Promise-returning execFile so child processes can be awaited
+/**
+ * 取得 PDF 總頁數（使用 qpdf --show-npages）
+ *
+ * @param {string} inputPath - PDF 檔案路徑
+ * @returns {Promise<number>} 總頁數
+ */
+export async function getPdfPageCount(inputPath) {
+  const qpdf = toolBin('qpdf');
+  const absInput = path.resolve(inputPath);
+  const { stdout } = await execFileAsync(qpdf, ['--show-npages', absInput]);
+  const count = parseInt(stdout.trim(), 10);
+  if (Number.isNaN(count) || count <= 0) {
+    throw new Error(`無法解析頁數，qpdf 輸出: "${stdout.trim()}"`);
+  }
+  return count;
+}
+/**
+ * 拆分單頁 PDF（內部用）
+ *
+ * @param {string} qpdf       - qpdf 執行檔路徑
+ * @param {string} absInput   - 來源 PDF 絕對路徑
+ * @param {number} page       - 頁碼
+ * @param {string} outputPath - 輸出 PDF 絕對路徑
+ */
+async function splitOnePage(qpdf, absInput, page, outputPath) {
+  await execFileAsync(qpdf, [
+    absInput,
+    '--pages', '.', String(page),
+    '--',
+    outputPath,
+  ]);
+}
+/**
+ * 將多頁 PDF 拆為單頁獨立 PDF（平行執行）
+ *
+ * 所有頁面同時啟動 qpdf 子程序（I/O bound，CPU 佔用低）。
+ * 類似 GNU parallel 的 --jobs 0（使用所有可用核心）設計。
+ *
+ * 輸出格式：{原始檔名}_page_{三位數頁碼}.pdf
+ *   例如: report_page_001.pdf, report_page_012.pdf, report_page_100.pdf
+ *
+ * @param {string} inputPath  - 來源 PDF 檔案路徑
+ * @param {string} outputDir  - 輸出目錄路徑（不存在時自動建立）
+ * @param {object} [opts]
+ * @param {number} [opts.concurrency] - 最大並發數（預設不限，全部平行）
+ * @returns {Promise<Array<{ page: number, path: string }>>} 拆頁結果陣列
+ */
+export async function splitPdf(inputPath, outputDir, opts = {}) {
+  const qpdf = toolBin('qpdf');
+  const absInput = path.resolve(inputPath);
+  const absOutputDir = path.resolve(outputDir);
+  await mkdir(absOutputDir, { recursive: true });
+  const totalPages = await getPdfPageCount(absInput);
+  const baseName = path.basename(absInput, '.pdf');
+  // 預建所有輸出路徑
+  const tasks = [];
+  for (let page = 1; page <= totalPages; page++) {
+    const pageStr = String(page).padStart(3, '0');
+    const outputPath = path.join(absOutputDir, `${baseName}_page_${pageStr}.pdf`);
+    tasks.push({ page, outputPath });
+  }
+  if (opts.concurrency && opts.concurrency > 0) {
+    // 有限並發（類 GNU parallel -j N）
+    const results = [];
+    let idx = 0;
+    await new Promise((resolve, reject) => {
+      let running = 0;
+      function schedule() {
+        while (running < opts.concurrency && idx < tasks.length) {
+          const { page, outputPath } = tasks[idx++];
+          running++;
+          splitOnePage(qpdf, absInput, page, outputPath)
+            .then(() => {
+              results.push({ page, path: outputPath });
+              running--;
+              if (idx < tasks.length) {
+                schedule();
+              } else if (running === 0) {
+                resolve();
+              }
+            })
+            .catch(reject);
+        }
+        if (idx >= tasks.length && running === 0) resolve();
+      }
+      schedule();
+    });
+    // 依頁碼排序（平行可能亂序）
+    results.sort((a, b) => a.page - b.page);
+    return results;
+  } else {
+    // 全部平行（無限並發，頁數少時效率最高）
+    const promises = tasks.map(({ page, outputPath }) =>
+      splitOnePage(qpdf, absInput, page, outputPath).then(() => ({ page, path: outputPath }))
+    );
+    const results = await Promise.all(promises);
+    results.sort((a, b) => a.page - b.page);
+    return results;
+  }
+}

package/lib/flows/draft-writing.mjs ADDED Viewed

@@ -0,0 +1,29 @@
+// lib/flows/draft-writing.mjs — 公文撰寫業務流程
+// 串接：config → DAG → writing/generate → 更新狀態
+import { loadDag, initState, saveState, loadState } from '../core/dag.mjs';
+import { generateAll } from '../writing/generate.mjs';
+/**
+ * 執行公文撰寫流程
+ * @param {object} opts
+ * @param {string} opts.project - 專案名稱
+ * @param {object} opts.generators - 公文類型 → 生成函式
+ * @param {object} [opts.overrides] - 按 ID 覆蓋的生成函式
+ * @param {number} [opts.concurrency=10]
+ * @param {number[]} [opts.ids] - 指定生成的文件 ID
+ */
+export async function draftWriting(opts = {}) {
+  const { project = 'nstc', generators, overrides, concurrency = 10, ids = null } = opts;
+  // 確保 DAG 已初始化
+  let state = loadState(project);
+  if (!state) {
+    const { documents } = loadDag(project);
+    state = initState(documents);
+    saveState(state, project);
+  }
+  // 執行生成
+  return generateAll({ project, generators, overrides, concurrency, ids });
+}