npm - novelforge-agent - Versions diffs - 0.1.0 - Mend

novelforge-agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/LICENSE +21 -0
package/README.md +240 -0
package/dist/src/cli/index.js +125 -0
package/dist/src/core/contextBuilder.js +128 -0
package/dist/src/core/fileNames.js +41 -0
package/dist/src/core/index.js +9 -0
package/dist/src/core/projectDiscovery.js +141 -0
package/dist/src/core/projectStore.js +85 -0
package/dist/src/core/prompts/en-US.js +363 -0
package/dist/src/core/prompts/types.js +1 -0
package/dist/src/core/prompts/zh-CN.js +362 -0
package/dist/src/core/prompts.js +15 -0
package/dist/src/core/retrieval/chunker.js +77 -0
package/dist/src/core/retrieval/index.js +125 -0
package/dist/src/core/retrieval/tokenizer.js +43 -0
package/dist/src/core/retrieval/types.js +1 -0
package/dist/src/core/schemas.js +91 -0
package/dist/src/core/steps/architecture.js +16 -0
package/dist/src/core/steps/chapter.js +16 -0
package/dist/src/core/steps/chapterReview.js +16 -0
package/dist/src/core/steps/chapterRevision.js +20 -0
package/dist/src/core/steps/continuityReview.js +13 -0
package/dist/src/core/steps/crossChapterReview.js +15 -0
package/dist/src/core/steps/index.js +20 -0
package/dist/src/core/steps/memoryCard.js +22 -0
package/dist/src/core/steps/novelMetadata.js +12 -0
package/dist/src/core/steps/storyBible.js +13 -0
package/dist/src/core/steps/types.js +7 -0
package/dist/src/core/types.js +1 -0
package/dist/src/core/workflow.js +186 -0
package/dist/src/mcp/server.js +13 -0
package/dist/src/mcp/tools.js +126 -0
package/package.json +61 -0
package/src/cli/index.ts +147 -0
package/src/core/contextBuilder.ts +131 -0
package/src/core/fileNames.ts +48 -0
package/src/core/index.ts +9 -0
package/src/core/projectDiscovery.ts +174 -0
package/src/core/projectStore.ts +111 -0
package/src/core/prompts/en-US.ts +376 -0
package/src/core/prompts/types.ts +28 -0
package/src/core/prompts/zh-CN.ts +375 -0
package/src/core/prompts.ts +27 -0
package/src/core/retrieval/chunker.ts +80 -0
package/src/core/retrieval/index.ts +136 -0
package/src/core/retrieval/tokenizer.ts +44 -0
package/src/core/retrieval/types.ts +24 -0
package/src/core/schemas.ts +101 -0
package/src/core/steps/architecture.ts +17 -0
package/src/core/steps/chapter.ts +17 -0
package/src/core/steps/chapterReview.ts +17 -0
package/src/core/steps/chapterRevision.ts +21 -0
package/src/core/steps/continuityReview.ts +14 -0
package/src/core/steps/crossChapterReview.ts +16 -0
package/src/core/steps/index.ts +25 -0
package/src/core/steps/memoryCard.ts +23 -0
package/src/core/steps/novelMetadata.ts +13 -0
package/src/core/steps/storyBible.ts +14 -0
package/src/core/steps/types.ts +21 -0
package/src/core/types.ts +115 -0
package/src/core/workflow.ts +250 -0
package/src/mcp/server.ts +15 -0
package/src/mcp/tools.ts +227 -0

package/src/core/prompts/zh-CN.ts ADDED Viewed

@@ -0,0 +1,375 @@
+import { BuiltPrompt, PromptBuildInput, PromptPack } from './types.js';
+/**
+ * Returns the shared Chinese "JSON output rules" section that the JSON-producing
+ * prompts in this pack append to their text.
+ *
+ * @returns A heading line followed by one `- ` bullet per rule, joined with newlines.
+ */
+function strictJsonOutputRules(): string {
+  const heading = 'JSON 输出规则：';
+  const rules = [
+    '只输出合法 JSON，不要输出 Markdown、代码块、解释或前后缀文本。',
+    '字符串必须使用双引号。',
+    '不要使用 undefined、NaN、Infinity、注释或尾随逗号。',
+    '数组字段必须输出真实数组，不要输出字符串化数组。',
+  ];
+  const bullets = rules.map((rule) => `- ${rule}`);
+  return [heading, ...bullets].join('\n');
+}
+/**
+ * Builds the zh-CN prompt for the `novel_metadata` step.
+ *
+ * The prompt embeds `input.state.initialPrompt`, shows the JSON shape the model
+ * must return (title, genre, premise, language, style, coreCast) and ends with
+ * the shared strict-JSON output rules.
+ *
+ * @param input - Prompt build input; only `state.initialPrompt` is read here.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildMetadataPrompt(input: PromptBuildInput): BuiltPrompt {
+return {
+    purpose: 'novel_metadata',
+    expectedFormat: 'JSON matching NovelMetadataSchema',
+    prompt: `你是一名长篇网络小说总策划。请根据用户提示生成新小说的基础信息。
+## 用户提示词
+${input.state.initialPrompt}
+## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "title": "小说名称",
+  "genre": "题材",
+  "premise": "故事前提，80-200字",
+  "language": "zh-CN",
+  "style": "文风说明",
+  "coreCast": [
+    {
+      "name": "角色姓名",
+      "role": "角色定位",
+      "description": "角色描述"
+    }
+  ]
+}
+要求：
+- title、genre、premise、language、style 必须是非空字符串。
+- coreCast 至少包含 1 个核心人物。
+- premise 要能支撑长篇连载，不要只写一句设定。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `story_bible` step.
+ *
+ * The prompt embeds `input.state.initialPrompt` and asks for a Markdown story
+ * bible with fixed `##` sections (cast, relationships, world rules, plot lines,
+ * style constraints). An "existing context" section is inserted only when
+ * `input.context` is truthy.
+ *
+ * @param input - Prompt build input; reads `state.initialPrompt` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildStoryBiblePrompt(input: PromptBuildInput): BuiltPrompt {
+return {
+    purpose: 'story_bible',
+    expectedFormat: 'Markdown',
+    prompt: `你是一名故事圣经编辑。请为这部长篇小说生成可长期复用的 Markdown 故事圣经。
+## 用户提示词
+${input.state.initialPrompt}
+${input.context ? `## 已有上下文\n${input.context}\n` : ''}## 输出结构
+请用 Markdown 输出，至少包含：
+## 核心人物
+- 主要人物的目标、弱点、关系、长期变化方向。
+## 人物关系
+- 核心关系、冲突关系、隐藏关系和后续可推进点。
+## 世界规则
+- 题材相关的硬规则、限制、代价、社会结构或势力格局。
+## 主线与支线
+- 主线目标。
+- 至少 3 条长期伏笔或支线。
+## 风格约束
+- 叙事语气、节奏、禁忌写法、人物对白边界。
+要求：
+- 内容要能被后续章节生成反复引用。
+- 不要写成章节正文。
+- 不要输出 JSON。`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `architecture` step.
+ *
+ * The prompt asks for a three-level outline (whole book, volumes, chapters) as
+ * JSON. `input.state.targetChapters` is interpolated twice: once as the goal
+ * and once as the minimum `chapters.length` requirement. An "existing context"
+ * section is inserted only when `input.context` is truthy, and the shared
+ * strict-JSON output rules are appended at the end.
+ *
+ * @param input - Prompt build input; reads `state.initialPrompt`,
+ *   `state.targetChapters` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildArchitecturePrompt(input: PromptBuildInput): BuiltPrompt {
+return {
+    purpose: 'architecture',
+    expectedFormat: 'JSON matching ArchitecturePayloadSchema',
+    prompt: `你是一名长篇小说总架构师。请生成全本、卷、章三级架构。
+## 用户提示词
+${input.state.initialPrompt}
+## 目标
+- 本次至少生成 ${input.state.targetChapters} 个章架构。
+- 全本架构负责长期主线和结局方向。
+- 卷架构负责阶段冲突、高潮和卷尾钩子。
+- 章架构必须只覆盖本章应发生的内容，不要提前泄露后续具体事件。
+${input.context ? `## 已有上下文\n${input.context}\n` : ''}## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "full": "完整全书主线、阶段推进、核心冲突、主题和结局方向",
+  "volumes": [
+    {
+      "id": "v1",
+      "title": "卷标题",
+      "summary": "本卷目标、冲突、高潮和卷尾钩子",
+      "order": 1
+    }
+  ],
+  "chapters": [
+    {
+      "chapterNumber": 1,
+      "title": "章标题",
+      "volumeId": "v1",
+      "summary": "本章剧情摘要",
+      "requiredBeats": ["必须完成的情节点1", "必须完成的情节点2"]
+    }
+  ]
+}
+要求：
+- chapters.length 必须大于等于 ${input.state.targetChapters}。
+- chapterNumber 从 1 开始连续递增。
+- volumeId 必须引用 volumes 中存在的 id。
+- requiredBeats 至少 1 条，且必须具体可执行。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `chapter` step (writing one chapter's prose).
+ *
+ * The prompt names the chapter via `input.state.currentChapter`, lists the
+ * priority order, style guidance and execution rules, and asks for Markdown
+ * whose first line is the chapter title as an H1. A "generation context"
+ * section is inserted only when `input.context` is truthy.
+ *
+ * @param input - Prompt build input; reads `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildChapterPrompt(input: PromptBuildInput): BuiltPrompt {
+return {
+    purpose: 'chapter',
+    expectedFormat: 'Markdown',
+    prompt: `你是一位擅长创作长篇网络小说的职业作者。请直接完成第 ${input.state.currentChapter} 章正文。
+## 执行优先级
+1. 先严格遵守“本章架构、用户补充要求、故事圣经硬约束、上一章承接”。
+2. 再参考“历史相关记忆、历史原文证据”保证一致性。
+3. 最后才参考“全本/本卷远场规划”，且不得提前写出尚未发生的情节。
+## 风格与字数
+- 文风必须与本书题材、世界观、人物身份、情感基调一致。
+- 语言要自然、稳定、可读，优先服务叙事推进、人物塑造和情绪积累。
+- 对话必须符合人物身份、关系和处境；重要情绪尽量通过动作、神态、节奏、潜台词体现。
+- 场景描写要有必要的感官细节与氛围支撑，但篇幅服务剧情，不要空转。
+- 冲突、转折、悬念和章末钩子要清晰，保证阅读推进感。
+## 执行规则
+- 只写本章架构明确覆盖的内容，不得提前写后续章节具体事件或人物揭示。
+- 不得新增本章架构未授权的主要人物；功能性角色只能轻描淡写。
+- 所有人物称谓、物品、场景、能力、时间线必须与既有设定一致。
+- 如果上一章结尾仍在动作、对话或同一场景中，本章开头必须连续衔接。
+- 禁止无代价越级碾压、强行降智配角、突兀机械反转、硬灌设定、空洞抒情。
+- 禁止总结腔、条目腔、说教腔，不要输出解释性前言。
+${input.context ? `## 生成上下文\n${input.context}\n` : ''}## 输出要求
+- 输出 Markdown。
+- 第一行使用本章标题作为 H1，例如：# 章标题
+- H1 后直接进入正文。`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `memory_card` step.
+ *
+ * The prompt asks the model to extract a memory card (summary, key events,
+ * entities, facts, state changes, open threads) from chapter
+ * `input.state.currentChapter` as JSON. A "current chapter context" section is
+ * inserted only when `input.context` is truthy, and the shared strict-JSON
+ * output rules are appended at the end.
+ *
+ * @param input - Prompt build input; reads `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildMemoryPrompt(input: PromptBuildInput): BuiltPrompt {
+return {
+    purpose: 'memory_card',
+    expectedFormat: 'JSON matching MemoryCardSchema',
+    prompt: `你是一名长篇小说连续性编辑。请从第 ${input.state.currentChapter} 章正文中提取记忆卡。
+${input.context ? `## 当前章上下文\n${input.context}\n` : ''}## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "summary": "本章摘要",
+  "keyEvents": ["关键事件1"],
+  "entities": [
+    {
+      "name": "人物/地点/物品/组织名称",
+      "type": "person | location | item | faction | concept",
+      "state": "本章结束时的状态"
+    }
+  ],
+  "facts": [
+    {
+      "subject": "主体",
+      "predicate": "关系或动作",
+      "object": "客体"
+    }
+  ],
+  "stateChanges": [
+    {
+      "entity": "实体",
+      "before": "变化前",
+      "after": "变化后"
+    }
+  ],
+  "openThreads": ["尚未解决的伏笔、承诺、危险或疑问"]
+}
+要求：
+- 只记录已经在本章发生或被确认的信息。
+- 不要推测后续剧情。
+- facts 和 stateChanges 要具体，便于后续章节引用。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `continuity_review` step.
+ *
+ * The prompt asks for a continuity review of chapters 1 through `end` as JSON,
+ * with `end` also pre-filled into the example `range`. A "review context"
+ * section is inserted only when `input.context` is truthy, and the shared
+ * strict-JSON output rules are appended at the end.
+ *
+ * @param input - Prompt build input; reads `state.targetChapters`,
+ *   `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildContinuityReviewPrompt(input: PromptBuildInput): BuiltPrompt {
+// Upper bound of the reviewed range: the larger of the target chapter count and
+// the chapter before the current one.
+// NOTE(review): if this step can run before every target chapter is written,
+// Math.max names chapters that do not exist yet — confirm against the workflow.
+const end = Math.max(input.state.targetChapters, input.state.currentChapter - 1);
+return {
+    purpose: 'continuity_review',
+    expectedFormat: 'JSON matching ContinuityReviewSchema',
+    prompt: `你是一名长篇小说连续性审稿人。请审阅第 1-${end} 章的连续性问题。
+${input.context ? `## 审阅上下文\n${input.context}\n` : ''}## 审阅重点
+- 人物状态、位置、伤势、关系是否前后矛盾。
+- 物品归属、能力限制、世界规则是否被破坏。
+- 伏笔是否被误解、遗漏或提前揭示。
+- 章节架构要求是否被正文违反。
+## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "range": {
+    "start": 1,
+    "end": ${end}
+  },
+  "status": "clean",
+  "issues": [
+    {
+      "severity": "low | medium | high",
+      "description": "问题描述",
+      "evidence": "来自上下文的证据",
+      "suggestion": "修复建议"
+    }
+  ]
+}
+要求：
+- 没有问题时 status 使用 "clean"，issues 输出空数组。
+- 有问题时 status 使用 "issues_found"。
+- evidence 必须具体，不能只写“疑似不一致”。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `chapter_review` step.
+ *
+ * The prompt asks for a single-chapter review as JSON and pre-fills the
+ * example `chapterNumber`. A "review context" section is inserted only when
+ * `input.context` is truthy, and the shared strict-JSON output rules are
+ * appended at the end.
+ *
+ * @param input - Prompt build input; reads `state.pendingAction`,
+ *   `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildChapterReviewPrompt(input: PromptBuildInput): BuiltPrompt {
+// Prefer the chapter named by the pending action; fall back to the current chapter.
+const chapter = input.state.pendingAction?.chapterNumber ?? input.state.currentChapter;
+return {
+    purpose: 'chapter_review',
+    expectedFormat: 'JSON matching ChapterReviewSchema',
+    prompt: `你是一名严格的章节审稿编辑。请审阅指定章节是否存在内部问题以及与既有设定的冲突。
+${input.context ? `## 审阅上下文\n${input.context}\n` : ''}## 审阅重点
+- 人物声音、动机、状态是否符合故事圣经与历史记忆。
+- 世界规则、物品归属、能力边界是否被破坏。
+- 时间线、地点、与上一章结尾的衔接是否一致。
+- 是否完成本章架构 requiredBeats。
+- 文风：节奏、硬灌设定、突兀反转、空洞抒情、条目腔。
+## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "chapterNumber": ${chapter},
+  "status": "clean",
+  "issues": [
+    {
+      "severity": "low | medium | high",
+      "category": "character | world | timeline | item | knowledge | pacing | style | architecture",
+      "description": "具体问题",
+      "evidence": "引用或转述能证明问题的具体段落",
+      "suggestion": "具体修复建议"
+    }
+  ]
+}
+要求：
+- 没有问题时 status 为 "clean"，issues 输出空数组。
+- 有问题时 status 为 "issues_found"。
+- evidence 必须具体，不能写"疑似"、"可能"。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `chapter_revision` step.
+ *
+ * The prompt asks for the full revised chapter as Markdown (title as H1, no
+ * change log or diff markers). A "revision context" section is inserted only
+ * when `input.context` is truthy.
+ *
+ * @param input - Prompt build input; reads `state.pendingAction`,
+ *   `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildChapterRevisionPrompt(input: PromptBuildInput): BuiltPrompt {
+// Prefer the chapter named by the pending action; fall back to the current chapter.
+const chapter = input.state.pendingAction?.chapterNumber ?? input.state.currentChapter;
+return {
+    purpose: 'chapter_revision',
+    expectedFormat: 'Markdown',
+    prompt: `你是这本长篇小说第 ${chapter} 章的修订作者。请根据审稿反馈，产出修订后的完整章节正文。
+## 优先级
+1. 必须修复反馈中的每一条问题，不可遗漏。
+2. 不要破坏已经能用的部分：结构、语气、人物声音、有效对白。
+3. 保持与故事圣经、上一章承接、已有记忆的连续性。
+## 风格规则
+- 保持原章节的标题与 Markdown 结构。
+- 不要输出条目化总结、变更日志、"我修改了什么"之类的解释文字。
+- 不得新增本章架构未授权的主要人物或主线伏笔。
+${input.context ? `## 修订上下文\n${input.context}\n` : ''}## 输出要求
+- 仅输出 Markdown。
+- 第一行使用本章标题作为 H1：\`# 章标题\`。
+- H1 后直接输出修订后的完整正文，不要输出 diff 标记。`,
+  };
+}
+/**
+ * Builds the zh-CN prompt for the `cross_chapter_review` step.
+ *
+ * The prompt asks for a multi-chapter review as JSON; the range bounds are
+ * interpolated into the instruction, the example `range` and the example
+ * `chapters` array. A "review context" section is inserted only when
+ * `input.context` is truthy, and the shared strict-JSON output rules are
+ * appended at the end.
+ *
+ * @param input - Prompt build input; reads `state.pendingAction`,
+ *   `state.currentChapter` and `context`.
+ * @returns The prompt text together with its purpose and expected format.
+ */
+function buildCrossChapterReviewPrompt(input: PromptBuildInput): BuiltPrompt {
+// Use the range from the pending action when present; otherwise review from
+// chapter 1 up to the chapter before the current one.
+const range = input.state.pendingAction?.range ?? { start: 1, end: input.state.currentChapter - 1 };
+return {
+    purpose: 'cross_chapter_review',
+    expectedFormat: 'JSON matching CrossChapterReviewSchema',
+    prompt: `你是资深连续性编辑。请同时审阅第 ${range.start}-${range.end} 章，找出单章审阅无法发现的跨章节冲突。
+${input.context ? `## 审阅上下文\n${input.context}\n` : ''}## 审阅重点
+- 人物状态在多章之间漂移（例如伤势忽然消失）。
+- 不同章节确认的事实互相冲突。
+- 被悄悄遗忘或丢弃的伏笔。
+- 后续章节破坏前面建立的世界规则。
+- 只能跨章看到的节奏问题。
+## 输出要求
+请只输出合法 JSON，格式如下：
+{
+  "range": { "start": ${range.start}, "end": ${range.end} },
+  "status": "clean",
+  "issues": [
+    {
+      "severity": "low | medium | high",
+      "chapters": [${range.start}, ${range.end}],
+      "description": "具体问题",
+      "evidence": "按章节引用冲突段落或记忆条目",
+      "suggestion": "具体修复建议"
+    }
+  ]
+}
+要求：
+- chapters 必须列出问题涉及的所有章节。
+- evidence 必须引用具体章节内容或记忆，不能模糊。
+${strictJsonOutputRules()}`,
+  };
+}
+/**
+ * Dispatches on `input.state.currentStep` to the matching zh-CN prompt builder.
+ *
+ * @param input - Prompt build input, passed unchanged to the selected builder.
+ * @returns The prompt built for the current workflow step.
+ */
+function buildPromptForStep(input: PromptBuildInput): BuiltPrompt {
+  switch (input.state.currentStep) {
+    case 'novel_metadata':
+      return buildMetadataPrompt(input);
+    case 'story_bible':
+      return buildStoryBiblePrompt(input);
+    case 'architecture':
+      return buildArchitecturePrompt(input);
+    case 'chapter':
+      return buildChapterPrompt(input);
+    case 'memory_card':
+      return buildMemoryPrompt(input);
+    case 'continuity_review':
+      return buildContinuityReviewPrompt(input);
+    case 'chapter_review':
+      return buildChapterReviewPrompt(input);
+    case 'chapter_revision':
+      return buildChapterRevisionPrompt(input);
+    case 'cross_chapter_review':
+      return buildCrossChapterReviewPrompt(input);
+    case 'complete':
+      // Nothing is left to generate: return a fixed English placeholder.
+      // NOTE(review): the purpose is 'continuity_review' although the step is
+      // 'complete' — presumably the purpose type has no 'complete' member; confirm.
+      return {
+        purpose: 'continuity_review',
+        expectedFormat: 'No output required',
+        prompt: 'The workflow is complete.',
+      };
+  }
+}
+/** Simplified-Chinese prompt pack: the step dispatcher plus the shared JSON output rules. */
+export const zhCNPromptPack: PromptPack = {
+  buildPromptForStep,
+  strictJsonOutputRules,
+};

package/src/core/prompts.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import { enUSPromptPack } from './prompts/en-US.js';
+import { zhCNPromptPack } from './prompts/zh-CN.js';
+import { BuiltPrompt, PromptBuildInput, PromptPack } from './prompts/types.js';
+export type {
+  BuiltPrompt,
+  PromptBuildInput,
+  PromptPack,
+  PromptPurpose,
+} from './prompts/types.js';
+/** Prompt packs keyed by the project's language code. */
+const promptPacks: Record<PromptBuildInput['state']['language'], PromptPack> = {
+  'zh-CN': zhCNPromptPack,
+  'en-US': enUSPromptPack,
+};
+/**
+ * Looks up the prompt pack for a language.
+ *
+ * The type only admits known languages, but the value may come from persisted
+ * state, so an unrecognised code falls back to the zh-CN pack instead of
+ * returning `undefined`.
+ *
+ * @param language - Language code stored in the project state.
+ * @returns The matching prompt pack, or the zh-CN pack when none is registered.
+ */
+function getPromptPack(language: PromptBuildInput['state']['language']): PromptPack {
+  // `??` rather than `||`: the fallback is only meant for a missing entry.
+  return promptPacks[language] ?? zhCNPromptPack;
+}
+/**
+ * Returns the strict-JSON output rules text in the requested language.
+ *
+ * @param language - Language code; defaults to `'zh-CN'`.
+ * @returns The rules section produced by the selected prompt pack.
+ */
+export function strictJsonOutputRules(language: PromptBuildInput['state']['language'] = 'zh-CN'): string {
+  const pack = getPromptPack(language);
+  return pack.strictJsonOutputRules();
+}
+/**
+ * Builds the prompt for the current workflow step using the prompt pack that
+ * matches `input.state.language`.
+ *
+ * @param input - Prompt build input, forwarded unchanged to the selected pack.
+ * @returns The prompt produced by that pack.
+ */
+export function buildPromptForStep(input: PromptBuildInput): BuiltPrompt {
+  const pack = getPromptPack(input.state.language);
+  return pack.buildPromptForStep(input);
+}

package/src/core/retrieval/chunker.ts ADDED Viewed

@@ -0,0 +1,80 @@
+import { MemoryCard } from '../types.js';
+import { RetrievalDoc } from './types.js';
+/** Paragraphs shorter than this (in UTF-16 code units) are merged into the preceding paragraph. */
+const PARAGRAPH_MIN_CHARS = 40;
+/** Paragraphs longer than this (in UTF-16 code units) are cut into consecutive slices of at most this size. */
+const PARAGRAPH_MAX_CHARS = 600;
+/**
+ * Splits chapter Markdown into retrieval-sized paragraphs.
+ *
+ * Steps:
+ * 1. Drop a heading line at the very start of the text (normally the H1 title).
+ * 2. Split on blank lines and trim each part; empty parts are skipped.
+ * 3. Append a part shorter than `PARAGRAPH_MIN_CHARS` to the previous paragraph
+ *    (joined with a newline) when there is one.
+ * 4. Cut any paragraph longer than `PARAGRAPH_MAX_CHARS` into consecutive
+ *    slices of at most that size, so one very long block does not dominate.
+ *
+ * A slice never ends between the two halves of a UTF-16 surrogate pair: when
+ * the fixed boundary would fall inside a pair, the slice ends one code unit
+ * earlier. Text without such characters is sliced at exactly
+ * `PARAGRAPH_MAX_CHARS`, as before.
+ *
+ * @param markdown - Chapter text in Markdown.
+ * @returns The paragraphs in document order; concatenating the slices of a
+ *   capped paragraph reproduces that paragraph.
+ */
+function splitMarkdownParagraphs(markdown: string): string[] {
+  const stripped = markdown.replace(/^#[^\n]*\n?/, ''); // drop leading heading line (the H1 title)
+  const merged: string[] = [];
+  for (const part of stripped.split(/\n\s*\n+/)) {
+    const trimmed = part.trim();
+    if (!trimmed) continue;
+    if (trimmed.length < PARAGRAPH_MIN_CHARS && merged.length) {
+      merged[merged.length - 1] = `${merged[merged.length - 1]}\n${trimmed}`;
+    } else {
+      merged.push(trimmed);
+    }
+  }
+  const capped: string[] = [];
+  for (const para of merged) {
+    let start = 0;
+    while (start < para.length) {
+      let end = Math.min(start + PARAGRAPH_MAX_CHARS, para.length);
+      if (end < para.length && end - 1 > start) {
+        const last = para.charCodeAt(end - 1);
+        const next = para.charCodeAt(end);
+        const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
+        // Keep the whole code point in the next slice rather than emitting two lone surrogates.
+        if (splitsPair) end -= 1;
+      }
+      capped.push(para.slice(start, end));
+      start = end;
+    }
+  }
+  return capped;
+}
+/**
+ * Turns a chapter's Markdown into one retrieval document per paragraph.
+ *
+ * @param chapterNumber - Chapter the text belongs to; also part of each doc id.
+ * @param markdown - Chapter text in Markdown.
+ * @returns Documents with ids of the form `chapter:<chapterNumber>:p:<index>`.
+ */
+export function chunkChapter(chapterNumber: number, markdown: string): RetrievalDoc[] {
+  const docs: RetrievalDoc[] = [];
+  for (const [index, text] of splitMarkdownParagraphs(markdown).entries()) {
+    docs.push({
+      id: `chapter:${chapterNumber}:p:${index}`,
+      type: 'chapter',
+      chapterNumber,
+      text,
+    });
+  }
+  return docs;
+}
+/**
+ * Splits a story bible into one retrieval document per `##` section.
+ *
+ * The text is cut at every line that starts with `##` followed by whitespace.
+ * Within each piece the first line is the heading and the rest is the body;
+ * pieces that are empty or have no body are skipped. The position of the piece
+ * in the split result is kept in the id, so skipped pieces leave gaps.
+ *
+ * @param markdown - Story bible text in Markdown.
+ * @returns Documents with ids of the form `bible:<index>:<heading>`.
+ */
+export function chunkStoryBible(markdown: string): RetrievalDoc[] {
+  const docs: RetrievalDoc[] = [];
+  for (const [index, rawSection] of markdown.split(/^##\s+/m).entries()) {
+    const section = rawSection.trim();
+    if (!section) continue;
+    const lineBreak = section.indexOf('\n');
+    // A single-line piece is a heading without a body: nothing to index.
+    if (lineBreak === -1) continue;
+    const heading = section.slice(0, lineBreak).trim();
+    const body = section.slice(lineBreak + 1).trim();
+    if (!body) continue;
+    docs.push({
+      id: `bible:${index}:${heading}`,
+      type: 'bible',
+      section: heading,
+      text: `${heading}\n${body}`,
+    });
+  }
+  return docs;
+}
+/**
+ * Flattens a chapter's memory card into a single retrieval document.
+ *
+ * The text starts with a `chapter <n> summary` line and the summary, followed
+ * by one labelled line per key event, fact, state change, entity and open
+ * thread, in that order.
+ *
+ * @param chapterNumber - Chapter the card belongs to; also part of the doc id.
+ * @param card - Memory card to flatten.
+ * @returns A one-element array holding the document with id `memory:<chapterNumber>`.
+ */
+export function chunkMemoryCard(chapterNumber: number, card: MemoryCard): RetrievalDoc[] {
+  const lines: string[] = [`chapter ${chapterNumber} summary`, card.summary];
+  for (const event of card.keyEvents) {
+    lines.push(`event: ${event}`);
+  }
+  for (const fact of card.facts) {
+    lines.push(`fact: ${fact.subject} ${fact.predicate} ${fact.object}`);
+  }
+  for (const change of card.stateChanges) {
+    lines.push(`state: ${change.entity} ${change.before} -> ${change.after}`);
+  }
+  for (const entity of card.entities) {
+    lines.push(`entity ${entity.type}: ${entity.name} - ${entity.state}`);
+  }
+  for (const thread of card.openThreads) {
+    lines.push(`open: ${thread}`);
+  }
+  const doc: RetrievalDoc = {
+    id: `memory:${chapterNumber}`,
+    type: 'memory',
+    chapterNumber,
+    text: lines.join('\n'),
+  };
+  return [doc];
+}

package/src/core/retrieval/index.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import { readFile, writeFile, mkdir } from 'node:fs/promises';
+import { dirname, join } from 'node:path';
+import MiniSearch from 'minisearch';
+import { MemoryCard } from '../types.js';
+import { tokenize } from './tokenizer.js';
+import { chunkChapter, chunkMemoryCard, chunkStoryBible } from './chunker.js';
+import { RetrievalDoc, RetrievalHit, RetrieveOptions } from './types.js';
+export type { RetrievalDoc, RetrievalDocType, RetrievalHit, RetrieveOptions } from './types.js';
+// Project-relative location of the serialized MiniSearch index.
+const INDEX_PATH = '.index/lexical.json';
+// Project-relative location of the manifest listing the indexed document ids.
+const MANIFEST_PATH = '.index/manifest.json';
+// Shared MiniSearch configuration, used both when creating an index and when
+// loading a serialized one.
+const MINISEARCH_OPTIONS = {
+  // Only the document text is searchable.
+  fields: ['text'],
+  // Stored alongside the index so search results carry these values.
+  storeFields: ['type', 'chapterNumber', 'section', 'text'],
+  // Index with the project's own tokenizer; terms are used exactly as it emits them.
+  tokenize: (text: string) => tokenize(text),
+  processTerm: (term: string) => term,
+  searchOptions: {
+    // Queries go through the same tokenizer and term handling as documents.
+    tokenize: (text: string) => tokenize(text),
+    processTerm: (term: string) => term,
+    // Exact term matching only; a document matches if any query term matches.
+    prefix: false,
+    fuzzy: false,
+    combineWith: 'OR' as const,
+  },
+};
+/** The in-memory search index together with the document ids recorded in the manifest. */
+interface IndexBundle {
+  /** MiniSearch index over the retrieval documents. */
+  index: MiniSearch<RetrievalDoc>;
+  /** Ids of the documents tracked as present in the index. */
+  ids: Set<string>;
+}
+/** Reads the serialized index; any read or parse failure yields a fresh empty index. */
+async function readLexicalIndex(projectPath: string): Promise<MiniSearch<RetrievalDoc>> {
+  try {
+    const serialized = await readFile(join(projectPath, INDEX_PATH), 'utf8');
+    return MiniSearch.loadJSON<RetrievalDoc>(serialized, MINISEARCH_OPTIONS);
+  } catch {
+    return new MiniSearch<RetrievalDoc>(MINISEARCH_OPTIONS);
+  }
+}
+/** Reads the manifest ids; a missing, unreadable or malformed manifest yields an empty set. */
+async function readManifestIds(projectPath: string): Promise<Set<string>> {
+  try {
+    const serialized = await readFile(join(projectPath, MANIFEST_PATH), 'utf8');
+    const manifest = JSON.parse(serialized) as { ids?: string[] };
+    return Array.isArray(manifest.ids) ? new Set(manifest.ids) : new Set<string>();
+  } catch {
+    // manifest missing; bundle starts empty
+    return new Set<string>();
+  }
+}
+/**
+ * Loads the index and its id manifest from the project's `.index` directory.
+ *
+ * Both files are optional: failures fall back to an empty index and an empty
+ * id set respectively, independently of each other.
+ *
+ * @param projectPath - Project root directory.
+ * @returns The loaded (or freshly created) index bundle.
+ */
+async function loadBundle(projectPath: string): Promise<IndexBundle> {
+  const index = await readLexicalIndex(projectPath);
+  const ids = await readManifestIds(projectPath);
+  return { index, ids };
+}
+/**
+ * Writes the index and the sorted id manifest to the project's `.index`
+ * directory, creating the directory when needed. The index file is written
+ * first, then the manifest.
+ *
+ * @param projectPath - Project root directory.
+ * @param bundle - Index and ids to persist.
+ */
+async function persistBundle(projectPath: string, bundle: IndexBundle): Promise<void> {
+  const indexFile = join(projectPath, INDEX_PATH);
+  const manifestFile = join(projectPath, MANIFEST_PATH);
+  await mkdir(dirname(indexFile), { recursive: true });
+  const serializedIndex = JSON.stringify(bundle.index);
+  await writeFile(indexFile, serializedIndex, 'utf8');
+  const sortedIds = [...bundle.ids].sort();
+  await writeFile(manifestFile, JSON.stringify({ ids: sortedIds }), 'utf8');
+}
+/**
+ * Replaces a group of documents in the persisted lexical index.
+ *
+ * Loads the bundle from disk, discards every tracked document whose id matches
+ * `removePredicate`, adds `docs`, then writes the index and manifest back.
+ *
+ * The index and the manifest are two separate files, and a missing or
+ * unreadable manifest loads as an empty id set, so the index can still hold
+ * documents the manifest does not list. `MiniSearch.add` throws on a duplicate
+ * id, so incoming documents that are still present in the index are discarded
+ * before being added again.
+ *
+ * @param projectPath - Project root that contains the `.index` directory.
+ * @param removePredicate - Returns true for tracked ids to drop before adding.
+ * @param docs - Documents to add; their ids are recorded in the manifest.
+ */
+async function upsert(projectPath: string, removePredicate: (id: string) => boolean, docs: RetrievalDoc[]): Promise<void> {
+  const bundle = await loadBundle(projectPath);
+  const toRemove = Array.from(bundle.ids).filter((id) => removePredicate(id));
+  for (const id of toRemove) {
+    try {
+      bundle.index.discard(id);
+    } catch {
+      // Listed in the manifest but absent from the index; nothing to discard.
+    }
+    bundle.ids.delete(id);
+  }
+  let discarded = toRemove.length;
+  for (const doc of docs) {
+    // Index/manifest drift: the id is untracked yet still indexed.
+    if (bundle.index.has(doc.id)) {
+      bundle.index.discard(doc.id);
+      discarded += 1;
+    }
+  }
+  if (discarded) {
+    await bundle.index.vacuum();
+  }
+  for (const doc of docs) {
+    bundle.index.add(doc);
+    bundle.ids.add(doc.id);
+  }
+  await persistBundle(projectPath, bundle);
+}
+/**
+ * Re-indexes one chapter: drops every tracked document whose id starts with
+ * `chapter:<chapterNumber>:` and adds the chapter's freshly chunked paragraphs.
+ *
+ * @param projectPath - Project root directory.
+ * @param chapterNumber - Chapter being indexed.
+ * @param markdown - Chapter text in Markdown.
+ */
+export async function indexChapter(projectPath: string, chapterNumber: number, markdown: string): Promise<void> {
+  const idPrefix = `chapter:${chapterNumber}:`;
+  const belongsToChapter = (id: string): boolean => id.startsWith(idPrefix);
+  const docs = chunkChapter(chapterNumber, markdown);
+  await upsert(projectPath, belongsToChapter, docs);
+}
+/**
+ * Re-indexes the story bible: drops every tracked document whose id starts
+ * with `bible:` and adds the freshly chunked sections.
+ *
+ * @param projectPath - Project root directory.
+ * @param markdown - Story bible text in Markdown.
+ */
+export async function indexStoryBible(projectPath: string, markdown: string): Promise<void> {
+  const isBibleDoc = (id: string): boolean => id.startsWith('bible:');
+  const docs = chunkStoryBible(markdown);
+  await upsert(projectPath, isBibleDoc, docs);
+}
+/**
+ * Re-indexes one chapter's memory card: drops the tracked document with id
+ * `memory:<chapterNumber>` and adds the flattened card.
+ *
+ * @param projectPath - Project root directory.
+ * @param chapterNumber - Chapter the card belongs to.
+ * @param card - Memory card to index.
+ */
+export async function indexMemoryCard(projectPath: string, chapterNumber: number, card: MemoryCard): Promise<void> {
+  const cardId = `memory:${chapterNumber}`;
+  const isThisCard = (existing: string): boolean => existing === cardId;
+  const docs = chunkMemoryCard(chapterNumber, card);
+  await upsert(projectPath, isThisCard, docs);
+}
+/**
+ * Searches the project's lexical index.
+ *
+ * A blank query returns no hits without touching the index. Results can be
+ * restricted by document type and by an inclusive chapter range; the range is
+ * only applied to results that carry a numeric `chapterNumber`, so documents
+ * without one pass that check. At most `topK` (default 6) results are
+ * returned, in the order the search produced them.
+ *
+ * @param projectPath - Project root directory.
+ * @param query - Free-text query.
+ * @param options - Optional `topK`, `types` and `chapterRange` filters.
+ * @returns The top hits with their stored fields and score.
+ */
+export async function retrieve(projectPath: string, query: string, options: RetrieveOptions = {}): Promise<RetrievalHit[]> {
+  if (!query.trim()) return [];
+  const bundle = await loadBundle(projectPath);
+  const topK = options.topK ?? 6;
+  const accepts = (result: Record<string, unknown>): boolean => {
+    const { types, chapterRange } = options;
+    if (types && !types.includes(result.type as RetrievalHit['type'])) return false;
+    const chapterNumber = result.chapterNumber as number | undefined;
+    if (!chapterRange || typeof chapterNumber !== 'number') return true;
+    return !(chapterNumber < chapterRange.start || chapterNumber > chapterRange.end);
+  };
+  const raw = bundle.index.search(query, { filter: accepts });
+  const toHit = (r: Record<string, unknown>): RetrievalHit => ({
+    id: r.id as string,
+    type: r.type as RetrievalHit['type'],
+    chapterNumber: r.chapterNumber as number | undefined,
+    section: r.section as string | undefined,
+    text: r.text as string,
+    score: r.score as number,
+  });
+  return raw.slice(0, topK).map(toHit);
+}
+/**
+ * Renders hits as Markdown: one `###` heading per hit (source tag plus the
+ * score with two decimals) followed by the hit text, with a blank line between
+ * hits.
+ *
+ * @param hits - Hits to render.
+ * @returns The rendered text, or an empty string when there are no hits.
+ */
+export function formatHits(hits: RetrievalHit[]): string {
+  if (hits.length === 0) return '';
+  const describe = (hit: RetrievalHit): string => {
+    if (hit.type === 'chapter') return `Chapter ${hit.chapterNumber}`;
+    if (hit.type === 'memory') return `Chapter ${hit.chapterNumber} Memory`;
+    return `Bible: ${hit.section}`;
+  };
+  return hits
+    .map((hit) => `### ${describe(hit)} (score ${hit.score.toFixed(2)})\n${hit.text}`)
+    .join('\n\n');
+}

package/src/core/retrieval/tokenizer.ts ADDED Viewed

@@ -0,0 +1,44 @@
+// Matches a single character in the CJK range this tokenizer treats as ideographs.
+const CJK_RANGE = /[㐀-鿿]/;
+// Matches a single lowercase ASCII letter or digit.
+const ALNUM_RANGE = /[a-z0-9]/;
+/**
+ * Tells whether the given string contains a character from `CJK_RANGE`.
+ *
+ * @param char - String to test (callers here pass a single character).
+ */
+export function isCjk(char: string): boolean {
+  return CJK_RANGE.exec(char) !== null;
+}
+/**
+ * Tokenizes mixed Chinese + Latin text without a dictionary-based segmenter.
+ *
+ * - The input is lowercased; runs of ASCII letters and digits become one token each.
+ * - Every CJK character is emitted as a unigram, followed by the bigram it
+ *   forms with the next character when that one is CJK too:
+ *   "陈青云走" -> ["陈", "陈青", "青", "青云", "云", "云走", "走"]
+ * - Any other character only separates tokens.
+ *
+ * Unigrams keep recall for names; bigrams add phrase locality, so a search for
+ * "陈青云" favours text that actually contains that phrase.
+ *
+ * @param text - Text to tokenize.
+ * @returns Tokens in the order they occur in the text.
+ */
+export function tokenize(text: string): string[] {
+  const lowered = text.toLowerCase();
+  const tokens: string[] = [];
+  // Start offset of the alphanumeric run in progress, or -1 when there is none.
+  let runStart = -1;
+  // Visiting one position past the end (charAt gives '') flushes a trailing run.
+  for (let pos = 0; pos <= lowered.length; pos += 1) {
+    const ch = lowered.charAt(pos);
+    if (ALNUM_RANGE.test(ch)) {
+      if (runStart === -1) runStart = pos;
+      continue;
+    }
+    if (runStart !== -1) {
+      tokens.push(lowered.slice(runStart, pos));
+      runStart = -1;
+    }
+    if (!isCjk(ch)) continue;
+    tokens.push(ch);
+    const following = lowered.charAt(pos + 1);
+    if (isCjk(following)) {
+      tokens.push(ch + following);
+    }
+  }
+  return tokens;
+}

package/src/core/retrieval/types.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/** Source of an indexed document: chapter text, a story-bible section, or a chapter memory card. */
+export type RetrievalDocType = 'chapter' | 'bible' | 'memory';
+/** A unit of text stored in the lexical index. */
+export interface RetrievalDoc {
+  /** Unique document id within the index. */
+  id: string;
+  /** Which kind of source the text came from. */
+  type: RetrievalDocType;
+  /** Chapter the text belongs to, when the document is tied to a chapter. */
+  chapterNumber?: number;
+  /** Section heading, when the document is a story-bible section. */
+  section?: string;
+  /** The searchable text. */
+  text: string;
+}
+/** A search result: the fields of the matched document plus its relevance score. */
+export interface RetrievalHit extends RetrievalDoc {
+  /** Relevance score reported by the search. */
+  score: number;
+}
+/** Optional filters and limits for a retrieval query. */
+export interface RetrieveOptions {
+  /** Maximum number of hits to return. */
+  topK?: number;
+  /** When set, only documents of these types are returned. */
+  types?: RetrievalDocType[];
+  /** When set, inclusive chapter bounds for documents that carry a chapter number. */
+  chapterRange?: { start: number; end: number };
+}