npm - kc-beta - Versions diffs - 0.7.3 → 0.7.5 - Mend

kc-beta 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/src/agent/tools/consult-skill.js ADDED Viewed

@@ -0,0 +1,112 @@
+import { BaseTool, ToolResult } from "./base.js";
+/**
+ * v0.7.5: load a methodology skill's body into the agent's conversation
+ * history as a tool result. Pairs with the always-loaded body injection
+ * in SkillLoader.formatForContext — that handles the 1-2 architecturally-
+ * required skills per phase; consult_skill handles the rest on demand.
+ *
+ * Validation:
+ * - Skill name must be in the current phase's available set (per
+ *   template/skills/phase_skills.yaml).
+ * - Already-always-loaded skills return a hint pointing the agent at the
+ *   system prompt (don't double-load).
+ * - Missing bodies return an error result.
+ *
+ * Emits `skill_invoked` event with proper skill name on success — replaces
+ * the older path-matching regex at engine.js:1297-1313 that produced
+ * "(unknown)" spam from rule_skills/<id>/SKILL.md writes.
+ */
+export class ConsultSkillTool extends BaseTool {
+  /**
+   * @param {import('../workspace.js').Workspace} workspace
+   * @param {import('../skill-loader.js').SkillLoader} skillLoader
+   * @param {() => string} getCurrentPhase — returns the engine's current phase
+   * @param {import('../event-log.js').EventLog} [eventLog] — for skill_invoked emission
+   */
+  constructor(workspace, skillLoader, getCurrentPhase, eventLog) {
+    super();
+    this._workspace = workspace;
+    this._skillLoader = skillLoader;
+    this._getCurrentPhase = getCurrentPhase;
+    this._eventLog = eventLog;
+  }
+  get name() { return "consult_skill"; }
+  get description() {
+    return (
+      "Load the full body of a methodology skill into your context for the " +
+      "current turn. Use when the description tease in the system prompt's " +
+      "'Available Methodology Skills' section isn't enough detail to proceed. " +
+      "The body lands in your conversation history; subsequent turns can " +
+      "reference it via context, or you can re-consult if it ages out. " +
+      "Skills already in the 'Loaded Into Your Context' section don't need " +
+      "consulting — they're already in your prompt."
+    );
+  }
+  get inputSchema() {
+    return {
+      type: "object",
+      properties: {
+        name: {
+          type: "string",
+          description: "Skill name as listed in the system prompt (e.g., 'work-decomposition', 'evolution-loop').",
+        },
+      },
+      required: ["name"],
+    };
+  }
+  async execute(input) {
+    const name = (input?.name || "").trim();
+    if (!name) return new ToolResult("name required (e.g. consult_skill({name: 'work-decomposition'}))", true);
+    const phase = this._getCurrentPhase ? this._getCurrentPhase() : null;
+    const { alwaysLoaded, available } = this._skillLoader.getPhaseSkillSet(phase);
+    const alwaysSet = new Set(alwaysLoaded);
+    const availableSet = new Set(available);
+    if (alwaysSet.has(name)) {
+      return new ToolResult(
+        `Skill '${name}' is already always-loaded in your system prompt for phase '${phase}'. ` +
+        `Re-read the system prompt's 'Methodology Skills — Loaded Into Your Context' section ` +
+        `— the body is there. No separate consult needed.`,
+      );
+    }
+    if (!availableSet.has(name)) {
+      const sorted = [...availableSet].sort();
+      return new ToolResult(
+        `Skill '${name}' is not available in phase '${phase}'. ` +
+        `Available for this phase: ${sorted.join(", ")}. ` +
+        `If you genuinely need this skill, either advance/retreat to a phase ` +
+        `where it's available, or check the spelling.`,
+        true,
+      );
+    }
+    const body = this._skillLoader.loadSkillBody(name);
+    if (!body) {
+      return new ToolResult(
+        `Skill '${name}' is declared available for phase '${phase}' but its body could not be loaded. ` +
+        `This is an engine/template inconsistency — surface to the developer user.`,
+        true,
+      );
+    }
+    // Emit skill_invoked event with the real skill name (replaces the
+    // old path-matching regex that produced "(unknown)" spam).
+    try {
+      this._eventLog?.append?.("skill_invoked", {
+        skill: name,
+        via_tool: "consult_skill",
+        phase,
+      });
+    } catch { /* event logging is best-effort */ }
+    return new ToolResult(body);
+  }
+}

package/src/agent/tools/copy-to-workspace.js CHANGED Viewed

@@ -114,9 +114,10 @@ export class CopyToWorkspaceTool extends BaseTool {
   }
   async _appendManifest(entry) {
-    // v0.7.3: refs/manifest.json is a shared coordination path — wrap the
-    // whole read-modify-write under the workspace lock so two parallel
-    // copy_to_workspace calls (main agent + subagent) don't lose entries.
+    // v0.7.4 (re-applied from v0.7.3 G1a): refs/manifest.json is a
+    // shared coordination path — wrap the whole read-modify-write
+    // under the workspace lock so two parallel copy_to_workspace
+    // calls (main agent + subagent) don't lose entries.
     return await this._workspace.withSharedLockIfApplicable(MANIFEST_REL, () => {
       const manifestAbs = this._workspace.resolvePath(MANIFEST_REL);
       fs.mkdirSync(path.dirname(manifestAbs), { recursive: true });

package/src/agent/tools/release.js CHANGED Viewed

@@ -185,8 +185,23 @@ export class ReleaseTool extends BaseTool {
     // file through and emitted a stub on miss. We try to populate from
     // known QC artifact shapes here; if nothing matches, fall through
     // to the existing stub fallback.
+    // v0.7.5 G-H3: aggregator now runs if calibSrc is MISSING **or** has
+    // empty `historical_accuracy`. v0.7.4 audit (both 贷款 + 资管) shipped
+    // empty stubs despite QC data on disk — root cause was the v0.7.2
+    // gate only checked file existence; a stub written earlier (e.g., on
+    // finalization phase entry) kept the aggregator from firing later.
     const calibSrc = path.join(this._workspace.cwd, "confidence_calibration.json");
-    if (!fs.existsSync(calibSrc)) {
+    let shouldAggregate = !fs.existsSync(calibSrc);
+    if (!shouldAggregate) {
+      try {
+        const existing = JSON.parse(fs.readFileSync(calibSrc, "utf-8"));
+        const ha = existing?.historical_accuracy;
+        if (!ha || (typeof ha === "object" && Object.keys(ha).length === 0)) {
+          shouldAggregate = true;
+        }
+      } catch { shouldAggregate = true; } // corrupt → re-aggregate
+    }
+    if (shouldAggregate) {
       const aggregated = this._aggregateAccuracyFromOutput();
       if (aggregated && Object.keys(aggregated.historical_accuracy).length > 0) {
         fs.writeFileSync(calibSrc, JSON.stringify(aggregated, null, 2) + "\n", "utf-8");
@@ -247,6 +262,14 @@ export class ReleaseTool extends BaseTool {
       .replace(/\{RULES_LIST\}/g, rulesList);
     fs.writeFileSync(path.join(bundleAbs, "README.md"), readme, "utf-8");
+    // v0.7.5 G-H4: sweep any leftover `.tmpl` files from the bundle dir.
+    // template/release/v1/ contains manifest.json.tmpl, catalog.json.tmpl,
+    // README.md.tmpl. _copyDir's exclude list (line 119) only filters
+    // README.md.tmpl; the other two ride along and persist alongside their
+    // populated counterparts. Audit (v0.7.4 贷款) confirmed this regression
+    // of v0.7.2 G1d which only handled the v1/ scaffold case.
+    this._sweepTmplFiles(bundleAbs);
     // v0.7.2 1d: clean up the template scaffold dir if a customized
     // release was just written alongside it. Both v0.7.1 audit runs
     // shipped with `output/releases/v1/` (template-derived, .tmpl
@@ -303,6 +326,25 @@ export class ReleaseTool extends BaseTool {
     }
   }
+  /**
+   * v0.7.5 G-H4: recursively remove any `*.tmpl` files from a directory.
+   * Used after populating a release bundle to drop template stubs that
+   * weren't filtered by the initial copy's exclude list. Idempotent.
+   */
+  _sweepTmplFiles(dir) {
+    try {
+      if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) return;
+      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+        const entryPath = path.join(dir, entry.name);
+        if (entry.isDirectory()) {
+          this._sweepTmplFiles(entryPath);
+        } else if (entry.isFile() && entry.name.endsWith(".tmpl")) {
+          try { fs.unlinkSync(entryPath); } catch { /* best-effort */ }
+        }
+      }
+    } catch { /* best-effort */ }
+  }
   _findLatestWorkflow(ruleId) {
     // Canonical: workflows/<ruleId>/workflow_v#.py (subdirectory layout)
     const wfDir = path.join(this._workspace.cwd, "workflows", ruleId);
@@ -338,10 +380,95 @@ export class ReleaseTool extends BaseTool {
           }
         } catch { /* manifest unreadable; skip */ }
       }
+      // v0.7.5 G-H2: master / grouped workflow pattern. Agent shipped a
+      // single workflow folder (e.g., workflows/master/ or workflows/
+      // bank_wm_compliance/) declaring `source_rules: [R001, R002, ...]`
+      // in its SKILL.md / workflow.md / config.json. The manifest writer
+      // should credit this rule_id as covered by that workflow.
+      //
+      // Walk workflows/ subdirs looking for a source_rules declaration
+      // that includes this ruleId. Return the first matching workflow file.
+      // Audit (v0.7.4 贷款 session) confirmed manifest under-counted:
+      // catalog had 15 rules; manifest only listed R001 because R002-R015
+      // weren't found as standalone workflows.
+      for (const entry of fs.readdirSync(flatRoot, { withFileTypes: true })) {
+        if (!entry.isDirectory()) continue;
+        if (entry.name === ruleId) continue; // already checked above
+        const subDir = path.join(flatRoot, entry.name);
+        const declaredRules = this._readWorkflowSourceRules(subDir);
+        if (declaredRules.includes(ruleId)) {
+          // Find the workflow entry file in this dir
+          const subFiles = fs.readdirSync(subDir);
+          const versioned = subFiles.filter((f) => /^workflow_v\d+\.py$/.test(f)).sort();
+          if (versioned.length > 0) return path.join(subDir, versioned[versioned.length - 1]);
+          const any = subFiles.find((f) => f.endsWith(".py"));
+          if (any) return path.join(subDir, any);
+        }
+      }
     }
     return null;
   }
+  /**
+   * v0.7.5 G-H2: read a workflow directory's source_rules declaration.
+   * Checks SKILL.md / workflow.md frontmatter (`source_rules: [...]`)
+   * and config.json (`source_rules`, `rules`, or `rule_ids` field).
+   * Returns array of canonical rule IDs.
+   */
+  _readWorkflowSourceRules(workflowDir) {
+    const ids = new Set();
+    try {
+      const files = fs.readdirSync(workflowDir);
+      // Frontmatter sources
+      for (const fname of files) {
+        if (!/^(skill|workflow)\.md$/i.test(fname)) continue;
+        let content;
+        try { content = fs.readFileSync(path.join(workflowDir, fname), "utf-8"); } catch { continue; }
+        const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
+        if (!fmMatch) continue;
+        const fm = fmMatch[1];
+        // Inline form
+        const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
+        if (inlineMatch) {
+          inlineMatch[1].split(",").map(s => s.trim().replace(/^["']|["']$/g, ""))
+            .filter(Boolean).forEach(s => {
+              const m = s.match(/^R0*(\d+)$/i);
+              if (m) ids.add(`R${String(parseInt(m[1], 10)).padStart(3, "0")}`);
+            });
+        }
+        // Block form
+        const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]+-\s+\S+\s*\n?)+)/m);
+        if (blockMatch) {
+          blockMatch[1].split("\n").forEach(line => {
+            const m = line.match(/^[ \t]+-\s+["']?(R0*\d+)["']?\s*$/i);
+            if (m) {
+              const n = m[1].match(/R0*(\d+)/i);
+              if (n) ids.add(`R${String(parseInt(n[1], 10)).padStart(3, "0")}`);
+            }
+          });
+        }
+      }
+      // Config.json sources
+      const configPath = path.join(workflowDir, "config.json");
+      if (fs.existsSync(configPath)) {
+        try {
+          const data = JSON.parse(fs.readFileSync(configPath, "utf-8"));
+          const rules = Array.isArray(data?.source_rules) ? data.source_rules :
+                        Array.isArray(data?.rules) ? data.rules :
+                        Array.isArray(data?.rule_ids) ? data.rule_ids : [];
+          for (const r of rules) {
+            const m = String(r).match(/^R0*(\d+)$/i);
+            if (m) ids.add(`R${String(parseInt(m[1], 10)).padStart(3, "0")}`);
+          }
+        } catch { /* ignore */ }
+      }
+    } catch { /* dir unreadable */ }
+    return [...ids];
+  }
   _resolveFixture(rel) {
     // Try samples/ first (workspace, then project), then plain workspace path
     const candidates = [];

package/src/agent/tools/workspace-file.js CHANGED Viewed

@@ -30,9 +30,7 @@ export class WorkspaceFileTool extends BaseTool {
       "Read, write, or list files. " +
       "scope='workspace' (default): KC's working directory for rules, skills, workflows, results. " +
       "scope='project': the user's project folder where KC was launched — source regulations and samples live here. " +
-      "Operations: read (returns file content), write (creates/overwrites a file), list (shows directory contents). " +
-      "read returns up to 50,000 chars per call; longer files are truncated. " +
-      "For full reads of regulation/rule documents (typically smaller than this cap), prefer this tool over sandbox_exec."
+      "Operations: read (returns file content), write (creates/overwrites a file), list (shows directory contents)."
     );
   }
@@ -163,10 +161,12 @@ export class WorkspaceFileTool extends BaseTool {
       return new ToolResult(msg);
     };
-    // v0.7.3: route writes to shared coordination paths (rules/catalog.json,
-    // tasks.json, refs/manifest.json, etc.) through the workspace lock so
-    // concurrent writers serialize. No-op for non-shared paths and for
-    // project-scope writes (project dir is the user's, not shared engine state).
+    // v0.7.4 (re-applied from v0.7.3 G1a): route writes to shared
+    // coordination paths (rules/catalog.json, tasks.json,
+    // refs/manifest.json, etc.) through the workspace lock so
+    // concurrent writers serialize. No-op for non-shared paths and
+    // for project-scope writes (project dir is the user's, not
+    // shared engine state).
     if (scope === "workspace") {
       return await this._workspace.withSharedLockIfApplicable(filePath, doWrite);
     }

package/src/config.js CHANGED Viewed

@@ -90,7 +90,7 @@ export function loadSettings(workspacePath) {
     tier3: env.TIER3 || gc.tiers?.tier3 || "",
     tier4: env.TIER4 || gc.tiers?.tier4 || "",
-    // VLM tiers (vision/OCR models). v0.7.3: accept OCR_MODEL_TIER* as
+    // VLM tiers (vision/OCR models). v0.7.4: accept OCR_MODEL_TIER* as
     // alias since template/.env.template + initializer.js seed that name.
     // VLM_TIER* takes precedence when both are set.
     vlmTier1: env.VLM_TIER1 || env.OCR_MODEL_TIER1 || gc.vlm_tiers?.tier1 || "",

package/template/AGENT.md CHANGED Viewed

@@ -1,20 +1,195 @@
-# AGENT.md — Project Context
+# AGENT.md — KC Project Context
-This file is your per-project memory. Update it as you learn about the project.
-The content here is injected into your system prompt on every turn.
+This file is injected into the agent's system prompt every turn. The
+top sections describe KC's design philosophy + your mission (static
+across sessions); the bottom sections are per-project memory you
+update as you learn about this specific business scenario.
-## Project
+> **Skill priority**: meta-meta skills are architectural — they
+> override meta (how-to) skills when guidance conflicts. The
+> architect's frame bounds the technique. If you find yourself
+> rationalizing past a meta-meta principle to follow a meta procedure,
+> stop — the frame should bound the technique, not the other way
+> around. Each skill declares its tier in YAML frontmatter (`tier:
+> meta-meta` or `tier: meta`).
+---
+# KC Reborn — Document Verification Workspace
+## What This Workspace Is
+You are a coding agent tasked with building a document verification app for the developer user's specific business scenario. The meta skills in `skills/` encode the methodology of experienced verification system architects and business analysts. You bring the intelligence and judgment to apply this methodology to the specific case at hand.
+Your goal: build a verification system that starts with you doing the work, then gradually distills your capability into cheap, fast workflows powered by worker LLMs. You are the ground truth. The workflows you create are the deliverables.
+## Roles
+- **Developer user**: The human you serve. They are a domain expert (e.g., tech lead at a bank's loan department). They provide the rules, the documents, and the business context. Discuss decisions with them.
+- **You (the coding agent)**: You are both the Builder (creating skills and workflows) and the Observer (judging quality). You do the verification first, prove it works, then teach smaller models to replicate your results.
+- **Worker LLMs**: The performers. Models configured in `.env` (TIER1 through TIER4) that will execute the workflows you build. Your job is to find the smallest model that works for each task.
+## Workspace Layout
+```
+Rules/       — Regulation documents, compliance notes from the developer user
+Samples/     — Sample documents for testing (your training set)
+Input/       — Production document batches awaiting verification
+Output/      — Verification results
+skills/      — Methodology skills (current phase's available set)
+.env         — Configuration: API keys, model tiers, thresholds, language
+```
+Note: KC's session workspace under `~/.kc_agent/workspaces/<sessionId>/`
+uses lowercase counterparts (`rules/`, `samples/`, `input/`, `output/`,
+`logs/`, `workflows/`, `rule_skills/`) — these are runtime-internal and
+separate from this project's user-facing folders above. The asymmetry
+is intentional: title-case for human-facing project dirs, lowercase for
+KC's working state.
+## Your Mission
+Follow this lifecycle. Each step references the skill(s) to consult.
+Always-loaded skills are already in your system prompt (above); other
+skills are listed under "Available Methodology Skills" and require
+`consult_skill(name)` to load the body.
+1. **Bootstrap** → `bootstrap-workspace` (always loaded). Understand the business scenario, read Rules/, scan Samples/, configure .env with the developer user.
+2. **Extract Rules** → `rule-extraction` (always loaded). Decompose regulation documents into atomic, testable verification rules.
+3. **Decompose Tasks** → `work-decomposition` (always loaded in skill_authoring). Decide ordering, grouping, and TaskBoard structure.
+4. **Map Rule Relationships** → `consult_skill("rule-graph")`. Identify shared entities, dependencies, and conflicts between rules. Each rule stays independently executable.
+5. **Write Rule Skills** → `skill-authoring` (always loaded in skill_authoring). Write each rule into a skill folder. Before writing extraction logic for a new document type, `consult_skill("data-sensibility")` to observe the data first.
+6. **Test Skills** → Apply each skill to Samples/. `evolution-loop` is always loaded in skill_testing — use it to diagnose failures and iterate. Continue until accuracy meets the SKILL_ACCURACY threshold in .env.
+7. **Distill to Workflows** → `skill-to-workflow` (always loaded in distillation). Convert proven skills into Python code + worker LLM prompts. Test workflows against your own results as ground truth. Iterate until WORKFLOW_ACCURACY is met.
+8. **Production QC** → `quality-control` (always loaded in production_qc). Run workflows on Input/. Sample and review results based on confidence scores. For multi-document cases, `consult_skill("cross-document-verification")`. Use `evolution-loop` when quality drops.
+9. **Stabilize** → Gradually reduce monitoring as workflows prove reliable. Only intervene when rules change or quality drops.
+10. **Report** → `consult_skill("dashboard-reporting")`. Generate HTML dashboards so the developer user can see results, progress, and issues. Ensure dashboards include feedback collection mechanisms for users.
+Throughout: `consult_skill("version-control")` to track changes. `consult_skill("corner-case-management")` to handle edge cases without polluting workflows.
+## Core Principles
+- **Minimum viable model**: Always use the smallest, cheapest, fastest model that meets the accuracy threshold. Start simple, escalate only when necessary.
+- **JIT structure**: Do not design schemas or formats prematurely. Define them when needed, keep them consistent once defined.
+- **OTF evolution**: The system you build today may look completely different tomorrow. Embrace change.
+- **Skills before workflows**: Prove each rule works as a skill (you executing it) before distilling into code + worker LLM prompts.
+- **Log everything**: Every test iteration, every evolution decision, every version change. Both JSON (machine-readable) and plain text (human-readable).
+## How to Use Skills
+Skills are loaded in two ways:
+1. **Always loaded** — bodies are inline in this system prompt above the project orientation. These are the architecturally-required skills for the current phase. Treat them as authoritative.
+2. **Available — call consult_skill(name)** — listed by name + description in the system prompt under "Available Methodology Skills." Call `consult_skill("<name>")` to load the body into your conversation history when the description tease isn't enough.
+The skill body is the methodology. Skills convey philosophy and decision frameworks. Adapt them to the specific business case. Do not follow them rigidly.
+## Communication with Developer User
+- **Proactively discuss**: rule granularity, accuracy thresholds, model selection, edge cases.
+- **Report progress**: after each testing round, share results and next steps.
+- **Escalate**: when you cannot resolve an issue after iterating, surface it with evidence.
+- **Ask**: the developer user is a domain expert. When in doubt about a rule's intent, ask.
+---
+# KC Reborn — 文档核查工作区
+> **技能优先级**: meta-meta 技能是架构层面 —— 当指导冲突时，
+> meta-meta 凌驾于 meta (技法层面) 之上。架构师的框架约束技法。
+> 如果你发现自己在为了遵循一条 meta 程序而绕开一条 meta-meta
+> 原则，停下 —— 框架应当约束技法，而不是反过来。每个技能在
+> YAML frontmatter 中声明自己的层级 (`tier: meta-meta` 或
+> `tier: meta`)。
+## 这是什么
+你是一个编程智能体，负责为开发者用户的具体业务场景构建文档核查应用。`skills/` 中的元技能编码了资深核查系统架构师和业务分析师的方法论。你负责运用智慧和判断力，将这些方法论应用到具体场景中。
+你的目标：构建一个核查系统，先由你亲自执行核查工作，然后逐步将你的能力蒸馏为由 Worker LLM（执行模型）驱动的低成本、高速度的工作流。你是基准真值。你创建的工作流是最终交付物。
+## 角色定义
+- **开发者用户**：你服务的人。他们是领域专家（如银行信贷部门的技术负责人）。他们提供规则、文档和业务背景。与他们讨论决策。
+- **你（编程智能体）**：你既是构建者（创建技能和工作流），也是观察者（评判质量）。你先执行核查，证明方法可行，再教小模型复现你的结果。
+- **Worker LLM**：执行者。在 `.env` 中配置的模型（TIER1到TIER4），将执行你构建的工作流。你的任务是为每项工作找到能胜任的最小模型。
+## 工作区结构
+```
+Rules/       — 法规文件、开发者用户的合规注释
+Samples/     — 用于测试的样本文件（你的训练集）
+Input/       — 等待核查的生产批次文件
+Output/      — 核查结果
+skills/      — 当前阶段可用的方法论技能
+.env         — 配置：API密钥、模型层级、阈值、语言
+```
+注：KC 在 `~/.kc_agent/workspaces/<sessionId>/` 下的会话工作区使用
+小写对应目录（`rules/`、`samples/`、`input/`、`output/`、`logs/`、
+`workflows/`、`rule_skills/`）—— 这些是运行时内部目录，与本项目上面
+那些用户可见的目录是分开的。这种大小写不对称是有意的：项目里给人看
+的目录用首字母大写；KC 自己的工作状态用小写。
+## 你的使命
+遵循以下生命周期。常驻加载的技能已经在你的系统提示词中；其他技能在"可用方法论技能"清单里列出，调 `consult_skill(name)` 才能加载正文。
+1. **初始化** → `bootstrap-workspace`（常驻）。理解业务场景，阅读 Rules/，浏览 Samples/，与开发者用户配置 .env。
+2. **提取规则** → `rule-extraction`（常驻）。将法规文件分解为原子级、可测试的核查规则。
+3. **任务分解** → `work-decomposition`（skill_authoring 常驻）。决定顺序、分组以及 TaskBoard 结构。
+4. **构建规则图谱** → `consult_skill("rule-graph")`。识别规则间的共享实体、依赖关系和潜在冲突。每条规则保持独立可执行。
+5. **编写规则技能** → `skill-authoring`（skill_authoring 常驻）。将每条规则写入技能文件夹。编写新文档类型的提取逻辑前，先 `consult_skill("data-sensibility")` 观察数据。
+6. **测试技能** → 在 Samples/ 上应用每个技能。`evolution-loop` 在 skill_testing 常驻 —— 用它诊断失败并迭代。直到准确率达到 .env 中的 SKILL_ACCURACY 阈值。
+7. **蒸馏为工作流** → `skill-to-workflow`（distillation 常驻）。将验证过的技能转化为 Python 代码 + Worker LLM 提示词。用你自己的结果作为基准测试工作流。迭代直到达到 WORKFLOW_ACCURACY。
+8. **生产质控** → `quality-control`（production_qc 常驻）。在 Input/ 上运行工作流。根据置信度分数抽样审查结果。涉及多文档案件时，`consult_skill("cross-document-verification")`。质量下降时使用 `evolution-loop`。
+9. **稳定运行** → 随着工作流稳定，逐步降低监控频率。仅在规则变更或质量下降时介入。
+10. **报告** → `consult_skill("dashboard-reporting")`。生成 HTML 仪表盘，让开发者用户直观地看到结果、进度和问题。确保仪表盘内置用户反馈收集机制。
+全程：用 `consult_skill("version-control")` 跟踪所有变更，用 `consult_skill("corner-case-management")` 处理边缘案例，不要污染主工作流。
+## 核心原则
+- **最小可用模型**：始终使用能达到准确率阈值的最小、最便宜、最快的模型。从简单开始，必要时才升级。
+- **即时结构（JIT）**：不要过早设计数据结构或格式。需要时定义，定义后保持一致。
+- **即时演进（OTF）**：你今天构建的系统明天可能面目全非。拥抱变化。
+- **先技能后工作流**：先证明每条规则作为技能（你执行）可行，再蒸馏为代码 + Worker LLM 提示词。
+- **记录一切**：每次测试迭代、每个演进决策、每次版本变更。同时保存 JSON（机器可读）和纯文本（人类可读）。
+## 如何使用技能
+技能通过两种方式加载：
+1. **常驻加载** —— 技能正文直接出现在本系统提示词里、项目说明的上方。这些是当前阶段架构上必需的技能，把它们的内容当作权威指导。
+2. **可用 —— 调 consult_skill(name)** —— 在系统提示词的"可用方法论技能"清单里按名字 + 描述列出。当描述简介不够用时，调 `consult_skill("<名字>")` 把技能正文加载到你的对话历史里。
+技能正文是方法论本身。技能传达的是理念和决策框架。请根据具体业务场景灵活运用，不要机械照搬。
+## 与开发者用户的沟通
+- **主动讨论**：规则粒度、准确率阈值、模型选择、边缘案例。
+- **汇报进度**：每轮测试后，分享结果和下一步计划。
+- **升级问题**：迭代后仍无法解决的问题，附带证据提交给开发者用户。
+- **多问**：开发者用户是领域专家。对规则意图有疑问时，问他们。
+---
+## Per-project memory (you maintain this section)
+The sections below are your scratchpad for this specific project. Update them as you learn about the business scenario, decisions, and edge cases. They persist across your sessions on this project.
+### Project
 <!-- What domain? What regulations? What documents? Fill this in during bootstrap. -->
-## Decisions
+### Decisions
 <!-- Key decisions made with the developer user. Rule granularity, accuracy targets, model choices, scope boundaries. -->
-## Domain Notes
+### Domain Notes
 <!-- Terminology, document formats, naming conventions, edge cases specific to this domain. -->
-## User Preferences
+### User Preferences
 <!-- How the developer user prefers to communicate. Reporting format, language, level of detail. -->

package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: auto-model-selection
+tier: meta
 description: >
   Use Context7 CLI to get up-to-date LLM model information. Use whenever you need to
   know about available models, model capabilities, pricing, context window sizes, or

package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: bootstrap-workspace
+tier: meta-meta
 description: Initialize and configure a document verification workspace. Use when a developer user first opens this workspace, when .env needs configuration, or when the business scenario needs to be understood. Guides the coding agent through reading regulation documents, understanding the developer user's business context, configuring model tiers and thresholds, and establishing the working relationship. Covers initial conversation with developer user to scope the verification task, set expectations, and agree on checkpoints.
 ---

package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: compliance-judgment
+tier: meta
 description: Determine whether extracted entities comply with verification rules. Use after entity extraction to make the pass/fail judgment for each rule on each document. Covers translating natural language rules into executable logic, choosing between Python calculation and LLM semantic judgment, and producing actionable comments on failures. Also use when designing the judgment step of a workflow or when a rule's judgment logic needs debugging.
 ---

package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: confidence-system
+tier: meta
 description: Design and calibrate confidence scoring for extraction and verification results. Use when building any workflow that needs to quantify trust in its output, when setting up quality control sampling thresholds, or when calibrating existing confidence scores against actual accuracy. Confidence is the bridge between workflows and quality control — high confidence means less review, low confidence means more review. Also use when the quality control skill reports that confidence scores do not correlate with actual correctness.
 ---

package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: corner-case-management
+tier: meta
 description: Identify, catalog, and handle corner cases that do not fit the mainstream verification workflow. Use when the evolution loop classifies a failure as a corner case (affecting less than ~10% of documents), when adding a new edge case to the registry, or when deciding whether a corner case should be promoted to a systemic fix. Also use when designing the corner case detection mechanism for a workflow.
 ---

package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: cross-document-verification
+tier: meta
 description: Perform case-level analysis across multiple documents for the same transaction. Use when documents do not exist in isolation — main contracts have appendices, loan applications come bundled with income certificates, bank statements, credit reports, and property appraisals. Use to build comparison matrices, detect contradictions (hard mismatches and soft implausibilities), classify severity, and flag fraud signals. Also use when user or end-user reports a cross-document inconsistency — these reports are ground truth and take priority over agent judgment.
 ---

package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: dashboard-reporting
+tier: meta-meta
 description: Generate HTML dashboards for developer users to visualize verification results, system progress, and quality metrics. Use when a testing round completes, when production batches finish processing, when the developer user wants to see the system's status, or at any point where visual reporting would help communicate progress. Dashboards should be self-contained HTML files that can be opened by double-clicking. Also use when the developer user asks about results, accuracy, or system health.
 ---

package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: data-sensibility
+tier: meta
 description: Build intuition about document data before writing extraction logic. Use before designing any extraction schema or regex pattern, when onboarding a new document type, or when extraction accuracy is unexpectedly low and you suspect a data assumption is wrong. Covers systematic observation of raw documents, spot-checking extracted results, distribution analysis, and recognizing suspicious patterns. If you are about to write code that touches document data and you have not read at least five documents end-to-end, stop and use this skill first.
 ---

package/template/skills/{zh/meta → en}/document-chunking/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: document-chunking
+tier: meta
 description: >
   Fast, cheap chunking for processing batches of sample and input documents.
   Use when you need to split documents into manageable pieces for initial observation,

package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: document-parsing
+tier: meta
 description: Parse source documents into machine-readable text with maximum fidelity. Use when processing any document in Samples/ or Input/ for the first time, when parsed text quality is poor, or when tables and charts need special handling. Covers multi-level parser selection from simple text extraction to OCR and vision models. Also use when a verification rule fails due to parsing issues (garbled text, missing tables, mangled layouts) and the parser needs to be upgraded for that document type.
 ---

package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: entity-extraction
+tier: meta
 description: Extract specific entities, values, and text segments from documents as required by verification rules. Use after tree processing has located the relevant section, when a rule needs a specific number, date, name, amount, clause, or any domain-specific entity extracted. Covers extraction method selection (regex vs LLM), schema design, postprocessing, and confidence annotation. Also use when designing the extraction step of a workflow for worker LLMs.
 ---

package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: evolution-loop
+tier: meta-meta
 description: Drive continuous improvement of skills and workflows through the diagnose-classify-fix-retest cycle. Use after any testing round reveals failures, when production quality control flags issues, or when accuracy drops below thresholds. Covers failure analysis, distinguishing systemic issues from corner cases, deciding whether to rewrite or patch, and knowing when to stop iterating. The evolution loop is the heartbeat of the system. Also use when transitioning between lifecycle phases (skill testing, workflow testing, production monitoring).
 ---

package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md RENAMED Viewed

@@ -1,5 +1,6 @@
 ---
 name: pdf-review-dashboard
+tier: meta
 description: >
   Generate a two-column PDF review dashboard for manual verification result checking.
   Left panel shows the original PDF document, right panel shows verification results.