npm - kc-beta - Versions diffs - 0.3.2 → 0.5.4 - Mend

kc-beta 0.3.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/package.json +1 -1
package/src/agent/confidence-scorer.js +8 -0
package/src/agent/context-window.js +7 -2
package/src/agent/context.js +25 -0
package/src/agent/corner-case-registry.js +5 -0
package/src/agent/engine.js +564 -76
package/src/agent/event-log.js +15 -2
package/src/agent/history.js +91 -23
package/src/agent/pipelines/initializer.js +3 -6
package/src/agent/retry.js +9 -1
package/src/agent/rule-catalog-normalize.js +37 -0
package/src/agent/scheduler.js +276 -0
package/src/agent/session-state.js +11 -2
package/src/agent/task-manager.js +5 -0
package/src/agent/tools/agent-tool.js +57 -14
package/src/agent/tools/archive-file.js +94 -0
package/src/agent/tools/copy-to-workspace.js +140 -0
package/src/agent/tools/phase-advance.js +60 -0
package/src/agent/tools/release.js +323 -0
package/src/agent/tools/rule-catalog.js +56 -4
package/src/agent/tools/schedule-fetch.js +118 -0
package/src/agent/tools/snapshot.js +101 -0
package/src/agent/tools/workspace-file.js +10 -7
package/src/agent/version-manager.js +29 -120
package/src/agent/workspace.js +127 -4
package/src/cli/components.js +68 -12
package/src/cli/index.js +147 -15
package/src/config.js +10 -1
package/src/model-tiers.json +5 -5
package/template/release-runtime/README.md.tmpl +84 -0
package/template/release-runtime/kc_runtime/__init__.py +2 -0
package/template/release-runtime/kc_runtime/confidence.py +93 -0
package/template/release-runtime/kc_runtime/dashboard.py +208 -0
package/template/release-runtime/render_dashboard.py +49 -0
package/template/release-runtime/run.py +230 -0
package/template/release-runtime/serve.sh +15 -0
package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
package/template/workspace.gitignore +22 -0

package/src/cli/index.js CHANGED Viewed

@@ -2,9 +2,10 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
 import { render, Box, Text, useApp, useInput } from "ink";
 import { loadSettings } from "../config.js";
 import { LLMClient } from "../agent/llm-client.js";
-import { AgentEngine } from "../agent/engine.js";
+import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
 import { Workspace } from "../agent/workspace.js";
 import { ConversationHistory } from "../agent/history.js";
+import { Scheduler } from "../agent/scheduler.js";
 import {
   WelcomeBanner,
   StatusBar,
@@ -17,6 +18,18 @@ import {
 const h = React.createElement;
+// Only the last N messages stay in the Ink render tree. Older messages
+// remain in React state (so /compact can summarize them) but aren't
+// diffed on every keystroke — this is what keeps long sessions responsive
+// and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
+// Full conversation is persisted to logs/events.jsonl on every event,
+// so dropping from render is purely visual.
+const VISIBLE_WINDOW = 50;
+// How many recent messages render their ToolBlock with full preview.
+// Older ToolBlocks show header only. Both still persist full output to disk.
+const RECENT_TOOL_WINDOW = 10;
 /**
  * Main KC Agent CLI App using Ink (React for terminals).
  */
@@ -158,6 +171,8 @@ function App({ engine, config }) {
             "  /help                Show this help\n" +
             "  /status              Show session info, model, phase, workspace\n" +
             "  /tasks               Show task progress\n" +
+            "  /phase [sub]         advance | status | <name> — manual phase override\n" +
+            "  /schedule            Show scheduled ingestion jobs and recent log lines\n" +
             "  /clear               Clear conversation history (keep workspace)\n" +
             "  /compact             Summarize older messages to reduce context\n" +
             "  /sessions            List all sessions\n" +
@@ -193,6 +208,79 @@ function App({ engine, config }) {
         });
         return true;
+      case "/phase": {
+        // User-driven phase override. Useful when auto-advance fails to fire
+        // or when debugging. Subcommands:
+        //   /phase                 → current phase (alias: /phase status)
+        //   /phase advance | next  → move to NEXT_PHASE[current]
+        //   /phase <name>          → force-jump to any phase (forward or back)
+        const engine = engineRef.current;
+        const sub = (parts[1] || "").toLowerCase();
+        if (!sub || sub === "status") {
+          const next = NEXT_PHASE[engine.currentPhase];
+          addMessage({
+            role: "system",
+            content:
+              `Current phase: ${engine.currentPhase.toUpperCase()}` +
+              (next ? `  (next auto: ${next})` : "  (final phase)"),
+          });
+          return true;
+        }
+        if (sub === "advance" || sub === "next") {
+          const next = NEXT_PHASE[engine.currentPhase];
+          if (!next) {
+            addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
+            return true;
+          }
+          const ok = engine._advancePhase(next, "manual /phase advance");
+          addMessage({
+            role: "system",
+            content: ok
+              ? `→ phase advanced to ${next.toUpperCase()}.`
+              : `Failed to advance from ${engine.currentPhase}.`,
+          });
+          updateContextStats();
+          return true;
+        }
+        // /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
+        const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
+        addMessage({
+          role: "system",
+          content: ok
+            ? `→ phase set to ${sub.toUpperCase()}.`
+            : `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
+        });
+        updateContextStats();
+        return true;
+      }
+      case "/schedule": {
+        const sched = new Scheduler(engineRef.current.workspace);
+        const jobs = sched.list();
+        if (jobs.length === 0) {
+          addMessage({ role: "system", content: "No scheduled ingestion jobs. Ask KC to set one up via the schedule_fetch tool." });
+        } else {
+          const lines = jobs.map((j) => {
+            const status = j.enabled ? "✓ enabled" : "· disabled";
+            const hint = j.cron_hint ? `   cron: ${j.cron_hint}` : "   cron: (not set)";
+            return `  ${status}  ${j.id}\n${hint}\n   cmd:  ${j.command}`;
+          });
+          const tail = sched.tailLog(8);
+          const pending = sched.pendingInputCount();
+          addMessage({
+            role: "system",
+            content:
+              `Scheduled jobs:\n${lines.join("\n\n")}\n\n` +
+              `Pending in input/: ${pending} file(s)` +
+              (tail ? `\n\nlogs/ingest.log (last 8):\n${tail}` : ""),
+          });
+        }
+        return true;
+      }
       case "/clear":
         engineRef.current.history = new ConversationHistory(engineRef.current.workspace.cwd);
         setMessages([]);
@@ -202,15 +290,22 @@ function App({ engine, config }) {
       case "/compact": {
         addMessage({ role: "system", content: "Compacting conversation history..." });
-        // Run compact asynchronously
         (async () => {
           try {
             const result = await engineRef.current.compact();
             if (result) {
-              addMessage({
+              // Claude Code pattern: after successful compact, clear the
+              // visible TUI messages and start fresh with a single summary
+              // line. The underlying engine.history already contains the
+              // compact-summary message pair; the TUI doesn't need to keep
+              // showing the pre-compact history (it's on disk in
+              // logs/events.jsonl anyway) and clearing it immediately frees
+              // Ink render-tree memory — fixing the lag that builds up over
+              // long sessions.
+              setMessages([{
                 role: "system",
-                content: `Compacted: removed ${result.removedCount} messages, kept ${result.retainedCount}. Summary: ~${result.summaryTokens} tokens.`,
-              });
+                content: `✓ 上下文已压缩：合并了 ${result.removedCount} 条早期消息（摘要约 ${result.summaryTokens} tokens，保留最近 ${result.retainedCount} 条）`,
+              }]);
             } else {
               addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
             }
@@ -227,9 +322,22 @@ function App({ engine, config }) {
           addMessage({ role: "system", content: "Usage: /rename <new_name>" });
         } else {
           try {
-            const newId = engineRef.current.workspace.rename(arg);
-            setSessionId(newId);
-            addMessage({ role: "system", content: `Session renamed to: ${newId}` });
+            const r = engineRef.current.renameSession(arg);
+            setSessionId(r.sessionId);
+            const lines = [`Session renamed to: ${r.sessionId}`];
+            if (r.scheduleWrappersRegenerated.length > 0) {
+              lines.push(
+                `${r.scheduleWrappersRegenerated.length} cron wrapper script(s) regenerated.`,
+                `If you'd installed crontab lines for the OLD path, re-install via 'schedule_fetch print_crontab'.`,
+              );
+            }
+            if (r.scheduleWrappersFailed && r.scheduleWrappersFailed.length > 0) {
+              const ids = r.scheduleWrappersFailed.map((f) => f.id).join(", ");
+              lines.push(
+                `⚠ ${r.scheduleWrappersFailed.length} wrapper script(s) failed to regenerate (${ids}). Check workspace/scripts/ingest/ and disk space.`,
+              );
+            }
+            addMessage({ role: "system", content: lines.join("\n") });
           } catch (err) {
             addMessage({ role: "system", content: `Rename failed: ${err.message}` });
           }
@@ -342,36 +450,52 @@ function App({ engine, config }) {
   return h(Box, { flexDirection: "column" },
     // Welcome banner
-    showWelcome ? h(WelcomeBanner, { projectDir: config.projectDir }) : null,
+    showWelcome ? h(WelcomeBanner, {
+      projectDir: config.projectDir,
+      pendingInputCount: (() => {
+        try { return new Scheduler(engineRef.current.workspace).pendingInputCount(); }
+        catch { return 0; }
+      })(),
+    }) : null,
     // Task dashboard (ralph-loop)
     taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
-    // Message history
-    ...messages.map((msg, i) => {
+    // Message history (virtualized — only last VISIBLE_WINDOW render).
+    // Hidden-count hint for earlier messages, so users know the full
+    // history still exists (on disk) even though the TUI is slim.
+    messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
+      h(Text, { dimColor: true },
+        `— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠，完整记录在 logs/events.jsonl —`),
+    ) : null,
+    ...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
+      // Global index (for stable React keys) vs visible index (for isRecent).
+      const globalIdx = messages.length - arr.length + i;
+      const visibleIdx = arr.length - 1 - i;  // 0 = most recent
       if (msg.role === "user") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, { dimColor: true }, "❯ "),
           h(Text, null, msg.content),
         );
       }
       if (msg.role === "agent") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, null, msg.content),
         );
       }
       if (msg.role === "tool") {
         return h(ToolBlock, {
-          key: `msg-${i}`,
+          key: `msg-${globalIdx}`,
           name: msg.toolName,
           input: msg.toolInput,
           output: msg.toolOutput,
           isError: msg.toolIsError,
           isRunning: false,
+          isRecent: visibleIdx < RECENT_TOOL_WINDOW,
         });
       }
       if (msg.role === "system") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, { dimColor: true }, msg.content),
         );
       }
@@ -436,6 +560,14 @@ export async function main({ languageOverride } = {}) {
     console.log(`\x1b[33m${msg}\x1b[0m\n`);
   }
+  // Warn if git is missing — Block 11 file system relies on git for version history.
+  if (config.gitAutoCommit !== false && !Workspace.isGitInstalled()) {
+    const msg = config.language === "zh"
+      ? "  ⚠ 未检测到 git。本会话将不记录版本历史。安装 git 以启用自动提交。"
+      : "  ⚠ git not found — version history disabled this session. Install git to enable auto-commit.";
+    console.log(`\x1b[33m${msg}\x1b[0m\n`);
+  }
   const client = new LLMClient({
     apiKey: config.llmApiKey,
     baseUrl: config.llmBaseUrl,

package/src/config.js CHANGED Viewed

@@ -65,7 +65,7 @@ export function loadSettings(workspacePath) {
     llmApiKey: env.LLM_API_KEY || env.SILICONFLOW_API_KEY || gc.api_key || "",
     llmBaseUrl: env.LLM_BASE_URL || env.SILICONFLOW_BASE_URL || gc.base_url || "https://api.siliconflow.cn/v1",
     kcModel: gc.conductor_model || "glm-5",
-    kcMaxTokens: 65536,
+    kcMaxTokens: parseInt(env.KC_MAX_TOKENS || gc.kc_max_tokens?.toString() || "65536", 10),
     // Tier models (from .env or global config tiers)
     tier1: env.TIER1 || gc.tiers?.tier1 || "",
@@ -111,6 +111,15 @@ export function loadSettings(workspacePath) {
     // Context management
     kcContextLimit: parseInt(env.KC_CONTEXT_LIMIT || "200000", 10),
+    toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
+    toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
+    maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
+    // File system (Block 11)
+    gitAutoCommit: (env.GIT_AUTO_COMMIT ?? gc.git_auto_commit ?? true) !== false &&
+                   (env.GIT_AUTO_COMMIT !== "false") &&
+                   (gc.git_auto_commit !== false),
+    largeRefThresholdMB: parseInt(env.LARGE_REF_THRESHOLD_MB || gc.large_ref_threshold_mb?.toString() || "10", 10),
     // Language
     language: env.LANGUAGE || gc.language || "en",

package/src/model-tiers.json CHANGED Viewed

@@ -2,17 +2,17 @@
   "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
   "siliconflow": {
-    "conductor": "Pro/zai-org/GLM-5",
+    "conductor": "Pro/zai-org/GLM-5.1",
     "llm": {
-      "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
+      "tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
       "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
       "tier3": "Qwen/Qwen3.5-122B-A10B",
       "tier4": "Qwen/Qwen3.5-35B-A3B"
     },
     "vlm": {
-      "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
-      "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
-      "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
+      "tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
+      "tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
+      "tier3": "Qwen/Qwen3-VL-8B-Instruct"
     }
   },

package/template/release-runtime/README.md.tmpl ADDED Viewed

@@ -0,0 +1,84 @@
+# {LABEL} — KC Verification Release
+Generated: {CREATED_AT}
+Snapshot tag: `{SNAPSHOT_TAG}`
+Commit: `{SNAPSHOT_COMMIT}`
+Built by: kc-beta {KC_VERSION}
+{NOTES_BLOCK}
+This bundle is self-contained. It runs without `kc-beta` installed — only Python 3 and a worker LLM API key are required.
+## What's in here
+```
+manifest.json              — release metadata (rules, models, snapshot tag)
+README.md                  — this file
+run.py                     — standalone driver, runs all rules
+render_dashboard.py        — re-render an HTML dashboard from a result JSON
+serve.sh                   — optional helper, serves this dir over local HTTP
+kc_runtime/                — bundled Python helpers (confidence scoring, dashboard)
+workflows/                 — pinned per-rule Python workflows + prompts
+fixtures/                  — sample inputs (if KC selected any)
+glossary.json              — project entity vocabulary at release time
+catalog.json               — rule catalog at release time
+corner_cases.json          — known corner cases (used by confidence scoring)
+confidence_calibration.json — per-rule historical accuracy
+models.json                — worker LLM tier→model assignments
+```
+## Run a verification
+```sh
+export LLM_API_KEY="sk-..."
+export LLM_BASE_URL="https://api.siliconflow.cn/v1"   # or your provider
+export TIER1="..."                                     # comma-separated model list
+export TIER2="..."
+python run.py /path/to/document.pdf > result.json
+```
+Each rule's workflow runs against the document; results are aggregated into a single JSON.
+### Useful flags
+```sh
+python run.py doc.pdf --rule R001              # run only one rule
+python run.py doc.pdf --output result.json     # write to a file
+python run.py doc.pdf --dashboard              # also emit an HTML dashboard
+```
+### Re-render a dashboard
+```sh
+python render_dashboard.py result.json
+# → result.html alongside the JSON
+```
+### Browse dashboards in a browser
+```sh
+./serve.sh
+# → http://localhost:8080/result.html
+```
+## Rules in this release
+{RULES_LIST}
+## Reproducibility
+The release bundle is regenerable from the snapshot tag:
+```sh
+git checkout {SNAPSHOT_TAG}
+# then run kc-beta and ask it to release({label: "{LABEL}"}) again
+```
+The `manifest.json` records the exact commit (`{SNAPSHOT_COMMIT}`) so you can verify what's running.
+## Caveats
+- Workflows call worker LLMs. Costs depend on your provider; the bundle does not enforce a budget.
+- Workflow output for each rule is preserved in `result.raw[*]` for audit. If you need full audit history with KC's event log + corner-case registry, work from the source workspace, not this bundle.
+- The bundle does not sandbox the Python code it runs. Only run it if you trust its source, as you would any other executable.

package/template/release-runtime/kc_runtime/__init__.py ADDED Viewed

@@ -0,0 +1,2 @@
+# KC release-runtime support package.
+# Bundled into every release. Self-contained, no external dependencies.

package/template/release-runtime/kc_runtime/confidence.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""
+Confidence scorer — Python port of src/agent/confidence-scorer.js.
+Composite formula: confidence = method_prior * source_presence
+                                * historical_accuracy * (1 - corner_proximity)
+Identical to the JS scorer used inside KC, so release runs produce the same
+confidence values KC produces in-workspace.
+Note on rounding: JS Math.round() is half-up, Python's round() is half-to-even
+(banker's rounding). We use a half-up implementation here to match JS exactly.
+"""
+import math
+def _round3_halfup(x):
+    """Round x to 3 decimals, half-up (matches JS Math.round).
+    Used in place of the built-in round(), which rounds halves to even.
+    x: number to round; score() only passes values it has already clamped
+       to the range 0.0 - 1.0.
+    Returns x scaled by 1000, shifted by 0.5, floored, then scaled back.
+    """
+    # floor(v + 0.5) sends an exact tie upward rather than to the even digit.
+    return math.floor(x * 1000 + 0.5) / 1000
+# Default method priors: the multiplier score() applies for each extraction
+# method when the caller passes no `priors` override. The "fallback" entry is
+# also what score() uses for a method name that is missing from the table.
+DEFAULT_PRIORS = {
+    "regex": 0.95,
+    "python": 0.90,
+    "llm": 0.75,
+    "ocr": 0.65,
+    "fallback": 0.50,
+}
+def score(rule_id, extracted_value, source_text="", method="llm",
+          document="", priors=None, historical=None, corner_cases=None):
+    """
+    Compute composite confidence score (0.0 - 1.0).
+    The score is the product of four factors: the method prior, a source
+    presence factor, the rule's historical accuracy, and
+    (1 - corner-case proximity). It is clamped and rounded to 3 decimals.
+    rule_id: rule identifier
+    extracted_value: the value the workflow extracted (string)
+    source_text: optional surrounding text from the document
+    method: "regex" | "python" | "llm" | "ocr" | "fallback"
+    document: document name / path (used for corner-case proximity)
+    priors: dict overriding DEFAULT_PRIORS
+    historical: dict of {rule_id: accuracy} from confidence_calibration.json
+    corner_cases: list/dict from corner_cases.json registry
+    Returns: float in the range 0.0 - 1.0, rounded half-up to 3 decimals.
+    """
+    # A non-empty priors dict replaces DEFAULT_PRIORS entirely (no merging).
+    p = priors or DEFAULT_PRIORS
+    # Unknown methods use the table's "fallback" entry, or 0.50 if it has none.
+    method_prior = p.get(method, p.get("fallback", 0.50))
+    source_presence = 1.0
+    # Penalize (0.7) only when both strings are non-empty and the extracted
+    # value is not a verbatim substring of the source text.
+    if source_text and extracted_value:
+        source_presence = 1.0 if str(extracted_value) in source_text else 0.7
+    # Rules with no calibration entry default to an accuracy of 0.8.
+    hist = (historical or {}).get(rule_id, 0.8)
+    corner_proximity = _corner_proximity(corner_cases, document, rule_id)
+    confidence = method_prior * source_presence * hist * (1.0 - corner_proximity)
+    # Clamp to the range 0.0 - 1.0 before rounding.
+    confidence = max(0.0, min(1.0, confidence))
+    return _round3_halfup(confidence)
+def band(confidence):
+    """Classify confidence into low/medium/high band — matches JS getBand().
+    confidence: numeric score, e.g. the value returned by score().
+    Returns "high" for values >= 0.8, "medium" for values >= 0.5 and below
+    0.8, and "low" for anything else. Both thresholds are inclusive.
+    """
+    if confidence >= 0.8:
+        return "high"
+    if confidence >= 0.5:
+        return "medium"
+    return "low"
+def _corner_proximity(corner_cases, document, rule_id):
+    """Mirror CornerCaseRegistry.match: count entries matching this doc + rule.
+    Each match adds 0.1 (capped at 0.3). Schema is intentionally loose — KC's
+    JS registry stores entries with optional `document_pattern` and `rule_id`
+    fields; we replicate the same matching semantics here.
+    corner_cases: a list of entries, or a dict holding them under "entries".
+    document: document name / path; patterns are tested as plain substrings.
+    rule_id: rule identifier compared against each entry's `rule_id`.
+    Returns: the penalty as a float between 0.0 and 0.3. It is 0.0 when
+    corner_cases or document is empty, or when there are no entries.
+    """
+    if not corner_cases or not document:
+        return 0.0
+    # Accept either a bare list or a {"entries": [...]} wrapper.
+    entries = corner_cases if isinstance(corner_cases, list) else corner_cases.get("entries", [])
+    if not entries:
+        return 0.0
+    matches = 0
+    for e in entries:
+        # Skip malformed (non-dict) entries instead of failing on them.
+        if not isinstance(e, dict):
+            continue
+        # An entry without a rule_id applies to every rule.
+        if e.get("rule_id") and e.get("rule_id") != rule_id:
+            continue
+        # Falls back to the "document" key; an entry with neither key set
+        # matches any document.
+        pattern = e.get("document_pattern") or e.get("document") or ""
+        if pattern and pattern not in document:
+            continue
+        matches += 1
+    return min(0.3, 0.1 * matches)