npm - kc-beta - Versions diffs - 0.1.1 → 0.2.1 - Mend

kc-beta 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/bin/kc-beta.js +14 -2
package/package.json +1 -1
package/src/agent/context-window.js +151 -0
package/src/agent/context.js +58 -88
package/src/agent/engine.js +267 -38
package/src/agent/event-log.js +111 -0
package/src/agent/llm-client.js +352 -59
package/src/agent/pipelines/_archive_v1/distillation.js +113 -0
package/src/agent/pipelines/_archive_v1/extraction.js +92 -0
package/src/agent/pipelines/_archive_v1/initializer.js +163 -0
package/src/agent/pipelines/_archive_v1/production-qc.js +99 -0
package/src/agent/pipelines/_archive_v1/skill-authoring.js +83 -0
package/src/agent/pipelines/_archive_v1/skill-testing.js +111 -0
package/src/agent/pipelines/base.js +6 -0
package/src/agent/pipelines/distillation.js +25 -11
package/src/agent/pipelines/extraction.js +26 -7
package/src/agent/pipelines/initializer.js +30 -20
package/src/agent/pipelines/production-qc.js +22 -5
package/src/agent/pipelines/skill-authoring.js +19 -8
package/src/agent/pipelines/skill-testing.js +26 -8
package/src/agent/retry.js +83 -0
package/src/agent/session-state.js +78 -0
package/src/agent/skill-loader.js +139 -0
package/src/agent/token-counter.js +62 -0
package/src/agent/tools/document-parse.js +3 -3
package/src/agent/tools/tier-downgrade.js +11 -2
package/src/agent/tools/web-search.js +107 -0
package/src/agent/tools/worker-llm-call.js +14 -5
package/src/cli/components.js +16 -4
package/src/cli/config.js +246 -0
package/src/cli/index.js +99 -10
package/src/cli/onboard.js +154 -48
package/src/config.js +25 -7
package/src/providers.js +370 -0

package/src/agent/pipelines/production-qc.js CHANGED Viewed

@@ -69,7 +69,7 @@ export class ProductionQCPipeline extends Pipeline {
   describeState() {
     this._scanWorkspace();
-    const parts = ["## Current Phase: PRODUCTION_QC"];
+    const parts = ["## Phase: PRODUCTION_QC\nRun workflows on production documents from input/, monitor quality via confidence-based sampling. This phase transitions from active review to stable spot-checking as accuracy stabilizes."];
     parts.push(`### Progress\n- Batches: ${this.batchesProcessed}\n- Documents: ${this.totalDocuments}\n- Reviewed: ${this.documentsReviewed}\n- Monitoring: ${this.monitoringPhase}\n- Sampling rate: ${(this._samplingRate * 100).toFixed(0)}%`);
     if (Object.keys(this.accuracyByRule).length) {
@@ -77,10 +77,8 @@ export class ProductionQCPipeline extends Pipeline {
       parts.push("### Accuracy by rule\n" + lines.join("\n"));
     }
-    if (this.monitoringPhase === "initial") {
-      parts.push("### What to do now\nRun workflows on input/ documents. Save results to output/. Review and save QC to output/qc/.");
-    } else if (this.monitoringPhase === "stable") {
-      parts.push("### Status: Stable\nWorkflows running reliably. Spot-check only.");
+    if (this.monitoringPhase === "stable") {
+      parts.push("### Status: Stable monitoring. Spot-check only.");
     }
     return parts.join("\n\n");
   }
@@ -96,4 +94,23 @@ export class ProductionQCPipeline extends Pipeline {
   }
   exitCriteriaMet() { return this.monitoringPhase === "stable"; }
+  /**
+   * Snapshot this phase's progress as a plain object.
+   * @returns {object} Batch/document counters, monitoring phase, per-rule accuracy
+   *   and the current length of issuesFound (as issuesCount).
+   */
+  exportState() {
+    const { batchesProcessed, totalDocuments, documentsReviewed, monitoringPhase, accuracyByRule } = this;
+    const issuesCount = this.issuesFound.length;
+    return { batchesProcessed, totalDocuments, documentsReviewed, monitoringPhase, accuracyByRule, issuesCount };
+  }
+  /**
+   * Restore progress from a snapshot shaped like the result of exportState().
+   * Counters only move forward: a saved value is applied only when it is a number
+   * larger than the current one. Missing or malformed fields are ignored.
+   * @param {object} [data] - Saved snapshot; a missing or non-object value is a no-op
+   */
+  importState(data) {
+    // Guard against an absent snapshot so restoring never throws.
+    if (!data || typeof data !== "object") return;
+    for (const key of ["batchesProcessed", "totalDocuments", "documentsReviewed"]) {
+      const saved = data[key];
+      if (typeof saved === "number" && saved > this[key]) this[key] = saved;
+    }
+    // exitCriteriaMet() compares monitoringPhase against a string, so only accept strings.
+    if (typeof data.monitoringPhase === "string" && data.monitoringPhase) this.monitoringPhase = data.monitoringPhase;
+    const { accuracyByRule } = data;
+    if (accuracyByRule && typeof accuracyByRule === "object") Object.assign(this.accuracyByRule, accuracyByRule);
+  }
 }

package/src/agent/pipelines/skill-authoring.js CHANGED Viewed

@@ -52,16 +52,13 @@ export class SkillAuthoringPipeline extends Pipeline {
   describeState() {
     this._scanWorkspace();
     const total = this.totalRules.length;
-    const parts = ["## Current Phase: SKILL_AUTHORING"];
-    parts.push(`### Progress\n- Rules from extraction: ${total}\n- Skills authored: ${this.skillsAuthored.length}\n- Skills with scripts/: ${this.skillsWithScripts.length}`);
+    const authored = this.skillsAuthored.length;
+    const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
+    const parts = ["## Phase: SKILL_AUTHORING\nWrite verification skills for each extracted rule. Skills are first-class deliverables — they may serve as the production solution when worker LLM workflows are insufficient. Follow Anthropic skill-creator format. This is BUILD mode."];
+    parts.push(`### Progress\n- Rules: ${total}\n- Skills authored: ${authored}\n- Skills with scripts/: ${this.skillsWithScripts.length}${remaining.length > 0 ? `\n- Remaining: ${remaining.slice(0, 10).join(", ")}` : ""}`);
     if (this.exitCriteriaMet()) {
-      parts.push("### Ready\nAll rules have skills. Proceed to SKILL_TESTING.");
-    } else if (this.skillsAuthored.length === 0) {
-      parts.push("### What to do now\nWrite a SKILL.md for each rule in rule_skills/{rule_id}/.\nDescribe: what to check, where to look, what to extract, how to judge.");
-    } else {
-      const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
-      parts.push(`### What to do now\n${total - this.skillsAuthored.length} rules still need skills. Remaining: ${remaining.slice(0, 10).join(", ")}`);
+      parts.push("### Exit\nAll rules have skills. Proceed to SKILL_TESTING.");
     }
     return parts.join("\n\n");
   }
@@ -80,4 +77,18 @@ export class SkillAuthoringPipeline extends Pipeline {
     if (!this.totalRules.length) return false;
     return this.skillsAuthored.length >= this.totalRules.length && this.skillsWithScripts.length >= this.skillsAuthored.length * 0.5;
   }
+  /**
+   * Snapshot the rule and skill id lists tracked by this phase.
+   * @returns {{totalRules: Array, skillsAuthored: Array, skillsWithScripts: Array}}
+   */
+  exportState() {
+    const { totalRules, skillsAuthored, skillsWithScripts } = this;
+    return { totalRules, skillsAuthored, skillsWithScripts };
+  }
+  /**
+   * Restore list state from a snapshot shaped like the result of exportState().
+   * A saved list replaces the current one only when it is an array with more entries.
+   * @param {object} [data] - Saved snapshot; a missing or non-object value is a no-op
+   */
+  importState(data) {
+    // Guard against an absent snapshot so restoring never throws.
+    if (!data || typeof data !== "object") return;
+    for (const key of ["totalRules", "skillsAuthored", "skillsWithScripts"]) {
+      const saved = data[key];
+      if (Array.isArray(saved) && saved.length > this[key].length) this[key] = saved;
+    }
+  }
 }

package/src/agent/pipelines/skill-testing.js CHANGED Viewed

@@ -78,17 +78,15 @@ export class SkillTestingPipeline extends Pipeline {
     const failing = Object.entries(this.skillsTested).filter(([, acc]) => acc < this._accuracyThreshold);
     const untested = this.skillsToTest.filter((s) => !(s in this.skillsTested));
-    const parts = ["## Current Phase: SKILL_TESTING"];
+    const parts = ["## Phase: SKILL_TESTING\nTest skills against sample documents, iterate via evolution loop until accuracy threshold is met. This is BUILD mode — the results established here become the accuracy baseline for distillation."];
     parts.push(`### Progress\n- Skills to test: ${total}\n- Tested: ${tested}\n- Passing (>=${this._accuracyThreshold}): ${passing}\n- Evolution iterations: ${this.iterationCount}/${this._maxIterations}`);
+    if (untested.length) parts.push(`- Untested: ${untested.slice(0, 10).join(", ")}`);
+    if (failing.length) parts.push(`- Below threshold:\n${failing.map(([id, acc]) => `  - ${id}: ${acc.toFixed(2)}`).join("\n")}`);
     if (this.exitCriteriaMet()) {
-      parts.push("### Ready\nAll skills passing. Proceed to DISTILLATION.");
-    } else if (untested.length) {
-      parts.push(`### What to do now\nTest these skills: ${untested.slice(0, 10).join(", ")}`);
-    } else if (failing.length) {
-      parts.push("### What to do now — Evolution Cycle\nFailing skills:\n" +
-        failing.map(([id, acc]) => `- ${id}: ${acc.toFixed(2)}`).join("\n") +
-        "\n\nFollow: diagnose -> classify -> fix -> retest -> log");
+      parts.push("### Exit\nAll skills passing. Proceed to DISTILLATION.");
+    } else if (this.iterationCount >= this._maxIterations) {
+      parts.push(`### Max iterations (${this._maxIterations}) reached. Discuss remaining failures with the developer user.`);
     }
     return parts.join("\n\n");
   }
@@ -108,4 +106,24 @@ export class SkillTestingPipeline extends Pipeline {
     if (!total) return false;
     return Object.keys(this.skillsTested).length >= total && this.skillsPassing.length >= total * this._accuracyThreshold;
   }
+  /**
+   * Snapshot the testing progress of this phase.
+   * @returns {{skillsToTest: Array, skillsTested: object, skillsPassing: Array, iterationCount: number}}
+   */
+  exportState() {
+    const { skillsToTest, skillsTested, skillsPassing, iterationCount } = this;
+    return { skillsToTest, skillsTested, skillsPassing, iterationCount };
+  }
+  /**
+   * Restore testing progress from a snapshot shaped like the result of exportState().
+   * The iteration counter and the lists only grow; per-skill accuracies keep the
+   * higher of the saved and current value.
+   * @param {object} [data] - Saved snapshot; a missing or non-object value is a no-op
+   */
+  importState(data) {
+    // Guard against an absent snapshot so restoring never throws.
+    if (!data || typeof data !== "object") return;
+    if (typeof data.iterationCount === "number" && data.iterationCount > this.iterationCount) this.iterationCount = data.iterationCount;
+    for (const key of ["skillsToTest", "skillsPassing"]) {
+      const saved = data[key];
+      if (Array.isArray(saved) && saved.length > this[key].length) this[key] = saved;
+    }
+    if (data.skillsTested && typeof data.skillsTested === "object") {
+      for (const [id, acc] of Object.entries(data.skillsTested)) {
+        // describeState() calls acc.toFixed(), so never merge a non-numeric accuracy.
+        if (!Number.isFinite(acc)) continue;
+        const current = this.skillsTested[id];
+        if (!current || acc > current) this.skillsTested[id] = acc;
+      }
+    }
+  }
 }

package/src/agent/retry.js ADDED Viewed

@@ -0,0 +1,83 @@
+/**
+ * Retry wrapper with exponential backoff and jitter.
+ * Designed for LLM API calls — retries transient errors, fails fast on auth/validation errors.
+ */
+// Retry budget: MAX_RETRIES retries on top of the first attempt.
+const MAX_RETRIES = 10;
+// Backoff schedule: INITIAL_DELAY_MS * BACKOFF_MULTIPLIER^attempt, capped at MAX_DELAY_MS,
+// plus up to JITTER_FRACTION of the capped value as random jitter.
+const INITIAL_DELAY_MS = 1_000;
+const MAX_DELAY_MS = 60_000;
+const BACKOFF_MULTIPLIER = 2;
+const JITTER_FRACTION = 0.2;
+// HTTP statuses that are always retried.
+const RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504, 520, 522, 524]);
+// HTTP statuses that fail immediately.
+const NON_RETRYABLE_STATUS = new Set([400, 401, 403, 404, 422]);
+/**
+ * Decide whether a failed call is worth another attempt.
+ * Known non-retryable statuses lose, known retryable statuses win; otherwise
+ * network-looking messages, abort/timeout errors, any 5xx status and every
+ * error that carries no status at all are retried.
+ * @param {Error} err
+ * @returns {boolean}
+ */
+function isRetryable(err) {
+  const { status, name } = err;
+  if (status && NON_RETRYABLE_STATUS.has(status)) return false;
+  if (status && RETRYABLE_STATUS.has(status)) return true;
+  // Network errors (ECONNRESET, ETIMEDOUT, fetch TypeError, ...) are recognized by message text
+  const text = err.message || "";
+  const looksLikeNetworkFailure =
+    /ECONNRESET|ETIMEDOUT|ENOTFOUND|ECONNREFUSED|UND_ERR|fetch failed|network|socket hang up/i.test(text);
+  const abortedOrTimedOut = name === "AbortError" || name === "TimeoutError";
+  if (looksLikeNetworkFailure || abortedOrTimedOut) return true;
+  // No status at all: unknown failure, retry. Unlisted status: retry only server errors (5xx).
+  if (!status) return true;
+  return status >= 500;
+}
+/**
+ * Compute how long to wait before the next attempt.
+ * A positive Retry-After hint wins and is capped at MAX_DELAY_MS. Otherwise the
+ * delay is exponential backoff capped at MAX_DELAY_MS with random jitter added
+ * on top of the capped value, so the result can exceed the cap by up to JITTER_FRACTION.
+ * @param {number} attempt - 0-indexed attempt number
+ * @param {number|null} retryAfterSec - Retry-After header value in seconds
+ * @returns {number} Delay in milliseconds
+ */
+function calculateDelay(attempt, retryAfterSec) {
+  const hasServerHint = Boolean(retryAfterSec && retryAfterSec > 0);
+  if (hasServerHint) return Math.min(retryAfterSec * 1000, MAX_DELAY_MS);
+  const uncapped = INITIAL_DELAY_MS * BACKOFF_MULTIPLIER ** attempt;
+  const base = Math.min(uncapped, MAX_DELAY_MS);
+  return base + base * JITTER_FRACTION * Math.random();
+}
+/**
+ * Run an async function, retrying retryable failures with backoff.
+ *
+ * @param {() => Promise<any>} fn - The async function to execute. On failure it should
+ *   throw an error carrying `.status` and optionally `.retryAfter`.
+ * @returns {Promise<any>} The first successful result
+ * @throws {Error} A non-retryable error immediately, or a wrapper error (with `.cause`
+ *   and `.status` of the last failure) once all attempts are used up
+ */
+export async function withRetry(fn) {
+  const totalAttempts = MAX_RETRIES + 1;
+  let failure;
+  let attempt = 0;
+  while (attempt < totalAttempts) {
+    try {
+      return await fn();
+    } catch (err) {
+      failure = err;
+      if (!isRetryable(err)) throw err;
+    }
+    // No point sleeping after the final attempt.
+    if (attempt === MAX_RETRIES) break;
+    const hintSec = failure.retryAfter ? parseFloat(failure.retryAfter) : null;
+    const waitMs = calculateDelay(attempt, hintSec);
+    await new Promise((resolve) => setTimeout(resolve, waitMs));
+    attempt += 1;
+  }
+  const wrapper = new Error(`LLM API call failed after ${totalAttempts} attempts: ${failure.message}`);
+  wrapper.cause = failure;
+  wrapper.status = failure.status;
+  throw wrapper;
+}

package/src/agent/session-state.js ADDED Viewed

@@ -0,0 +1,78 @@
+import fs from "node:fs";
+import path from "node:path";
+/**
+ * Persists session state (phase, pipeline milestones, phase summaries)
+ * to enable cross-session resume.
+ *
+ * Stored as: workspace/{sessionId}/session-state.json
+ */
+export class SessionState {
+  /**
+   * @param {string} workspacePath - Session workspace directory
+   */
+  constructor(workspacePath) {
+    this._path = path.join(workspacePath, "session-state.json");
+  }
+  /** Whether a session state file exists */
+  get exists() {
+    return fs.existsSync(this._path);
+  }
+  /**
+   * Save engine state to disk.
+   * The file is written to a temporary sibling and renamed into place, so an
+   * interrupted write cannot leave a truncated session-state.json behind.
+   * @param {import('./engine.js').AgentEngine} engine
+   */
+  save(engine) {
+    const now = new Date().toISOString();
+    const state = {
+      version: 1,
+      sessionId: engine.workspace.sessionId,
+      currentPhase: engine.currentPhase,
+      phaseSummaries: engine._phaseSummaries || [],
+      lastEventSeq: engine.eventLog?.currentSeq || 0,
+      // Keep the original creation time across saves.
+      createdAt: this._loadRaw()?.createdAt || now,
+      updatedAt: now,
+      pipelineMilestones: this._extractMilestones(engine.pipelines),
+    };
+    fs.mkdirSync(path.dirname(this._path), { recursive: true });
+    const tmpPath = `${this._path}.tmp`;
+    fs.writeFileSync(tmpPath, JSON.stringify(state, null, 2), "utf-8");
+    fs.renameSync(tmpPath, this._path);
+  }
+  /**
+   * Load session state from disk.
+   * @returns {object} The persisted state, or an empty object when none is readable
+   */
+  load() {
+    return this._loadRaw() || {};
+  }
+  /**
+   * Read and parse the state file.
+   * @returns {object|null} Parsed state, or null when the file is missing or not valid JSON
+   */
+  _loadRaw() {
+    if (!this.exists) return null;
+    try {
+      return JSON.parse(fs.readFileSync(this._path, "utf-8"));
+    } catch {
+      // Unreadable or corrupt state is treated as "no state".
+      return null;
+    }
+  }
+  /**
+   * Serialize pipeline milestones for persistence.
+   * @param {object} [pipelines] - Map of phase -> pipeline instance
+   * @returns {object} Map of phase -> exportState() result for pipelines that provide one
+   */
+  _extractMilestones(pipelines) {
+    const milestones = {};
+    for (const [phase, pipeline] of Object.entries(pipelines || {})) {
+      if (!pipeline?.exportState) continue;
+      try {
+        milestones[phase] = pipeline.exportState();
+      } catch {
+        // Best effort: one failing pipeline must not block saving the others.
+      }
+    }
+    return milestones;
+  }
+}

package/src/agent/skill-loader.js ADDED Viewed

@@ -0,0 +1,139 @@
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const BUNDLED_SKILLS_DIR = path.resolve(__dirname, "../../template/skills");
+/**
+ * Discover and index meta skills from template/skills/.
+ * Follows Claude Code's pattern: skills are NOT dumped into the system prompt.
+ * Instead, a brief index (name + description) is injected into context.
+ * The agent reads full SKILL.md content on demand via workspace_file or sandbox_exec.
+ *
+ * Skills are organized as:
+ *   template/skills/{lang}/meta-meta/  — System architecture methodology
+ *   template/skills/{lang}/meta/       — Verification domain methodology
+ *   template/skills/{lang}/skill-creator/ — Anthropic's official skill creation toolkit
+ */
+export class SkillLoader {
+  /**
+   * @param {string} [language] - "en" or "zh"
+   * @param {string} [skillsDir] - Override skills directory (default: bundled template)
+   */
+  constructor(language = "en", skillsDir) {
+    this._lang = language;
+    this._skillsDir = skillsDir || BUNDLED_SKILLS_DIR;
+    this._index = null;
+  }
+  /**
+   * Build the skill index by scanning SKILL.md frontmatter.
+   * Cached after first call.
+   * @returns {Array<{name: string, description: string, category: string, path: string}>}
+   *   `path` is relative to the skills directory.
+   */
+  getIndex() {
+    if (this._index) return this._index;
+    this._index = [];
+    const langDir = path.join(this._skillsDir, this._lang);
+    if (!fs.existsSync(langDir)) return this._index;
+    for (const category of ["meta-meta", "meta", "skill-creator"]) {
+      const catDir = path.join(langDir, category);
+      if (!fs.existsSync(catDir)) continue;
+      // A category root can itself be a skill (skill-creator); it is indexed
+      // only when its frontmatter declares a name.
+      const rootSkillMd = path.join(catDir, "SKILL.md");
+      if (fs.existsSync(rootSkillMd)) {
+        const { name, description } = this._parseFrontmatter(rootSkillMd);
+        if (name) {
+          this._index.push({
+            name,
+            description: description || "",
+            category,
+            path: path.relative(this._skillsDir, catDir),
+          });
+        }
+      }
+      // Subdirectory skills (meta-meta/bootstrap-workspace/, etc.) fall back to the directory name.
+      for (const entry of fs.readdirSync(catDir, { withFileTypes: true })) {
+        if (!entry.isDirectory()) continue;
+        const skillDir = path.join(catDir, entry.name);
+        const subSkillMd = path.join(skillDir, "SKILL.md");
+        if (!fs.existsSync(subSkillMd)) continue;
+        const { name, description } = this._parseFrontmatter(subSkillMd);
+        this._index.push({
+          name: name || entry.name,
+          description: description || "",
+          category,
+          path: path.relative(this._skillsDir, skillDir),
+        });
+      }
+    }
+    return this._index;
+  }
+  /**
+   * Format the skill index for injection into agent context.
+   * Brief listing (descriptions cut to 120 characters) — agent reads full content on demand.
+   * @returns {string} Markdown listing, or "" when no skills were found
+   */
+  formatForContext() {
+    const index = this.getIndex();
+    if (index.length === 0) return "";
+    const inCategory = (category) => index.filter((s) => s.category === category);
+    // [heading, skills, whether a blank line follows the section]
+    const sections = [
+      ["**System Architecture (meta-meta):**", inCategory("meta-meta"), true],
+      ["**Verification Methodology (meta):**", inCategory("meta"), true],
+      ["**Toolkits:**", index.filter((s) => s.category !== "meta-meta" && s.category !== "meta"), false],
+    ];
+    const lines = ["## Available Methodology Skills",
+      "Read full skill content from the skills/ directory when needed.\n"];
+    for (const [heading, skills, blankAfter] of sections) {
+      if (!skills.length) continue;
+      lines.push(heading, ...skills.map((s) => `- **${s.name}**: ${s.description.slice(0, 120)}`));
+      if (blankAfter) lines.push("");
+    }
+    return lines.join("\n");
+  }
+  /**
+   * Parse YAML frontmatter from a SKILL.md file.
+   * Only extracts single-line name and description — lightweight, not a YAML parser.
+   * Accepts LF or CRLF line endings and an optional UTF-8 BOM; matching surrounding
+   * quotes around a value are removed.
+   * @param {string} filePath
+   * @returns {{name?: string, description?: string}} Empty object when the file is
+   *   unreadable or has no frontmatter
+   */
+  _parseFrontmatter(filePath) {
+    try {
+      const content = fs.readFileSync(filePath, "utf-8");
+      const match = content.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/);
+      if (!match) return {};
+      const frontmatter = match[1];
+      const unquote = (value) => {
+        const quoted = value.match(/^(["'])(.*)\1$/);
+        return quoted ? quoted[2] : value;
+      };
+      const name = unquote(frontmatter.match(/^name:\s*(.+)$/m)?.[1]?.trim() || "");
+      const description = unquote(frontmatter.match(/^description:\s*(.+)$/m)?.[1]?.trim() || "");
+      return { name, description };
+    } catch {
+      // Unreadable file: treated like a skill without frontmatter.
+      return {};
+    }
+  }
+}

package/src/agent/token-counter.js ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Lightweight token estimation without external dependencies.
+ * Uses character-based heuristics: ~4 chars per token for Latin text,
+ * ~1.5 tokens per CJK character.
+ */
+// CJK Unified Ideographs, Extension A and Compatibility Ideographs (all single UTF-16 units)
+const CJK_REGEX = /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff]/g;
+/**
+ * Estimate the number of tokens in a string: one token per 4 non-CJK
+ * characters plus 1.5 tokens per CJK character, each part rounded up.
+ * @param {string} text
+ * @returns {number} 0 for empty or missing text
+ */
+export function estimateTokens(text) {
+  if (!text) return 0;
+  const cjkCount = (text.match(CJK_REGEX) ?? []).length;
+  const latinTokens = Math.ceil((text.length - cjkCount) / 4);
+  const cjkTokens = Math.ceil(cjkCount * 1.5);
+  return latinTokens + cjkTokens;
+}
+/**
+ * Estimate total tokens for an array of OpenAI-format messages.
+ * Accounts for per-message overhead (~4 tokens for role/formatting).
+ * Content may be a string or an array of content blocks; a block's own `content`
+ * may again be a string or an array of blocks, and is counted recursively.
+ * @param {Array<object>} messages
+ * @returns {number}
+ */
+export function estimateMessagesTokens(messages) {
+  // Count a content value that is either a string or an array of blocks.
+  const contentTokens = (content) => {
+    if (typeof content === "string") return estimateTokens(content);
+    if (!Array.isArray(content)) return 0;
+    let sum = 0;
+    for (const block of content) {
+      if (!block || typeof block !== "object") continue;
+      if (typeof block.text === "string") sum += estimateTokens(block.text);
+      // Nested content used to be passed to estimateTokens() directly and threw on arrays.
+      sum += contentTokens(block.content);
+    }
+    return sum;
+  };
+  let total = 0;
+  for (const msg of messages) {
+    total += 4; // role + formatting overhead
+    total += contentTokens(msg.content);
+    for (const tc of msg.tool_calls || []) {
+      const args = tc.function?.arguments;
+      total += estimateTokens(tc.function?.name || "");
+      // Arguments are normally a JSON string; serialize anything else before counting.
+      total += estimateTokens(typeof args === "string" ? args : JSON.stringify(args ?? "") || "");
+    }
+  }
+  return total;
+}
+/**
+ * Render a token count for display: values of 1000 and above become
+ * thousands with one decimal and a "k" suffix (e.g., "45.2k").
+ * @param {number} tokens
+ * @returns {string}
+ */
+export function formatTokenCount(tokens) {
+  return tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : tokens.toString();
+}

package/src/agent/tools/document-parse.js CHANGED Viewed

@@ -12,13 +12,13 @@ const MIN_CHARS_PER_PAGE = 50;
  * Level 3: OCR models via SiliconFlow — fallback via vision models
  */
 export class DocumentParseTool extends BaseTool {
-  constructor(workspace, { mineruApiUrl, mineruApiKey, siliconflowApiKey, siliconflowBaseUrl, ocrModel } = {}) {
+  constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, siliconflowApiKey, siliconflowBaseUrl, ocrModel } = {}) {
     super();
     this._workspace = workspace;
     this._mineruApiUrl = mineruApiUrl || "";
     this._mineruApiKey = mineruApiKey || "";
-    this._sfApiKey = siliconflowApiKey || "";
-    this._sfBaseUrl = siliconflowBaseUrl || "https://api.siliconflow.cn/v1";
+    this._sfApiKey = llmApiKey || siliconflowApiKey || "";
+    this._sfBaseUrl = llmBaseUrl || siliconflowBaseUrl || "https://api.siliconflow.cn/v1";
     this._ocrModel = ocrModel || "";
   }

package/src/agent/tools/tier-downgrade.js CHANGED Viewed

@@ -72,14 +72,23 @@ export class TierDowngradeTool extends BaseTool {
       }
     }
-    const recommend = targetAcc >= threshold && delta <= 0.05 ? "downgrade" : "keep_current";
+    // Read tier tolerance from .env (default from onboarding config)
+    let tolerance = 0.05;
+    if (fs.existsSync(envPath)) {
+      for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
+        if (line.startsWith("TIER_TOLERANCE=")) {
+          try { tolerance = parseFloat(line.split("=")[1].trim()); }
+          catch { /* ignore */ }
+        }
+      }
+    }
     const report = {
       rule_id: ruleId, current_tier: currentTier, target_tier: targetTier,
       current_accuracy: Math.round(currentAcc * 1000) / 1000,
       target_accuracy: Math.round(targetAcc * 1000) / 1000,
       accuracy_delta: Math.round(delta * 1000) / 1000,
-      threshold, recommendation: recommend, test_count: testInputs.length,
+      threshold, tolerance, test_count: testInputs.length,
     };
     return new ToolResult(JSON.stringify(report, null, 2));
   }

package/src/agent/tools/web-search.js ADDED Viewed

@@ -0,0 +1,107 @@
+import { BaseTool, ToolResult } from "./base.js";
+/**
+ * Web search via Tavily API.
+ * Returns extracted text content from search results.
+ */
+export class WebSearchTool extends BaseTool {
+  /**
+   * @param {string} apiKey - Tavily API key; when empty, execute() returns a configuration error
+   */
+  constructor(apiKey) {
+    super();
+    this._apiKey = apiKey;
+  }
+  get name() { return "web_search"; }
+  get description() {
+    return (
+      "Search the web for information using Tavily. Returns extracted text from top results. " +
+      "IMPORTANT: Always prioritize information from user-provided domain documents " +
+      "(uploaded regulations, sample files, workspace documents) over web search results. " +
+      "Use web search only when: (1) the needed information is not in provided documents, " +
+      "(2) you need to verify or supplement document content with external sources, or " +
+      "(3) the user explicitly asks for web information (e.g., latest LLM model info, API docs)."
+    );
+  }
+  get inputSchema() {
+    return {
+      type: "object",
+      properties: {
+        query: {
+          type: "string",
+          description: "The search query",
+        },
+        search_depth: {
+          type: "string",
+          enum: ["basic", "advanced"],
+          description: "Search depth: 'basic' for fast results, 'advanced' for more thorough search (default: basic)",
+        },
+        max_results: {
+          type: "integer",
+          description: "Maximum number of results to return (default: 5, max: 10)",
+        },
+      },
+      required: ["query"],
+    };
+  }
+  /**
+   * Run one search and format the results as text.
+   * Inputs are normalized to what inputSchema promises: a string query, a search
+   * depth of "basic" or "advanced", and a whole number of results between 1 and 10.
+   * @param {{query?: string, search_depth?: string, max_results?: number}} [input]
+   * @returns {Promise<ToolResult>} Result text, or an error result (second argument true)
+   *   for a missing query, missing API key, non-2xx response or request failure
+   */
+  async execute(input = {}) {
+    // Coerce non-string queries so .trim() cannot throw.
+    const query = typeof input.query === "string" ? input.query : String(input.query ?? "");
+    if (!query.trim()) {
+      return new ToolResult("No query provided", true);
+    }
+    if (!this._apiKey) {
+      return new ToolResult(
+        "Web search is not configured. Set TAVILY_API_KEY in your .env file or global config.",
+        true,
+      );
+    }
+    const searchDepth = input.search_depth === "advanced" ? "advanced" : "basic";
+    // Anything that is not a whole number >= 1 (0, negatives, NaN, missing) uses the default of 5.
+    const requested = Number.parseInt(input.max_results, 10);
+    const maxResults = requested >= 1 ? Math.min(requested, 10) : 5;
+    try {
+      const resp = await fetch("https://api.tavily.com/search", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          api_key: this._apiKey,
+          query,
+          search_depth: searchDepth,
+          max_results: maxResults,
+        }),
+        signal: AbortSignal.timeout(15000),
+      });
+      if (!resp.ok) {
+        const text = await resp.text();
+        return new ToolResult(`Tavily API error ${resp.status}: ${text}`, true);
+      }
+      const data = await resp.json();
+      const results = data.results || [];
+      if (results.length === 0) {
+        return new ToolResult(`No results found for: ${query}`);
+      }
+      const lines = results.flatMap((r) => [
+        `--- ${r.title || "Untitled"} ---`,
+        `URL: ${r.url || ""}`,
+        r.content || "(no content)",
+        "",
+      ]);
+      return new ToolResult(
+        `Found ${results.length} result(s) for "${query}":\n\n${lines.join("\n")}`,
+      );
+    } catch (err) {
+      // Timeouts and network failures are reported to the agent instead of thrown.
+      return new ToolResult(`Web search failed: ${err.message}`, true);
+    }
+  }
+}