npm - selftune - Versions diffs - 0.2.6 → 0.2.9 - Mend

selftune 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

package/README.md +1 -0
package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
package/apps/local-dashboard/dist/index.html +5 -5
package/cli/selftune/activation-rules.ts +57 -18
package/cli/selftune/agent-guidance.ts +96 -0
package/cli/selftune/alpha-identity.ts +156 -0
package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
package/cli/selftune/alpha-upload/client.ts +113 -0
package/cli/selftune/alpha-upload/flush.ts +191 -0
package/cli/selftune/alpha-upload/index.ts +194 -0
package/cli/selftune/alpha-upload/queue.ts +252 -0
package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
package/cli/selftune/alpha-upload-contract.ts +52 -0
package/cli/selftune/auth/device-code.ts +110 -0
package/cli/selftune/auto-update.ts +130 -0
package/cli/selftune/badge/badge.ts +19 -9
package/cli/selftune/canonical-export.ts +16 -3
package/cli/selftune/constants.ts +28 -8
package/cli/selftune/contribute/bundle.ts +33 -5
package/cli/selftune/dashboard-contract.ts +32 -1
package/cli/selftune/dashboard-server.ts +215 -693
package/cli/selftune/dashboard.ts +1 -1
package/cli/selftune/eval/baseline.ts +11 -7
package/cli/selftune/eval/hooks-to-evals.ts +39 -15
package/cli/selftune/eval/synthetic-evals.ts +54 -1
package/cli/selftune/evolution/audit.ts +24 -19
package/cli/selftune/evolution/constitutional.ts +176 -0
package/cli/selftune/evolution/evidence.ts +18 -13
package/cli/selftune/evolution/evolve-body.ts +104 -7
package/cli/selftune/evolution/evolve.ts +195 -22
package/cli/selftune/evolution/propose-body.ts +18 -1
package/cli/selftune/evolution/propose-description.ts +27 -2
package/cli/selftune/evolution/rollback.ts +11 -15
package/cli/selftune/export.ts +84 -0
package/cli/selftune/grading/auto-grade.ts +14 -4
package/cli/selftune/grading/grade-session.ts +17 -6
package/cli/selftune/hooks/auto-activate.ts +5 -0
package/cli/selftune/hooks/evolution-guard.ts +25 -11
package/cli/selftune/hooks/prompt-log.ts +23 -9
package/cli/selftune/hooks/session-stop.ts +78 -15
package/cli/selftune/hooks/skill-eval.ts +189 -10
package/cli/selftune/index.ts +274 -2
package/cli/selftune/ingestors/claude-replay.ts +48 -21
package/cli/selftune/init.ts +260 -49
package/cli/selftune/last.ts +7 -7
package/cli/selftune/localdb/db.ts +90 -10
package/cli/selftune/localdb/direct-write.ts +573 -0
package/cli/selftune/localdb/materialize.ts +296 -42
package/cli/selftune/localdb/queries.ts +482 -32
package/cli/selftune/localdb/schema.ts +153 -1
package/cli/selftune/monitoring/watch.ts +27 -8
package/cli/selftune/normalization.ts +88 -15
package/cli/selftune/observability.ts +257 -5
package/cli/selftune/orchestrate.ts +176 -53
package/cli/selftune/quickstart.ts +34 -10
package/cli/selftune/repair/skill-usage.ts +15 -2
package/cli/selftune/routes/actions.ts +77 -0
package/cli/selftune/routes/badge.ts +66 -0
package/cli/selftune/routes/doctor.ts +12 -0
package/cli/selftune/routes/index.ts +14 -0
package/cli/selftune/routes/orchestrate-runs.ts +13 -0
package/cli/selftune/routes/overview.ts +14 -0
package/cli/selftune/routes/report.ts +293 -0
package/cli/selftune/routes/skill-report.ts +230 -0
package/cli/selftune/status.ts +203 -7
package/cli/selftune/sync.ts +14 -1
package/cli/selftune/types.ts +52 -2
package/cli/selftune/utils/jsonl.ts +58 -1
package/cli/selftune/utils/selftune-meta.ts +38 -0
package/cli/selftune/utils/skill-log.ts +30 -4
package/cli/selftune/utils/transcript.ts +15 -0
package/cli/selftune/workflows/workflows.ts +7 -6
package/package.json +11 -6
package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
package/packages/telemetry-contract/fixtures/golden.json +1 -0
package/packages/telemetry-contract/fixtures/index.ts +4 -0
package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
package/packages/telemetry-contract/package.json +6 -1
package/packages/telemetry-contract/src/schemas.ts +196 -0
package/packages/telemetry-contract/src/types.ts +3 -1
package/packages/telemetry-contract/src/validators.ts +3 -1
package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
package/packages/ui/package.json +4 -0
package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
package/packages/ui/src/components/section-cards.tsx +31 -14
package/packages/ui/src/types.ts +1 -0
package/skill/SKILL.md +214 -174
package/skill/Workflows/AlphaUpload.md +45 -0
package/skill/Workflows/Baseline.md +18 -12
package/skill/Workflows/Composability.md +3 -3
package/skill/Workflows/Dashboard.md +39 -91
package/skill/Workflows/Doctor.md +93 -66
package/skill/Workflows/Evals.md +49 -40
package/skill/Workflows/Evolve.md +76 -28
package/skill/Workflows/EvolveBody.md +37 -38
package/skill/Workflows/Initialize.md +145 -26
package/skill/Workflows/Orchestrate.md +11 -2
package/skill/Workflows/Sync.md +23 -0
package/skill/Workflows/Watch.md +2 -5
package/skill/agents/diagnosis-analyst.md +163 -0
package/skill/agents/evolution-reviewer.md +149 -0
package/skill/agents/integration-guide.md +154 -0
package/skill/agents/pattern-analyst.md +149 -0
package/skill/assets/multi-skill-settings.json +1 -1
package/skill/assets/single-skill-settings.json +1 -1
package/skill/references/interactive-config.md +39 -0
package/skill/references/invocation-taxonomy.md +34 -0
package/skill/references/logs.md +15 -1
package/skill/references/setup-patterns.md +3 -3
package/skill/settings_snippet.json +1 -1
package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60

package/cli/selftune/dashboard.ts CHANGED Viewed

@@ -46,7 +46,7 @@ Usage:
   const openBrowser = !args.includes("--no-open");
   const { startDashboardServer } = await import("./dashboard-server.js");
-  const { stop } = await startDashboardServer({ port, openBrowser });
+  const { stop } = await startDashboardServer({ port, openBrowser, runtimeMode: "standalone" });
   await new Promise<void>((resolve) => {
     let closed = false;
     const keepAlive = setInterval(() => {}, 1 << 30);

package/cli/selftune/eval/baseline.ts CHANGED Viewed

@@ -186,14 +186,18 @@ Options:
     const raw = readFileSync(values["eval-set"], "utf-8");
     evalSet = JSON.parse(raw) as EvalEntry[];
   } else {
-    // Build from logs
-    const { QUERY_LOG } = await import("../constants.js");
-    const { readJsonl } = await import("../utils/jsonl.js");
-    const { readEffectiveSkillUsageRecords } = await import("../utils/skill-log.js");
+    // Build from logs via SQLite
+    const { getDb } = await import("../localdb/db.js");
+    const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
     const { buildEvalSet } = await import("./hooks-to-evals.js");
-    const skillRecords = readEffectiveSkillUsageRecords();
-    const queryRecords = readJsonl(QUERY_LOG);
-    evalSet = buildEvalSet(skillRecords, queryRecords, values.skill);
+    const db = getDb();
+    const skillRecords = querySkillUsageRecords(db);
+    const queryRecords = queryQueryLog(db);
+    evalSet = buildEvalSet(
+      skillRecords as Parameters<typeof buildEvalSet>[0],
+      queryRecords as Parameters<typeof buildEvalSet>[1],
+      values.skill,
+    );
   }
   // Detect agent

package/cli/selftune/eval/hooks-to-evals.ts CHANGED Viewed

@@ -4,20 +4,30 @@
  *
  * Converts hook logs into trigger eval sets compatible with run_eval / run_loop.
  *
- * Three input logs (all written automatically by hooks):
- *   ~/.claude/skill_usage_log.jsonl      - queries that DID trigger a skill
- *   ~/.claude/all_queries_log.jsonl      - ALL queries, triggered or not
- *   ~/.claude/session_telemetry_log.jsonl - per-session process metrics (Stop hook)
+ * Default read path is SQLite (via localdb/queries). JSONL fallback is used only
+ * when custom --skill-log / --query-log / --telemetry-log paths are supplied
+ * (test/custom-path override).
+ *
+ * Three underlying log sources (all written automatically by hooks):
+ *   skill_usage     - queries that DID trigger a skill
+ *   query_log       - ALL queries, triggered or not
+ *   session_telemetry - per-session process metrics (Stop hook)
  *
  * For a given skill:
- *   Positives (should_trigger=true)  -> queries in skill_usage_log for that skill
- *   Negatives (should_trigger=false) -> queries in all_queries_log that never triggered
+ *   Positives (should_trigger=true)  -> queries in skill_usage for that skill
+ *   Negatives (should_trigger=false) -> queries in query_log that never triggered
  *                                       that skill (cross-skill AND untriggered queries)
  */
 import { writeFileSync } from "node:fs";
 import { parseArgs } from "node:util";
 import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
+import { getDb } from "../localdb/db.js";
+import {
+  queryQueryLog,
+  querySessionTelemetry,
+  querySkillUsageRecords,
+} from "../localdb/queries.js";
 import type {
   EvalEntry,
   InvocationType,
@@ -32,7 +42,6 @@ import {
   filterActionableSkillUsageRecords,
 } from "../utils/query-filter.js";
 import { seededShuffle } from "../utils/seeded-random.js";
-import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
 import { isHighConfidencePositiveSkillRecord } from "../utils/skill-usage-confidence.js";
 import { generateSyntheticEvals } from "./synthetic-evals.js";
@@ -456,14 +465,29 @@ export async function cliMain(): Promise<void> {
   // --- Log-based mode (original behavior) ---
   const skillLogPath = values["skill-log"] ?? SKILL_LOG;
-  const skillRecords =
-    skillLogPath === SKILL_LOG
-      ? readEffectiveSkillUsageRecords()
-      : readJsonl<SkillUsageRecord>(skillLogPath);
-  const queryRecords = readJsonl<QueryLogRecord>(values["query-log"] ?? QUERY_LOG);
-  const telemetryRecords = readJsonl<SessionTelemetryRecord>(
-    values["telemetry-log"] ?? TELEMETRY_LOG,
-  );
+  const queryLogPath = values["query-log"] ?? QUERY_LOG;
+  const telemetryLogPath = values["telemetry-log"] ?? TELEMETRY_LOG;
+  let skillRecords: SkillUsageRecord[];
+  let queryRecords: QueryLogRecord[];
+  let telemetryRecords: SessionTelemetryRecord[];
+  // SQLite is the default path; JSONL fallback only for custom --*-log overrides
+  if (
+    skillLogPath === SKILL_LOG &&
+    queryLogPath === QUERY_LOG &&
+    telemetryLogPath === TELEMETRY_LOG
+  ) {
+    const db = getDb();
+    skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
+    queryRecords = queryQueryLog(db) as QueryLogRecord[];
+    telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
+  } else {
+    // test/custom-path fallback
+    skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
+    queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
+    telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
+  }
   if (values["list-skills"]) {
     listSkills(skillRecords, queryRecords, telemetryRecords);

package/cli/selftune/eval/synthetic-evals.ts CHANGED Viewed

@@ -37,6 +37,7 @@ export function buildSyntheticPrompt(
   skillName: string,
   maxPositives: number,
   maxNegatives: number,
+  realExamples?: { positive: string[]; negative: string[] },
 ): { system: string; user: string } {
   const system = `You are generating test queries for a coding agent skill. Given the skill description below, generate realistic user queries.
@@ -55,13 +56,27 @@ For NEGATIVE queries (should NOT trigger this skill):
 Output as JSON array with no surrounding text:
 [{"query": "...", "should_trigger": true, "invocation_type": "explicit|implicit|contextual|negative"}]`;
-  const user = `Skill name: ${skillName}
+  let user = `Skill name: ${skillName}
 Skill content:
 ${skillContent}
 Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false). Return ONLY the JSON array.`;
+  if (realExamples && (realExamples.positive.length > 0 || realExamples.negative.length > 0)) {
+    const parts: string[] = ["\n\nReal user queries for style and phrasing reference:"];
+    if (realExamples.positive.length > 0) {
+      parts.push("Queries that triggered this skill:");
+      parts.push(...realExamples.positive.map((q) => `  - "${q}"`));
+    }
+    if (realExamples.negative.length > 0) {
+      parts.push("Queries that did NOT trigger (general queries):");
+      parts.push(...realExamples.negative.map((q) => `  - "${q}"`));
+    }
+    parts.push("\nGenerate queries that match this natural phrasing style.");
+    user += parts.join("\n");
+  }
   return { system, user };
 }
@@ -160,11 +175,49 @@ export async function generateSyntheticEvals(
   const skillContent = readFileSync(skillPath, "utf-8");
+  // Load real query examples from the database for few-shot style guidance.
+  // Uses dynamic imports since SQLite may not be available in all contexts.
+  let realExamples: { positive: string[]; negative: string[] } | undefined;
+  try {
+    const { getDb } = await import("../localdb/db.js");
+    const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
+    const { isHighConfidencePositiveSkillRecord } = await import(
+      "../utils/skill-usage-confidence.js"
+    );
+    const db = getDb();
+    // Positives: high-confidence triggered records for this skill
+    const skillRecords = querySkillUsageRecords(db);
+    const positive = skillRecords
+      .filter((r) => isHighConfidencePositiveSkillRecord(r, skillName))
+      .map((r) => r.query)
+      .filter((q): q is string => typeof q === "string" && q.length > 0)
+      .slice(0, 5);
+    // Negatives: from the query log, excluding (case-insensitively) the up-to-5 positives sampled above
+    const posSet = new Set(positive.map((q: string) => q.toLowerCase()));
+    const allQueries = queryQueryLog(db);
+    const negative = allQueries
+      .map((r) => r.query)
+      .filter(
+        (q): q is string => typeof q === "string" && q.length > 0 && !posSet.has(q.toLowerCase()),
+      )
+      .slice(0, 5);
+    if (positive.length > 0) {
+      realExamples = { positive, negative };
+    }
+  } catch {
+    // fail-open: synthetic gen works without real examples
+  }
   const { system, user } = buildSyntheticPrompt(
     skillContent,
     skillName,
     maxPositives,
     maxNegatives,
+    realExamples,
   );
   const raw = await callLlm(system, user, agent, options.modelFlag);

package/cli/selftune/evolution/audit.ts CHANGED Viewed

@@ -1,33 +1,37 @@
 /**
  * Evolution audit trail: append, read, and query audit entries.
+ *
+ * Uses SQLite as the primary store via getDb(). Tests inject an in-memory
+ * database via _setTestDb() for isolation.
  */
-import { EVOLUTION_AUDIT_LOG } from "../constants.js";
+import { getDb } from "../localdb/db.js";
+import { writeEvolutionAuditToDb } from "../localdb/direct-write.js";
+import { queryEvolutionAudit } from "../localdb/queries.js";
 import type { EvolutionAuditEntry } from "../types.js";
-import { appendJsonl, readJsonl } from "../utils/jsonl.js";
-/** Append an audit entry to the evolution audit log. */
-export function appendAuditEntry(
-  entry: EvolutionAuditEntry,
-  logPath: string = EVOLUTION_AUDIT_LOG,
-): void {
-  appendJsonl(logPath, entry);
+/** Append an audit entry to the evolution audit log (SQLite). */
+export function appendAuditEntry(entry: EvolutionAuditEntry, _logPath?: string): void {
+  writeEvolutionAuditToDb(entry);
 }
 /**
  * Read all audit entries, optionally filtered by skill name.
  *
- * When skillName is provided, returns only entries whose `details` field
- * contains the skill name (case-insensitive match).
+ * @param skillName - Optional skill name to filter by
  */
-export function readAuditTrail(
-  skillName?: string,
-  logPath: string = EVOLUTION_AUDIT_LOG,
-): EvolutionAuditEntry[] {
-  const entries = readJsonl<EvolutionAuditEntry>(logPath);
+export function readAuditTrail(skillName?: string, _logPath?: string): EvolutionAuditEntry[] {
+  const db = getDb();
+  const entries = queryEvolutionAudit(db, skillName) as EvolutionAuditEntry[];
   if (!skillName) return entries;
+  // queryEvolutionAudit filters by skill_name field; only when that yields no rows,
+  // fall back to a case-insensitive details match (some entries may name the skill in details only)
   const needle = skillName.toLowerCase();
-  return entries.filter((e) => (e.details ?? "").toLowerCase().includes(needle));
+  return entries.length > 0
+    ? entries
+    : (queryEvolutionAudit(db) as EvolutionAuditEntry[]).filter((e) =>
+        (e.details ?? "").toLowerCase().includes(needle),
+      );
 }
 /**
@@ -36,9 +40,10 @@ export function readAuditTrail(
  */
 export function getLastDeployedProposal(
   skillName: string,
-  logPath: string = EVOLUTION_AUDIT_LOG,
+  _logPath?: string,
 ): EvolutionAuditEntry | null {
-  const entries = readAuditTrail(skillName, logPath);
+  const entries = readAuditTrail(skillName);
   const deployed = entries.filter((e) => e.action === "deployed");
-  return deployed.length > 0 ? deployed[deployed.length - 1] : null;
+  // Results are DESC-ordered from SQLite, so first match is most recent
+  return deployed.length > 0 ? deployed[0] : null;
 }

package/cli/selftune/evolution/constitutional.ts ADDED Viewed

@@ -0,0 +1,176 @@
+/**
+ * constitutional.ts
+ *
+ * Deterministic pre-validation gate for evolution proposals. Runs before
+ * confidence checks and LLM validation to reject obviously bad proposals
+ * cheaply — no LLM calls required.
+ *
+ * Four principles:
+ *   1. Size constraint — 8192-char limit + word-count ratio (0.3x–3.0x of original)
+ *   2. No XML injection — reject embedded XML/HTML opening tags
+ *   3. No unbounded broadening — reject "all/any/every/everything" unless its sentence enumerates
+ *   4. Anchor preservation — preserve the USE WHEN trigger and every $variable ref
+ */
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+export interface ConstitutionalResult {
+  passed: boolean;
+  violations: string[];
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function wordCount(text: string): number {
+  return text.split(/\s+/).filter(Boolean).length;
+}
+/**
+ * Return the sentence of `text` that contains `matchIndex`. Splits after `.` `!` `?`
+ * plus whitespace only when the next char is A-Z, 0-9, or a quote, so "e.g."/"i.e."
+ * followed by lowercase text do not split. Falls back to the whole text if no sentence matches.
+ */
+function sentenceContaining(text: string, matchIndex: number): string {
+  // Split only when the next token looks like a new sentence (uppercase letter, digit, or quote).
+  const sentences = text.split(/(?<=[.!?])\s+(?=[A-Z0-9"'‘“])/);
+  let offset = 0;
+  for (const sentence of sentences) {
+    const realOffset = text.indexOf(sentence, offset);
+    if (realOffset === -1) break;
+    if (matchIndex >= realOffset && matchIndex < realOffset + sentence.length) {
+      return sentence;
+    }
+    offset = realOffset + sentence.length;
+  }
+  return text; // fallback: treat entire text as one sentence
+}
+const ENUMERATION_MARKERS = /\b(?:including|such as|like)\b|e\.g\.|,\s*\w+\s*,/i;
+// ---------------------------------------------------------------------------
+// Main check
+// ---------------------------------------------------------------------------
+export function checkConstitution(
+  proposed: string,
+  original: string,
+  _skillName: string,
+): ConstitutionalResult {
+  const violations: string[] = [];
+  // -------------------------------------------------------------------------
+  // Principle 1: Size constraint (absolute char cap; word ratio skipped if original has no words)
+  // -------------------------------------------------------------------------
+  if (proposed.length > 8192) {
+    violations.push(`Size: ${proposed.length} chars exceeds 8192 limit`);
+  }
+  const origWords = wordCount(original);
+  const propWords = wordCount(proposed);
+  if (origWords > 0) {
+    const ratio = propWords / origWords;
+    if (ratio > 3.0) {
+      violations.push(
+        `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), exceeds 3.0x limit`,
+      );
+    }
+    if (ratio < 0.3) {
+      violations.push(
+        `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), below 0.3x limit`,
+      );
+    }
+  }
+  // -------------------------------------------------------------------------
+  // Principle 2: No XML injection (matches any opening tag that starts with a letter)
+  // -------------------------------------------------------------------------
+  if (/<[a-zA-Z][^>]*>/.test(proposed)) {
+    violations.push("XML injection: proposed description contains XML/HTML tags");
+  }
+  // -------------------------------------------------------------------------
+  // Principle 3: No unbounded broadening (one violation per match whose sentence lacks a marker)
+  // -------------------------------------------------------------------------
+  const broadenPattern = /\b(all|any|every|everything)\b/gi;
+  let match: RegExpExecArray | null = broadenPattern.exec(proposed);
+  while (match !== null) {
+    const sentence = sentenceContaining(proposed, match.index);
+    if (!ENUMERATION_MARKERS.test(sentence)) {
+      violations.push(
+        `Unbounded broadening: "${match[0]}" at position ${match.index} without enumeration qualifier`,
+      );
+    }
+    match = broadenPattern.exec(proposed);
+  }
+  // -------------------------------------------------------------------------
+  // Principle 4: Anchor preservation
+  // -------------------------------------------------------------------------
+  // Check for USE WHEN triggers (case-insensitive)
+  if (/USE WHEN/i.test(original) && !/USE WHEN/i.test(proposed)) {
+    violations.push(
+      'Anchor: original contains "USE WHEN" trigger phrase that is missing in proposed',
+    );
+  }
+  // Check for $variable references: each one in the original must reappear verbatim
+  const dollarRefs = original.match(/\$[A-Za-z0-9_-]+/g);
+  if (dollarRefs) {
+    const proposedDollarRefs = new Set(proposed.match(/\$[A-Za-z0-9_-]+/g) ?? []);
+    for (const ref of dollarRefs) {
+      if (!proposedDollarRefs.has(ref)) {
+        violations.push(`Anchor: original contains "${ref}" reference that is missing in proposed`);
+      }
+    }
+  }
+  return {
+    passed: violations.length === 0,
+    violations,
+  };
+}
+// ---------------------------------------------------------------------------
+// Size-only check (for body evolution)
+// ---------------------------------------------------------------------------
+/**
+ * Body-specific constitutional check. Only enforces the word-count ratio
+ * (0.3x–3.0x of original; skipped below 10 original words). The 8192-char absolute
+ * limit does not apply to body text since bodies are typically much larger than descriptions.
+ */
+export function checkConstitutionSizeOnly(
+  proposed: string,
+  original: string,
+): ConstitutionalResult {
+  const violations: string[] = [];
+  const origWords = wordCount(original);
+  const propWords = wordCount(proposed);
+  // Only enforce word-count ratio when the original is substantial enough
+  // for the ratio to be meaningful (at least 10 words).
+  if (origWords >= 10) {
+    const ratio = propWords / origWords;
+    if (ratio > 3.0) {
+      violations.push(
+        `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), exceeds 3.0x limit`,
+      );
+    }
+    if (ratio < 0.3) {
+      violations.push(
+        `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), below 0.3x limit`,
+      );
+    }
+  }
+  return {
+    passed: violations.length === 0,
+    violations,
+  };
+}

package/cli/selftune/evolution/evidence.ts CHANGED Viewed

@@ -1,26 +1,31 @@
 /**
  * Evolution evidence trail: append and read proposal/eval artifacts that power
  * explainable dashboard drill-downs.
+ *
+ * Uses SQLite as the primary store via getDb(). Tests inject an in-memory
+ * database via _setTestDb() for isolation.
  */
-import { EVOLUTION_EVIDENCE_LOG } from "../constants.js";
+import { getDb } from "../localdb/db.js";
+import { writeEvolutionEvidenceToDb } from "../localdb/direct-write.js";
+import { queryEvolutionEvidence } from "../localdb/queries.js";
 import type { EvolutionEvidenceEntry } from "../types.js";
-import { appendJsonl, readJsonl } from "../utils/jsonl.js";
-/** Append a structured evidence artifact to the evolution evidence log. */
+/** Append a structured evidence artifact to the evolution evidence log (SQLite). */
 export function appendEvidenceEntry(
   entry: EvolutionEvidenceEntry,
-  logPath: string = EVOLUTION_EVIDENCE_LOG,
+  /** @deprecated Unused; retained for API compatibility during migration */
+  _logPath?: string,
 ): void {
-  appendJsonl(logPath, entry);
+  writeEvolutionEvidenceToDb(entry);
 }
-/** Read all evidence entries, optionally filtered by exact skill name. */
-export function readEvidenceTrail(
-  skillName?: string,
-  logPath: string = EVOLUTION_EVIDENCE_LOG,
-): EvolutionEvidenceEntry[] {
-  const entries = readJsonl<EvolutionEvidenceEntry>(logPath);
-  if (!skillName) return entries;
-  return entries.filter((entry) => entry.skill_name === skillName);
+/**
+ * Read all evidence entries, optionally filtered by exact skill name.
+ *
+ * @param skillName - Optional skill name to filter by
+ */
+export function readEvidenceTrail(skillName?: string, _logPath?: string): EvolutionEvidenceEntry[] {
+  const db = getDb();
+  return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
 }

package/cli/selftune/evolution/evolve-body.ts CHANGED Viewed

@@ -9,9 +9,10 @@
 import { existsSync, readFileSync } from "node:fs";
 import { parseArgs } from "node:util";
-import { QUERY_LOG } from "../constants.js";
 import { buildEvalSet } from "../eval/hooks-to-evals.js";
 import { readGradingResultsForSkill } from "../grading/results.js";
+import { getDb } from "../localdb/db.js";
+import { queryQueryLog, querySkillUsageRecords } from "../localdb/queries.js";
 import type {
   BodyEvolutionProposal,
   BodyValidationResult,
@@ -24,13 +25,13 @@ import type {
   QueryLogRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { readJsonl } from "../utils/jsonl.js";
-import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
 import { appendAuditEntry } from "./audit.js";
+import { checkConstitutionSizeOnly } from "./constitutional.js";
 import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
 import { appendEvidenceEntry } from "./evidence.js";
 import { extractFailurePatterns } from "./extract-patterns.js";
-import { generateBodyProposal } from "./propose-body.js";
+import { type ExecutionContext, generateBodyProposal } from "./propose-body.js";
 import { generateRoutingProposal } from "./propose-routing.js";
 import { refineBodyProposal } from "./refine-body.js";
 import { validateBodyProposal } from "./validate-body.js";
@@ -85,7 +86,7 @@ export interface EvolveBodyDeps {
   appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
   appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry;
   buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
-  readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords;
+  readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
   readFileSync?: typeof readFileSync;
   writeFileSync?: (path: string, data: string, encoding: string) => void;
 }
@@ -143,7 +144,11 @@ export async function evolveBody(
   const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry;
   const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
   const _readEffectiveSkillUsageRecords =
-    _deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords;
+    _deps.readEffectiveSkillUsageRecords ??
+    (() => {
+      const db = getDb();
+      return querySkillUsageRecords(db) as SkillUsageRecord[];
+    });
   const _readFileSync = _deps.readFileSync ?? readFileSync;
   const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
@@ -198,7 +203,8 @@ export async function evolveBody(
       }
       evalSet = parsed as EvalEntry[];
     } else {
-      const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
+      const dbForQuery = getDb();
+      const queryRecords = queryQueryLog(dbForQuery) as QueryLogRecord[];
       evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
     }
@@ -222,6 +228,64 @@ export async function evolveBody(
     const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);
+    // Compute execution context from session telemetry (fail-open)
+    let executionContext: ExecutionContext | undefined;
+    try {
+      const { querySessionTelemetry } = await import("../localdb/queries.js");
+      const db = getDb();
+      const allTelemetry = querySessionTelemetry(db);
+      // Find session IDs that used this skill
+      const skillSessionIds = new Set(
+        skillUsage
+          .filter((r) => r.skill_name?.toLowerCase() === skillName.toLowerCase() && r.triggered)
+          .map((r) => r.session_id),
+      );
+      // Filter telemetry to skill sessions
+      const telemetryForSkill = allTelemetry.filter((t) => skillSessionIds.has(t.session_id));
+      if (telemetryForSkill.length > 0) {
+        const mean = (arr: number[]) => arr.reduce((a, b) => a + b, 0) / arr.length;
+        const toolCallCounts = telemetryForSkill.map((t) => t.total_tool_calls ?? 0);
+        const errorCounts = telemetryForSkill.map((t) => t.errors_encountered ?? 0);
+        const turnCounts = telemetryForSkill.map((t) => t.assistant_turns ?? 0);
+        // Count tool frequency across this skill's sessions (sessions with >2 errors also feed failureToolFreq)
+        const toolFreq = new Map<string, number>();
+        const failureToolFreq = new Map<string, number>();
+        for (const t of telemetryForSkill) {
+          const tools: Record<string, number> = t.tool_calls ?? {};
+          const isFailure = (t.errors_encountered ?? 0) > 2;
+          for (const [tool, count] of Object.entries(tools)) {
+            toolFreq.set(tool, (toolFreq.get(tool) ?? 0) + count);
+            if (isFailure) {
+              failureToolFreq.set(tool, (failureToolFreq.get(tool) ?? 0) + count);
+            }
+          }
+        }
+        const topN = (freq: Map<string, number>, n: number) =>
+          [...freq.entries()]
+            .sort((a, b) => b[1] - a[1])
+            .slice(0, n)
+            .map(([k]) => k);
+        executionContext = {
+          avgToolCalls: mean(toolCallCounts),
+          avgErrors: mean(errorCounts),
+          avgTurns: mean(turnCounts),
+          commonTools: topN(toolFreq, 5),
+          failureTools: topN(failureToolFreq, 3),
+        };
+      }
+    } catch {
+      // fail-open: body evolution works without execution context
+    }
     // Step 4: Generate -> validate -> refine loop
     let lastProposal: BodyEvolutionProposal | null = null;
     let lastValidation: BodyValidationResult | null = null;
@@ -253,6 +317,7 @@ export async function evolveBody(
             teacherAgent,
             teacherModel,
             fewShotExamples,
+            executionContext,
           );
         }
       } else if (lastProposal && lastValidation) {
@@ -285,6 +350,38 @@ export async function evolveBody(
         eval_set: evalSet,
       });
+      // Constitutional size check (deterministic, pre-validation — body only)
+      if (target === "body") {
+        const constitution = checkConstitutionSizeOnly(
+          proposal.proposed_body,
+          proposal.original_body,
+        );
+        if (!constitution.passed) {
+          const reason = `Constitutional: ${constitution.violations.join("; ")}`;
+          recordAudit(proposal.proposal_id, "rejected", reason);
+          recordEvidence({
+            timestamp: new Date().toISOString(),
+            proposal_id: proposal.proposal_id,
+            skill_name: skillName,
+            skill_path: skillPath,
+            target,
+            stage: "rejected",
+            rationale: proposal.rationale,
+            confidence: proposal.confidence,
+            details: reason,
+            original_text: proposal.original_body,
+            proposed_text: proposal.proposed_body,
+          });
+          return {
+            proposal: lastProposal,
+            validation: null,
+            deployed: false,
+            auditEntries,
+            reason,
+          };
+        }
+      }
       // Check confidence threshold
       if (proposal.confidence < confidenceThreshold) {
         recordAudit(