selftune 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +57 -18
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +156 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +33 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +215 -693
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +39 -15
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +14 -4
  40. package/cli/selftune/grading/grade-session.ts +17 -6
  41. package/cli/selftune/hooks/auto-activate.ts +5 -0
  42. package/cli/selftune/hooks/evolution-guard.ts +25 -11
  43. package/cli/selftune/hooks/prompt-log.ts +23 -9
  44. package/cli/selftune/hooks/session-stop.ts +78 -15
  45. package/cli/selftune/hooks/skill-eval.ts +189 -10
  46. package/cli/selftune/index.ts +274 -2
  47. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  48. package/cli/selftune/init.ts +260 -49
  49. package/cli/selftune/last.ts +7 -7
  50. package/cli/selftune/localdb/db.ts +90 -10
  51. package/cli/selftune/localdb/direct-write.ts +573 -0
  52. package/cli/selftune/localdb/materialize.ts +296 -42
  53. package/cli/selftune/localdb/queries.ts +482 -32
  54. package/cli/selftune/localdb/schema.ts +153 -1
  55. package/cli/selftune/monitoring/watch.ts +27 -8
  56. package/cli/selftune/normalization.ts +88 -15
  57. package/cli/selftune/observability.ts +257 -5
  58. package/cli/selftune/orchestrate.ts +176 -53
  59. package/cli/selftune/quickstart.ts +34 -10
  60. package/cli/selftune/repair/skill-usage.ts +15 -2
  61. package/cli/selftune/routes/actions.ts +77 -0
  62. package/cli/selftune/routes/badge.ts +66 -0
  63. package/cli/selftune/routes/doctor.ts +12 -0
  64. package/cli/selftune/routes/index.ts +14 -0
  65. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  66. package/cli/selftune/routes/overview.ts +14 -0
  67. package/cli/selftune/routes/report.ts +293 -0
  68. package/cli/selftune/routes/skill-report.ts +230 -0
  69. package/cli/selftune/status.ts +203 -7
  70. package/cli/selftune/sync.ts +14 -1
  71. package/cli/selftune/types.ts +52 -2
  72. package/cli/selftune/utils/jsonl.ts +58 -1
  73. package/cli/selftune/utils/selftune-meta.ts +38 -0
  74. package/cli/selftune/utils/skill-log.ts +30 -4
  75. package/cli/selftune/utils/transcript.ts +15 -0
  76. package/cli/selftune/workflows/workflows.ts +7 -6
  77. package/package.json +11 -6
  78. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  79. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  81. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  83. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  84. package/packages/telemetry-contract/package.json +6 -1
  85. package/packages/telemetry-contract/src/schemas.ts +196 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +39 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +145 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +15 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -46,7 +46,7 @@ Usage:
46
46
 
47
47
  const openBrowser = !args.includes("--no-open");
48
48
  const { startDashboardServer } = await import("./dashboard-server.js");
49
- const { stop } = await startDashboardServer({ port, openBrowser });
49
+ const { stop } = await startDashboardServer({ port, openBrowser, runtimeMode: "standalone" });
50
50
  await new Promise<void>((resolve) => {
51
51
  let closed = false;
52
52
  const keepAlive = setInterval(() => {}, 1 << 30);
@@ -186,14 +186,18 @@ Options:
186
186
  const raw = readFileSync(values["eval-set"], "utf-8");
187
187
  evalSet = JSON.parse(raw) as EvalEntry[];
188
188
  } else {
189
- // Build from logs
190
- const { QUERY_LOG } = await import("../constants.js");
191
- const { readJsonl } = await import("../utils/jsonl.js");
192
- const { readEffectiveSkillUsageRecords } = await import("../utils/skill-log.js");
189
+ // Build from logs via SQLite
190
+ const { getDb } = await import("../localdb/db.js");
191
+ const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
193
192
  const { buildEvalSet } = await import("./hooks-to-evals.js");
194
- const skillRecords = readEffectiveSkillUsageRecords();
195
- const queryRecords = readJsonl(QUERY_LOG);
196
- evalSet = buildEvalSet(skillRecords, queryRecords, values.skill);
193
+ const db = getDb();
194
+ const skillRecords = querySkillUsageRecords(db);
195
+ const queryRecords = queryQueryLog(db);
196
+ evalSet = buildEvalSet(
197
+ skillRecords as Parameters<typeof buildEvalSet>[0],
198
+ queryRecords as Parameters<typeof buildEvalSet>[1],
199
+ values.skill,
200
+ );
197
201
  }
198
202
 
199
203
  // Detect agent
@@ -4,20 +4,30 @@
4
4
  *
5
5
  * Converts hook logs into trigger eval sets compatible with run_eval / run_loop.
6
6
  *
7
- * Three input logs (all written automatically by hooks):
8
- * ~/.claude/skill_usage_log.jsonl - queries that DID trigger a skill
9
- * ~/.claude/all_queries_log.jsonl - ALL queries, triggered or not
10
- * ~/.claude/session_telemetry_log.jsonl - per-session process metrics (Stop hook)
7
+ * Default read path is SQLite (via localdb/queries). JSONL fallback is used only
8
+ * when custom --skill-log / --query-log / --telemetry-log paths are supplied
9
+ * (test/custom-path override).
10
+ *
11
+ * Three underlying log sources (all written automatically by hooks):
12
+ * skill_usage - queries that DID trigger a skill
13
+ * query_log - ALL queries, triggered or not
14
+ * session_telemetry - per-session process metrics (Stop hook)
11
15
  *
12
16
  * For a given skill:
13
- * Positives (should_trigger=true) -> queries in skill_usage_log for that skill
14
- * Negatives (should_trigger=false) -> queries in all_queries_log that never triggered
17
+ * Positives (should_trigger=true) -> queries in skill_usage for that skill
18
+ * Negatives (should_trigger=false) -> queries in query_log that never triggered
15
19
  * that skill (cross-skill AND untriggered queries)
16
20
  */
17
21
 
18
22
  import { writeFileSync } from "node:fs";
19
23
  import { parseArgs } from "node:util";
20
24
  import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
25
+ import { getDb } from "../localdb/db.js";
26
+ import {
27
+ queryQueryLog,
28
+ querySessionTelemetry,
29
+ querySkillUsageRecords,
30
+ } from "../localdb/queries.js";
21
31
  import type {
22
32
  EvalEntry,
23
33
  InvocationType,
@@ -32,7 +42,6 @@ import {
32
42
  filterActionableSkillUsageRecords,
33
43
  } from "../utils/query-filter.js";
34
44
  import { seededShuffle } from "../utils/seeded-random.js";
35
- import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
36
45
  import { isHighConfidencePositiveSkillRecord } from "../utils/skill-usage-confidence.js";
37
46
  import { generateSyntheticEvals } from "./synthetic-evals.js";
38
47
 
@@ -456,14 +465,29 @@ export async function cliMain(): Promise<void> {
456
465
 
457
466
  // --- Log-based mode (original behavior) ---
458
467
  const skillLogPath = values["skill-log"] ?? SKILL_LOG;
459
- const skillRecords =
460
- skillLogPath === SKILL_LOG
461
- ? readEffectiveSkillUsageRecords()
462
- : readJsonl<SkillUsageRecord>(skillLogPath);
463
- const queryRecords = readJsonl<QueryLogRecord>(values["query-log"] ?? QUERY_LOG);
464
- const telemetryRecords = readJsonl<SessionTelemetryRecord>(
465
- values["telemetry-log"] ?? TELEMETRY_LOG,
466
- );
468
+ const queryLogPath = values["query-log"] ?? QUERY_LOG;
469
+ const telemetryLogPath = values["telemetry-log"] ?? TELEMETRY_LOG;
470
+
471
+ let skillRecords: SkillUsageRecord[];
472
+ let queryRecords: QueryLogRecord[];
473
+ let telemetryRecords: SessionTelemetryRecord[];
474
+
475
+ // SQLite is the default path; JSONL fallback only for custom --*-log overrides
476
+ if (
477
+ skillLogPath === SKILL_LOG &&
478
+ queryLogPath === QUERY_LOG &&
479
+ telemetryLogPath === TELEMETRY_LOG
480
+ ) {
481
+ const db = getDb();
482
+ skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
483
+ queryRecords = queryQueryLog(db) as QueryLogRecord[];
484
+ telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
485
+ } else {
486
+ // test/custom-path fallback
487
+ skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
488
+ queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
489
+ telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
490
+ }
467
491
 
468
492
  if (values["list-skills"]) {
469
493
  listSkills(skillRecords, queryRecords, telemetryRecords);
@@ -37,6 +37,7 @@ export function buildSyntheticPrompt(
37
37
  skillName: string,
38
38
  maxPositives: number,
39
39
  maxNegatives: number,
40
+ realExamples?: { positive: string[]; negative: string[] },
40
41
  ): { system: string; user: string } {
41
42
  const system = `You are generating test queries for a coding agent skill. Given the skill description below, generate realistic user queries.
42
43
 
@@ -55,13 +56,27 @@ For NEGATIVE queries (should NOT trigger this skill):
55
56
  Output as JSON array with no surrounding text:
56
57
  [{"query": "...", "should_trigger": true, "invocation_type": "explicit|implicit|contextual|negative"}]`;
57
58
 
58
- const user = `Skill name: ${skillName}
59
+ let user = `Skill name: ${skillName}
59
60
 
60
61
  Skill content:
61
62
  ${skillContent}
62
63
 
63
64
  Generate exactly ${maxPositives} positive queries (should_trigger: true) and ${maxNegatives} negative queries (should_trigger: false). Return ONLY the JSON array.`;
64
65
 
66
+ if (realExamples && (realExamples.positive.length > 0 || realExamples.negative.length > 0)) {
67
+ const parts: string[] = ["\n\nReal user queries for style and phrasing reference:"];
68
+ if (realExamples.positive.length > 0) {
69
+ parts.push("Queries that triggered this skill:");
70
+ parts.push(...realExamples.positive.map((q) => ` - "${q}"`));
71
+ }
72
+ if (realExamples.negative.length > 0) {
73
+ parts.push("Queries that did NOT trigger (general queries):");
74
+ parts.push(...realExamples.negative.map((q) => ` - "${q}"`));
75
+ }
76
+ parts.push("\nGenerate queries that match this natural phrasing style.");
77
+ user += parts.join("\n");
78
+ }
79
+
65
80
  return { system, user };
66
81
  }
67
82
 
@@ -160,11 +175,49 @@ export async function generateSyntheticEvals(
160
175
 
161
176
  const skillContent = readFileSync(skillPath, "utf-8");
162
177
 
178
+ // Load real query examples from the database for few-shot style guidance.
179
+ // Uses dynamic imports since SQLite may not be available in all contexts.
180
+ let realExamples: { positive: string[]; negative: string[] } | undefined;
181
+ try {
182
+ const { getDb } = await import("../localdb/db.js");
183
+ const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
184
+ const { isHighConfidencePositiveSkillRecord } = await import(
185
+ "../utils/skill-usage-confidence.js"
186
+ );
187
+
188
+ const db = getDb();
189
+
190
+ // Positives: high-confidence triggered records for this skill
191
+ const skillRecords = querySkillUsageRecords(db);
192
+ const positive = skillRecords
193
+ .filter((r) => isHighConfidencePositiveSkillRecord(r, skillName))
194
+ .map((r) => r.query)
195
+ .filter((q): q is string => typeof q === "string" && q.length > 0)
196
+ .slice(0, 5);
197
+
198
+ // Negatives: from query_log, excluding the (up to 5) positives sampled above
199
+ const posSet = new Set(positive.map((q: string) => q.toLowerCase()));
200
+ const allQueries = queryQueryLog(db);
201
+ const negative = allQueries
202
+ .map((r) => r.query)
203
+ .filter(
204
+ (q): q is string => typeof q === "string" && q.length > 0 && !posSet.has(q.toLowerCase()),
205
+ )
206
+ .slice(0, 5);
207
+
208
+ if (positive.length > 0) {
209
+ realExamples = { positive, negative };
210
+ }
211
+ } catch {
212
+ // fail-open: synthetic gen works without real examples
213
+ }
214
+
163
215
  const { system, user } = buildSyntheticPrompt(
164
216
  skillContent,
165
217
  skillName,
166
218
  maxPositives,
167
219
  maxNegatives,
220
+ realExamples,
168
221
  );
169
222
 
170
223
  const raw = await callLlm(system, user, agent, options.modelFlag);
@@ -1,33 +1,37 @@
1
1
  /**
2
2
  * Evolution audit trail: append, read, and query audit entries.
3
+ *
4
+ * Uses SQLite as the primary store via getDb(). Tests inject an in-memory
5
+ * database via _setTestDb() for isolation.
3
6
  */
4
7
 
5
- import { EVOLUTION_AUDIT_LOG } from "../constants.js";
8
+ import { getDb } from "../localdb/db.js";
9
+ import { writeEvolutionAuditToDb } from "../localdb/direct-write.js";
10
+ import { queryEvolutionAudit } from "../localdb/queries.js";
6
11
  import type { EvolutionAuditEntry } from "../types.js";
7
- import { appendJsonl, readJsonl } from "../utils/jsonl.js";
8
12
 
9
- /** Append an audit entry to the evolution audit log. */
10
- export function appendAuditEntry(
11
- entry: EvolutionAuditEntry,
12
- logPath: string = EVOLUTION_AUDIT_LOG,
13
- ): void {
14
- appendJsonl(logPath, entry);
13
+ /** Append an audit entry to the evolution audit log (SQLite). */
14
+ export function appendAuditEntry(entry: EvolutionAuditEntry, _logPath?: string): void {
15
+ writeEvolutionAuditToDb(entry);
15
16
  }
16
17
 
17
18
  /**
18
19
  * Read all audit entries, optionally filtered by skill name.
19
20
  *
20
- * When skillName is provided, returns only entries whose `details` field
21
- * contains the skill name (case-insensitive match).
21
+ * @param skillName - Optional skill name to filter by
22
22
  */
23
- export function readAuditTrail(
24
- skillName?: string,
25
- logPath: string = EVOLUTION_AUDIT_LOG,
26
- ): EvolutionAuditEntry[] {
27
- const entries = readJsonl<EvolutionAuditEntry>(logPath);
23
+ export function readAuditTrail(skillName?: string, _logPath?: string): EvolutionAuditEntry[] {
24
+ const db = getDb();
25
+ const entries = queryEvolutionAudit(db, skillName) as EvolutionAuditEntry[];
28
26
  if (!skillName) return entries;
27
+ // queryEvolutionAudit filters by skill_name field; if nothing matches, fall back to details
28
+ // for backward compatibility (some entries may have skill name in details only)
29
29
  const needle = skillName.toLowerCase();
30
- return entries.filter((e) => (e.details ?? "").toLowerCase().includes(needle));
30
+ return entries.length > 0
31
+ ? entries
32
+ : (queryEvolutionAudit(db) as EvolutionAuditEntry[]).filter((e) =>
33
+ (e.details ?? "").toLowerCase().includes(needle),
34
+ );
31
35
  }
32
36
 
33
37
  /**
@@ -36,9 +40,10 @@ export function readAuditTrail(
36
40
  */
37
41
  export function getLastDeployedProposal(
38
42
  skillName: string,
39
- logPath: string = EVOLUTION_AUDIT_LOG,
43
+ _logPath?: string,
40
44
  ): EvolutionAuditEntry | null {
41
- const entries = readAuditTrail(skillName, logPath);
45
+ const entries = readAuditTrail(skillName);
42
46
  const deployed = entries.filter((e) => e.action === "deployed");
43
- return deployed.length > 0 ? deployed[deployed.length - 1] : null;
47
+ // Results are DESC-ordered from SQLite, so first match is most recent
48
+ return deployed.length > 0 ? deployed[0] : null;
44
49
  }
@@ -0,0 +1,176 @@
1
+ /**
2
+ * constitutional.ts
3
+ *
4
+ * Deterministic pre-validation gate for evolution proposals. Runs before
5
+ * confidence checks and LLM validation to reject obviously bad proposals
6
+ * cheaply — no LLM calls required.
7
+ *
8
+ * Four principles:
9
+ * 1. Size constraint — char limit + word-count ratio
10
+ * 2. No XML injection — reject embedded XML tags
11
+ * 3. No unbounded broadening — reject bare "all/any/every/everything"
12
+ * 4. Anchor preservation — preserve USE WHEN triggers and $skillName refs
13
+ */
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Types
17
+ // ---------------------------------------------------------------------------
18
+
19
+ export interface ConstitutionalResult {
20
+ passed: boolean;
21
+ violations: string[];
22
+ }
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Helpers
26
+ // ---------------------------------------------------------------------------
27
+
28
+ function wordCount(text: string): number {
29
+ return text.split(/\s+/).filter(Boolean).length;
30
+ }
31
+
32
+ /**
33
+ * Extract the sentence containing the match index. Splits on sentence-ending
34
+ * punctuation (`.` `!` `?`) followed by whitespace, but avoids splitting on
35
+ * abbreviations like "e.g." or "i.e." when the next word starts in lowercase.
36
+ */
37
+ function sentenceContaining(text: string, matchIndex: number): string {
38
+ // Split only when the next token looks like a new sentence.
39
+ const sentences = text.split(/(?<=[.!?])\s+(?=[A-Z0-9"'‘“])/);
40
+ let offset = 0;
41
+ for (const sentence of sentences) {
42
+ const realOffset = text.indexOf(sentence, offset);
43
+ if (realOffset === -1) break;
44
+ if (matchIndex >= realOffset && matchIndex < realOffset + sentence.length) {
45
+ return sentence;
46
+ }
47
+ offset = realOffset + sentence.length;
48
+ }
49
+ return text; // fallback: treat entire text as one sentence
50
+ }
51
+
52
+ const ENUMERATION_MARKERS = /\b(?:including|such as|like)\b|e\.g\.|,\s*\w+\s*,/i;
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Main check
56
+ // ---------------------------------------------------------------------------
57
+
58
+ export function checkConstitution(
59
+ proposed: string,
60
+ original: string,
61
+ _skillName: string,
62
+ ): ConstitutionalResult {
63
+ const violations: string[] = [];
64
+
65
+ // -------------------------------------------------------------------------
66
+ // Principle 1: Size constraint
67
+ // -------------------------------------------------------------------------
68
+ if (proposed.length > 8192) {
69
+ violations.push(`Size: ${proposed.length} chars exceeds 8192 limit`);
70
+ }
71
+
72
+ const origWords = wordCount(original);
73
+ const propWords = wordCount(proposed);
74
+
75
+ if (origWords > 0) {
76
+ const ratio = propWords / origWords;
77
+ if (ratio > 3.0) {
78
+ violations.push(
79
+ `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), exceeds 3.0x limit`,
80
+ );
81
+ }
82
+ if (ratio < 0.3) {
83
+ violations.push(
84
+ `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), below 0.3x limit`,
85
+ );
86
+ }
87
+ }
88
+
89
+ // -------------------------------------------------------------------------
90
+ // Principle 2: No XML injection
91
+ // -------------------------------------------------------------------------
92
+ if (/<[a-zA-Z][^>]*>/.test(proposed)) {
93
+ violations.push("XML injection: proposed description contains XML/HTML tags");
94
+ }
95
+
96
+ // -------------------------------------------------------------------------
97
+ // Principle 3: No unbounded broadening
98
+ // -------------------------------------------------------------------------
99
+ const broadenPattern = /\b(all|any|every|everything)\b/gi;
100
+ let match: RegExpExecArray | null = broadenPattern.exec(proposed);
101
+ while (match !== null) {
102
+ const sentence = sentenceContaining(proposed, match.index);
103
+ if (!ENUMERATION_MARKERS.test(sentence)) {
104
+ violations.push(
105
+ `Unbounded broadening: "${match[0]}" at position ${match.index} without enumeration qualifier`,
106
+ );
107
+ }
108
+ match = broadenPattern.exec(proposed);
109
+ }
110
+
111
+ // -------------------------------------------------------------------------
112
+ // Principle 4: Anchor preservation
113
+ // -------------------------------------------------------------------------
114
+ // Check for USE WHEN triggers
115
+ if (/USE WHEN/i.test(original) && !/USE WHEN/i.test(proposed)) {
116
+ violations.push(
117
+ 'Anchor: original contains "USE WHEN" trigger phrase that is missing in proposed',
118
+ );
119
+ }
120
+
121
+ // Check for $variable references
122
+ const dollarRefs = original.match(/\$[A-Za-z0-9_-]+/g);
123
+ if (dollarRefs) {
124
+ const proposedDollarRefs = new Set(proposed.match(/\$[A-Za-z0-9_-]+/g) ?? []);
125
+ for (const ref of dollarRefs) {
126
+ if (!proposedDollarRefs.has(ref)) {
127
+ violations.push(`Anchor: original contains "${ref}" reference that is missing in proposed`);
128
+ }
129
+ }
130
+ }
131
+
132
+ return {
133
+ passed: violations.length === 0,
134
+ violations,
135
+ };
136
+ }
137
+
138
+ // ---------------------------------------------------------------------------
139
+ // Size-only check (for body evolution)
140
+ // ---------------------------------------------------------------------------
141
+
142
+ /**
143
+ * Body-specific constitutional check. Only enforces the word-count ratio
144
+ * (0.3x–3.0x of original). The 8192-char absolute limit does not apply
145
+ * to body text since bodies are typically much larger than descriptions.
146
+ */
147
+ export function checkConstitutionSizeOnly(
148
+ proposed: string,
149
+ original: string,
150
+ ): ConstitutionalResult {
151
+ const violations: string[] = [];
152
+
153
+ const origWords = wordCount(original);
154
+ const propWords = wordCount(proposed);
155
+
156
+ // Only enforce word-count ratio when the original is substantial enough
157
+ // for the ratio to be meaningful (at least 10 words).
158
+ if (origWords >= 10) {
159
+ const ratio = propWords / origWords;
160
+ if (ratio > 3.0) {
161
+ violations.push(
162
+ `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), exceeds 3.0x limit`,
163
+ );
164
+ }
165
+ if (ratio < 0.3) {
166
+ violations.push(
167
+ `Size: ${propWords} words is ${ratio.toFixed(1)}x original (${origWords} words), below 0.3x limit`,
168
+ );
169
+ }
170
+ }
171
+
172
+ return {
173
+ passed: violations.length === 0,
174
+ violations,
175
+ };
176
+ }
@@ -1,26 +1,31 @@
1
1
  /**
2
2
  * Evolution evidence trail: append and read proposal/eval artifacts that power
3
3
  * explainable dashboard drill-downs.
4
+ *
5
+ * Uses SQLite as the primary store via getDb(). Tests inject an in-memory
6
+ * database via _setTestDb() for isolation.
4
7
  */
5
8
 
6
- import { EVOLUTION_EVIDENCE_LOG } from "../constants.js";
9
+ import { getDb } from "../localdb/db.js";
10
+ import { writeEvolutionEvidenceToDb } from "../localdb/direct-write.js";
11
+ import { queryEvolutionEvidence } from "../localdb/queries.js";
7
12
  import type { EvolutionEvidenceEntry } from "../types.js";
8
- import { appendJsonl, readJsonl } from "../utils/jsonl.js";
9
13
 
10
- /** Append a structured evidence artifact to the evolution evidence log. */
14
+ /** Append a structured evidence artifact to the evolution evidence log (SQLite). */
11
15
  export function appendEvidenceEntry(
12
16
  entry: EvolutionEvidenceEntry,
13
- logPath: string = EVOLUTION_EVIDENCE_LOG,
17
+ /** @deprecated Unused; retained for API compatibility during migration */
18
+ _logPath?: string,
14
19
  ): void {
15
- appendJsonl(logPath, entry);
20
+ writeEvolutionEvidenceToDb(entry);
16
21
  }
17
22
 
18
- /** Read all evidence entries, optionally filtered by exact skill name. */
19
- export function readEvidenceTrail(
20
- skillName?: string,
21
- logPath: string = EVOLUTION_EVIDENCE_LOG,
22
- ): EvolutionEvidenceEntry[] {
23
- const entries = readJsonl<EvolutionEvidenceEntry>(logPath);
24
- if (!skillName) return entries;
25
- return entries.filter((entry) => entry.skill_name === skillName);
23
+ /**
24
+ * Read all evidence entries, optionally filtered by exact skill name.
25
+ *
26
+ * @param skillName - Optional skill name to filter by
27
+ */
28
+ export function readEvidenceTrail(skillName?: string, _logPath?: string): EvolutionEvidenceEntry[] {
29
+ const db = getDb();
30
+ return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
26
31
  }
@@ -9,9 +9,10 @@
9
9
  import { existsSync, readFileSync } from "node:fs";
10
10
  import { parseArgs } from "node:util";
11
11
 
12
- import { QUERY_LOG } from "../constants.js";
13
12
  import { buildEvalSet } from "../eval/hooks-to-evals.js";
14
13
  import { readGradingResultsForSkill } from "../grading/results.js";
14
+ import { getDb } from "../localdb/db.js";
15
+ import { queryQueryLog, querySkillUsageRecords } from "../localdb/queries.js";
15
16
  import type {
16
17
  BodyEvolutionProposal,
17
18
  BodyValidationResult,
@@ -24,13 +25,13 @@ import type {
24
25
  QueryLogRecord,
25
26
  SkillUsageRecord,
26
27
  } from "../types.js";
27
- import { readJsonl } from "../utils/jsonl.js";
28
- import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
28
+
29
29
  import { appendAuditEntry } from "./audit.js";
30
+ import { checkConstitutionSizeOnly } from "./constitutional.js";
30
31
  import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
31
32
  import { appendEvidenceEntry } from "./evidence.js";
32
33
  import { extractFailurePatterns } from "./extract-patterns.js";
33
- import { generateBodyProposal } from "./propose-body.js";
34
+ import { type ExecutionContext, generateBodyProposal } from "./propose-body.js";
34
35
  import { generateRoutingProposal } from "./propose-routing.js";
35
36
  import { refineBodyProposal } from "./refine-body.js";
36
37
  import { validateBodyProposal } from "./validate-body.js";
@@ -85,7 +86,7 @@ export interface EvolveBodyDeps {
85
86
  appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
86
87
  appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry;
87
88
  buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
88
- readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords;
89
+ readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
89
90
  readFileSync?: typeof readFileSync;
90
91
  writeFileSync?: (path: string, data: string, encoding: string) => void;
91
92
  }
@@ -143,7 +144,11 @@ export async function evolveBody(
143
144
  const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry;
144
145
  const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
145
146
  const _readEffectiveSkillUsageRecords =
146
- _deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords;
147
+ _deps.readEffectiveSkillUsageRecords ??
148
+ (() => {
149
+ const db = getDb();
150
+ return querySkillUsageRecords(db) as SkillUsageRecord[];
151
+ });
147
152
  const _readFileSync = _deps.readFileSync ?? readFileSync;
148
153
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
149
154
 
@@ -198,7 +203,8 @@ export async function evolveBody(
198
203
  }
199
204
  evalSet = parsed as EvalEntry[];
200
205
  } else {
201
- const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
206
+ const dbForQuery = getDb();
207
+ const queryRecords = queryQueryLog(dbForQuery) as QueryLogRecord[];
202
208
  evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
203
209
  }
204
210
 
@@ -222,6 +228,64 @@ export async function evolveBody(
222
228
 
223
229
  const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);
224
230
 
231
+ // Compute execution context from session telemetry (fail-open)
232
+ let executionContext: ExecutionContext | undefined;
233
+ try {
234
+ const { querySessionTelemetry } = await import("../localdb/queries.js");
235
+ const db = getDb();
236
+ const allTelemetry = querySessionTelemetry(db);
237
+
238
+ // Find session IDs that used this skill
239
+ const skillSessionIds = new Set(
240
+ skillUsage
241
+ .filter((r) => r.skill_name?.toLowerCase() === skillName.toLowerCase() && r.triggered)
242
+ .map((r) => r.session_id),
243
+ );
244
+
245
+ // Filter telemetry to skill sessions
246
+ const telemetryForSkill = allTelemetry.filter((t) => skillSessionIds.has(t.session_id));
247
+
248
+ if (telemetryForSkill.length > 0) {
249
+ const mean = (arr: number[]) => arr.reduce((a, b) => a + b, 0) / arr.length;
250
+
251
+ const toolCallCounts = telemetryForSkill.map((t) => t.total_tool_calls ?? 0);
252
+ const errorCounts = telemetryForSkill.map((t) => t.errors_encountered ?? 0);
253
+ const turnCounts = telemetryForSkill.map((t) => t.assistant_turns ?? 0);
254
+
255
+ // Count tool frequency across all sessions
256
+ const toolFreq = new Map<string, number>();
257
+ const failureToolFreq = new Map<string, number>();
258
+
259
+ for (const t of telemetryForSkill) {
260
+ const tools: Record<string, number> = t.tool_calls ?? {};
261
+ const isFailure = (t.errors_encountered ?? 0) > 2;
262
+
263
+ for (const [tool, count] of Object.entries(tools)) {
264
+ toolFreq.set(tool, (toolFreq.get(tool) ?? 0) + count);
265
+ if (isFailure) {
266
+ failureToolFreq.set(tool, (failureToolFreq.get(tool) ?? 0) + count);
267
+ }
268
+ }
269
+ }
270
+
271
+ const topN = (freq: Map<string, number>, n: number) =>
272
+ [...freq.entries()]
273
+ .sort((a, b) => b[1] - a[1])
274
+ .slice(0, n)
275
+ .map(([k]) => k);
276
+
277
+ executionContext = {
278
+ avgToolCalls: mean(toolCallCounts),
279
+ avgErrors: mean(errorCounts),
280
+ avgTurns: mean(turnCounts),
281
+ commonTools: topN(toolFreq, 5),
282
+ failureTools: topN(failureToolFreq, 3),
283
+ };
284
+ }
285
+ } catch {
286
+ // fail-open: body evolution works without execution context
287
+ }
288
+
225
289
  // Step 4: Generate -> validate -> refine loop
226
290
  let lastProposal: BodyEvolutionProposal | null = null;
227
291
  let lastValidation: BodyValidationResult | null = null;
@@ -253,6 +317,7 @@ export async function evolveBody(
253
317
  teacherAgent,
254
318
  teacherModel,
255
319
  fewShotExamples,
320
+ executionContext,
256
321
  );
257
322
  }
258
323
  } else if (lastProposal && lastValidation) {
@@ -285,6 +350,38 @@ export async function evolveBody(
285
350
  eval_set: evalSet,
286
351
  });
287
352
 
353
+ // Constitutional size check (deterministic, pre-validation — body only)
354
+ if (target === "body") {
355
+ const constitution = checkConstitutionSizeOnly(
356
+ proposal.proposed_body,
357
+ proposal.original_body,
358
+ );
359
+ if (!constitution.passed) {
360
+ const reason = `Constitutional: ${constitution.violations.join("; ")}`;
361
+ recordAudit(proposal.proposal_id, "rejected", reason);
362
+ recordEvidence({
363
+ timestamp: new Date().toISOString(),
364
+ proposal_id: proposal.proposal_id,
365
+ skill_name: skillName,
366
+ skill_path: skillPath,
367
+ target,
368
+ stage: "rejected",
369
+ rationale: proposal.rationale,
370
+ confidence: proposal.confidence,
371
+ details: reason,
372
+ original_text: proposal.original_body,
373
+ proposed_text: proposal.proposed_body,
374
+ });
375
+ return {
376
+ proposal: lastProposal,
377
+ validation: null,
378
+ deployed: false,
379
+ auditEntries,
380
+ reason,
381
+ };
382
+ }
383
+ }
384
+
288
385
  // Check confidence threshold
289
386
  if (proposal.confidence < confidenceThreshold) {
290
387
  recordAudit(