selftune 0.2.15 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +24 -19
  2. package/bin/run-hook.cjs +36 -0
  3. package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
  4. package/cli/selftune/alpha-upload/client.ts +51 -1
  5. package/cli/selftune/alpha-upload/flush.ts +46 -5
  6. package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
  7. package/cli/selftune/alpha-upload-contract.ts +9 -0
  8. package/cli/selftune/constants.ts +82 -5
  9. package/cli/selftune/contribute/sanitize.ts +52 -5
  10. package/cli/selftune/dashboard-contract.ts +100 -0
  11. package/cli/selftune/dashboard-server.ts +2 -2
  12. package/cli/selftune/evolution/description-quality.ts +12 -11
  13. package/cli/selftune/evolution/evolve.ts +238 -53
  14. package/cli/selftune/evolution/unblock-suggestions.ts +159 -0
  15. package/cli/selftune/evolution/validate-proposal.ts +9 -6
  16. package/cli/selftune/grading/grade-session.ts +20 -0
  17. package/cli/selftune/hooks/commit-track.ts +188 -0
  18. package/cli/selftune/hooks/prompt-log.ts +10 -1
  19. package/cli/selftune/hooks/session-stop.ts +2 -2
  20. package/cli/selftune/hooks/skill-eval.ts +15 -1
  21. package/cli/selftune/hooks/stdin-preview.ts +32 -0
  22. package/cli/selftune/init.ts +198 -27
  23. package/cli/selftune/localdb/direct-write.ts +69 -6
  24. package/cli/selftune/localdb/queries.ts +552 -7
  25. package/cli/selftune/localdb/schema.ts +46 -0
  26. package/cli/selftune/orchestrate.ts +32 -4
  27. package/cli/selftune/routes/overview.ts +41 -3
  28. package/cli/selftune/routes/skill-report.ts +88 -17
  29. package/cli/selftune/types.ts +32 -0
  30. package/cli/selftune/utils/hooks.ts +12 -2
  31. package/cli/selftune/utils/transcript.ts +210 -1
  32. package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
  33. package/package.json +1 -1
  34. package/packages/telemetry-contract/src/types.ts +11 -0
  35. package/skill/SKILL.md +29 -1
  36. package/skill/Workflows/AutoActivation.md +1 -1
  37. package/skill/Workflows/Evolve.md +31 -13
  38. package/skill/Workflows/ExportCanonical.md +121 -0
  39. package/skill/Workflows/Hook.md +131 -0
  40. package/skill/Workflows/Initialize.md +9 -8
  41. package/skill/Workflows/Orchestrate.md +27 -5
  42. package/skill/Workflows/Quickstart.md +94 -0
  43. package/skill/Workflows/RepairSkillUsage.md +87 -0
  44. package/skill/Workflows/Uninstall.md +82 -0
  45. package/skill/settings_snippet.json +19 -8
package/README.md CHANGED
@@ -69,6 +69,8 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
69
69
 
70
70
  **I manage an agent setup with many skills** — You have 15+ skills installed. Some work. Some don't. Some conflict. Tell your agent "how are my skills doing?" and selftune gives you a health dashboard and automatically improves the skills that aren't keeping up.
71
71
 
72
+ **I use skills for non-coding work** — Marketing workflows, research pipelines, compliance checks, slide decks. You say "make me a presentation" and nothing happens. selftune learns that "slides", "deck", and "presentation for Monday" all mean the same skill — and fixes the routing automatically.
73
+
72
74
  ## How It Works
73
75
 
74
76
  <p align="center">
@@ -77,29 +79,27 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
77
79
 
78
80
  A continuous feedback loop that makes your skills learn and adapt. Automatically. Your agent runs everything — you just install the skill and talk naturally.
79
81
 
80
- **Observe** — Hooks capture every query and which skills fired. On Claude Code, hooks install automatically during `selftune init`. Backfill existing transcripts with `selftune ingest claude`.
82
+ **Observe** — Seven real-time hooks capture every query, every skill invocation, and every correction signal. Structured telemetry — not raw logs. On Claude Code, hooks install automatically during `selftune init`. Backfill existing transcripts with `selftune ingest claude`.
83
+
84
+ **Detect** — Finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch. Clusters missed queries by invocation type. Detects correction signals ("why didn't you use X?") and triggers immediate improvement.
85
+
86
+ **Evolve** — Generates multiple proposals biased toward different invocation types, validates each against your real eval set with majority voting, runs constitutional checks, then gates with an expensive model before deploying. Not guesswork — evidence. Automatic backup on every deploy.
81
87
 
82
- **Detect** — Finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch. Real-time correction signals ("why didn't you use X?") are detected and trigger immediate improvement.
88
+ **Watch** — After deploying changes, selftune monitors trigger rates, false negatives, and per-invocation-type scores. If anything regresses, it rolls back automatically. No manual monitoring needed.
83
89
 
84
- **Evolve** — Rewrites skill descriptions and full skill bodies — to match how you actually work. Cheap-loop mode uses haiku for the loop, sonnet for the gate (~80% cost reduction). Teacher-student body evolution with 3-gate validation. Automatic backup.
90
+ **Automate** — Run `selftune cron setup` to install OS-level scheduling. selftune syncs, grades, evolves, and watches on a schedule — fully autonomously.
85
91
 
86
- **Watch** After deploying changes, selftune monitors skill trigger rates. If anything regresses, it rolls back automatically.
92
+ ## How Is This Different from Agents That "Learn"?
87
93
 
88
- **Automate** Run `selftune cron setup` to install OS-level scheduling. selftune syncs, evaluates, evolves, and watches on a schedule no manual intervention needed.
94
+ Some agents claim self-improvement by saving notes about what worked. That's knowledge persistence — not a closed loop. There's no measurement, no validation, and no way to know if the saved notes are actually correct.
89
95
 
90
- ## What's New in v0.2.0
96
+ selftune is empirical. It observes real sessions, grades execution quality, detects missed triggers, proposes changes, validates them against eval sets, deploys with automatic backup, monitors for regressions, and rolls back on failure. Twelve interlocking mechanisms — not one background thread writing markdown.
91
97
 
92
- - **Full skill body evolution** Beyond descriptions: evolve routing tables and entire skill bodies using teacher-student model with structural, trigger, and quality gates
93
- - **Synthetic eval generation** `selftune eval generate --synthetic` generates eval sets from SKILL.md via LLM, no session logs needed. Solves cold-start: new skills get evals immediately.
94
- - **Cheap-loop evolution** `selftune evolve --cheap-loop` uses haiku for proposal generation and validation, sonnet only for the final deployment gate. ~80% cost reduction.
95
- - **Batch trigger validation** Validation now batches 10 queries per LLM call instead of one-per-query. ~10x faster evolution loops.
96
- - **Per-stage model control** — `--validation-model`, `--proposal-model`, and `--gate-model` flags give fine-grained control over which model runs each evolution stage.
97
- - **Auto-activation system** — Hooks detect when selftune should run and suggest actions
98
- - **Enforcement guardrails** — Blocks SKILL.md edits on monitored skills unless `selftune watch` has been run
99
- - **Live dashboard server** — `selftune dashboard --serve` with SSE auto-refresh and action buttons
100
- - **Evolution memory** — Persists context, plans, and decisions across context resets
101
- - **4 specialized agents** — Diagnosis analyst, pattern analyst, evolution reviewer, integration guide
102
- - **Sandbox test harness** — Comprehensive automated test coverage, including devcontainer-based LLM testing
98
+ | Approach | Measures quality? | Validates changes? | Detects regressions? | Rolls back? |
99
+ | ------------------------- | ----------------- | --------------------------- | ---------------------- | ----------- |
100
+ | Agent saves its own notes | No | No | No | No |
101
+ | Manual skill rewrites | No | No | No | No |
102
+ | **selftune** | 3-tier grading | Eval sets + majority voting | Post-deploy monitoring | Automatic |
103
103
 
104
104
  ## Commands
105
105
 
@@ -108,12 +108,15 @@ Your agent runs these — you just say what you want ("improve my skills", "show
108
108
  | Group | Command | What it does |
109
109
  | ---------- | -------------------------------------------- | ------------------------------------------------------------------------------------------- |
110
110
  | | `selftune status` | See which skills are undertriggering and why |
111
- | | `selftune orchestrate` | Run the full autonomous loop (sync → evolve → watch) |
111
+ | | `selftune last` | Quick insight from the most recent session |
112
+ | | `selftune orchestrate` | Run the full autonomous loop (sync → grade → evolve → watch) |
113
+ | | `selftune sync` | Refresh telemetry from source-truth transcripts |
112
114
  | | `selftune dashboard` | Open the visual skill health dashboard |
113
115
  | | `selftune doctor` | Health check: logs, hooks, config, permissions |
114
116
  | **ingest** | `selftune ingest claude` | Backfill from Claude Code transcripts |
115
117
  | | `selftune ingest codex` | Import Codex rollout logs (experimental) |
116
118
  | **grade** | `selftune grade --skill <name>` | Grade a skill session with evidence |
119
+ | | `selftune grade auto` | Auto-grade recent sessions for ungraded skills |
117
120
  | | `selftune grade baseline --skill <name>` | Measure skill value vs no-skill baseline |
118
121
  | **evolve** | `selftune evolve --skill <name>` | Propose, validate, and deploy improved descriptions |
119
122
  | | `selftune evolve body --skill <name>` | Evolve full skill body or routing table |
@@ -124,7 +127,9 @@ Your agent runs these — you just say what you want ("improve my skills", "show
124
127
  | | `selftune eval import` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) |
125
128
  | **auto** | `selftune cron setup` | Install OS-level scheduling (cron/launchd/systemd) |
126
129
  | | `selftune watch --skill <name>` | Monitor after deploy. Auto-rollback on regression. |
127
- | **other** | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) |
130
+ | **other** | `selftune workflows` | Discover and manage multi-skill workflows |
131
+ | | `selftune badge --skill <name>` | Generate a health badge for your skill's README |
132
+ | | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) |
128
133
  | | `selftune alpha upload` | Run a manual alpha upload cycle and emit a JSON send summary |
129
134
 
130
135
  Full command reference: `selftune --help`
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Hook runner — executes a TypeScript hook script via Bun.
4
+ *
5
+ * Usage: node run-hook.cjs <path-to-hook.ts>
6
+ *
7
+ * Stdin is piped through to the hook script (Claude Code sends JSON on stdin).
8
+ * Exit code is propagated from the hook. If bun is not found, exits 0
9
+ * (fail-open: hooks must never block Claude).
10
+ *
11
+ * Note: selftune hooks depend on Bun-specific APIs (Bun.stdin.text(),
12
+ * Bun.spawn()) and cannot run under tsx/node. The runner exists so that
13
+ * hook commands use `node run-hook.cjs` (universally available) as the
14
+ * entry point, avoiding a hard dependency on bun being in PATH for the
15
+ * shell that Claude Code invokes.
16
+ */
17
+
18
+ const { execFileSync } = require("child_process");
19
+ const hookScript = process.argv[2];
20
+
21
+ if (!hookScript) {
22
+ // No script specified — fail-open
23
+ process.exit(0);
24
+ }
25
+
26
+ try {
27
+ execFileSync("bun", ["run", hookScript], { stdio: "inherit" });
28
+ process.exit(0);
29
+ } catch (e) {
30
+ // Hook exited non-zero → propagate (e.g. exit 2 = block in PreToolUse)
31
+ if (e.status != null) {
32
+ process.exit(e.status);
33
+ }
34
+ // bun not found (ENOENT) — fail-open
35
+ process.exit(0);
36
+ }
@@ -58,7 +58,7 @@ export function buildV2PushPayload(
58
58
  const params = afterSeq !== undefined ? [afterSeq, limit] : [limit];
59
59
 
60
60
  const sql = `
61
- SELECT local_seq, record_kind, record_json
61
+ SELECT local_seq, record_kind, record_id, record_json, content_sha256
62
62
  FROM canonical_upload_staging
63
63
  ${whereClause}
64
64
  ORDER BY local_seq ASC
@@ -68,7 +68,9 @@ export function buildV2PushPayload(
68
68
  const rows = db.query(sql).all(...params) as Array<{
69
69
  local_seq: number;
70
70
  record_kind: string;
71
+ record_id: string;
71
72
  record_json: string;
73
+ content_sha256: string | null;
72
74
  }>;
73
75
 
74
76
  if (rows.length === 0) return null;
@@ -78,6 +80,7 @@ export function buildV2PushPayload(
78
80
  const orchestrateRuns: Record<string, unknown>[] = [];
79
81
  const gradingResults: Record<string, unknown>[] = [];
80
82
  const improvementSignals: Record<string, unknown>[] = [];
83
+ const contentHashes: Record<string, string> = {};
81
84
  let lastParsedSeq: number | null = null;
82
85
  let hitMalformedRow = false;
83
86
 
@@ -87,6 +90,10 @@ export function buildV2PushPayload(
87
90
  hitMalformedRow = true;
88
91
  break;
89
92
  }
93
+ // Collect content hashes for dedup — only after successful parse, keyed by kind:id
94
+ if (row.content_sha256) {
95
+ contentHashes[`${row.record_kind}:${row.record_id}`] = row.content_sha256;
96
+ }
90
97
 
91
98
  if (row.record_kind === "evolution_evidence") {
92
99
  const timestamp =
@@ -152,6 +159,12 @@ export function buildV2PushPayload(
152
159
  gradingResults,
153
160
  improvementSignals,
154
161
  );
162
+
163
+ // Attach content hashes for server-side dedup
164
+ if (Object.keys(contentHashes).length > 0) {
165
+ payload.content_hashes = contentHashes;
166
+ }
167
+
155
168
  if (lastParsedSeq === null) {
156
169
  return null;
157
170
  }
@@ -6,7 +6,7 @@
6
6
  * PushUploadResult indicating success or failure.
7
7
  */
8
8
 
9
- import type { PushUploadResult } from "../alpha-upload-contract.js";
9
+ import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
10
10
  import { getSelftuneVersion } from "../utils/selftune-meta.js";
11
11
 
12
12
  function isPushUploadResult(value: unknown): value is PushUploadResult {
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
111
111
  };
112
112
  }
113
113
  }
114
+
115
+ /**
116
+ * Lightweight HEAD check to see if a record already exists on the server.
117
+ *
118
+ * Sends HEAD {endpoint}/{recordId}. Optionally includes If-None-Match
119
+ * for content-hash comparison.
120
+ *
121
+ * Never throws -- returns { exists: false, unchanged: false } on any error
122
+ * (fail-open, matching the uploadPushPayload pattern).
123
+ */
124
+ export async function headRecord(
125
+ endpoint: string,
126
+ recordId: string,
127
+ sha256?: string,
128
+ apiKey?: string,
129
+ ): Promise<HeadCheckResult> {
130
+ const failOpen: HeadCheckResult = { exists: false, unchanged: false };
131
+ try {
132
+ const headers: Record<string, string> = {
133
+ "User-Agent": `selftune/${getSelftuneVersion()}`,
134
+ };
135
+
136
+ if (sha256) {
137
+ headers["If-None-Match"] = `"${sha256}"`;
138
+ }
139
+
140
+ if (apiKey) {
141
+ headers.Authorization = `Bearer ${apiKey}`;
142
+ }
143
+
144
+ const url = `${endpoint}/${encodeURIComponent(recordId)}`;
145
+ const response = await fetch(url, {
146
+ method: "HEAD",
147
+ headers,
148
+ signal: AbortSignal.timeout(10_000),
149
+ });
150
+
151
+ if (response.status === 200) {
152
+ return { exists: true, unchanged: false };
153
+ }
154
+ if (response.status === 304) {
155
+ return { exists: true, unchanged: true };
156
+ }
157
+ // 404 or any other status -- treat as not found
158
+ return failOpen;
159
+ } catch {
160
+ // Network error, timeout, etc. -- fail open
161
+ return failOpen;
162
+ }
163
+ }
@@ -12,7 +12,7 @@
12
12
  */
13
13
 
14
14
  import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
15
- import { uploadPushPayload } from "./client.js";
15
+ import { headRecord, uploadPushPayload } from "./client.js";
16
16
 
17
17
  // ---------------------------------------------------------------------------
18
18
  // Options
@@ -28,6 +28,8 @@ export interface FlushOptions {
28
28
  dryRun?: boolean;
29
29
  /** API key for Bearer auth on the cloud endpoint. */
30
30
  apiKey?: string;
31
+ /** When set, run HEAD checks against this endpoint before pushing. */
32
+ headCheckEndpoint?: string;
31
33
  }
32
34
 
33
35
  // ---------------------------------------------------------------------------
@@ -85,8 +87,9 @@ export async function flushQueue(
85
87
  const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
86
88
  const dryRun = options?.dryRun ?? false;
87
89
  const apiKey = options?.apiKey;
90
+ const headCheckEndpoint = options?.headCheckEndpoint;
88
91
 
89
- const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
92
+ const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
90
93
 
91
94
  const items = queue.getPending(batchSize);
92
95
 
@@ -94,7 +97,44 @@ export async function flushQueue(
94
97
  return summary;
95
98
  }
96
99
 
100
+ // -- HEAD check phase: identify records that already exist unchanged ------
101
+ const unchangedIds = new Set<number>();
102
+ if (headCheckEndpoint) {
103
+ const headChecks = items.map(async (item) => {
104
+ try {
105
+ const parsed = JSON.parse(item.payload_json) as { push_id?: string };
106
+ const pushId = parsed.push_id;
107
+ if (!pushId) return { id: item.id, skip: false };
108
+ const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
109
+ return { id: item.id, skip: result.exists && result.unchanged };
110
+ } catch {
111
+ // Fail-open: if HEAD check itself errors, don't skip
112
+ return { id: item.id, skip: false };
113
+ }
114
+ });
115
+
116
+ const results = await Promise.allSettled(headChecks);
117
+ for (const result of results) {
118
+ if (result.status === "fulfilled" && result.value.skip) {
119
+ unchangedIds.add(result.value.id);
120
+ }
121
+ }
122
+
123
+ // Mark unchanged items as sent in the queue without actually pushing
124
+ for (const item of items) {
125
+ if (unchangedIds.has(item.id)) {
126
+ if (!queue.markSending(item.id)) continue;
127
+ if (queue.markSent(item.id)) {
128
+ summary.skipped_unchanged++;
129
+ } else {
130
+ summary.failed++;
131
+ }
132
+ }
133
+ }
134
+ }
135
+
97
136
  for (const item of items) {
137
+ if (unchangedIds.has(item.id)) continue;
98
138
  const markFailedSafely = (message: string): void => {
99
139
  if (!queue.markFailed(item.id, message)) {
100
140
  console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
@@ -149,10 +189,11 @@ export async function flushQueue(
149
189
  break;
150
190
  }
151
191
 
152
- // 409 Conflict = duplicate push_id, treat as success
153
- if (status === 409) {
192
+ // 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
193
+ // Both are treated as success — the server already has this data.
194
+ if (status === 304 || status === 409) {
154
195
  if (!queue.markSent(item.id)) {
155
- markFailedSafely("local queue state update failed after duplicate upload");
196
+ markFailedSafely("local queue state update failed after duplicate/unchanged upload");
156
197
  summary.failed++;
157
198
  } else {
158
199
  summary.sent++;
@@ -146,6 +146,18 @@ function extractNormalizedAt(record: CanonicalRecord): string {
146
146
  return record.normalized_at;
147
147
  }
148
148
 
149
+ // -- Content hashing ----------------------------------------------------------
150
+
151
+ /**
152
+ * Compute SHA256 hex digest of a string (for upload dedup).
153
+ * Uses Bun's built-in CryptoHasher for zero-dependency hashing.
154
+ */
155
+ export function computeContentSha256(input: string): string {
156
+ const hasher = new Bun.CryptoHasher("sha256");
157
+ hasher.update(input);
158
+ return hasher.digest("hex");
159
+ }
160
+
149
161
  // -- Main staging function ----------------------------------------------------
150
162
 
151
163
  /**
@@ -163,9 +175,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
163
175
  const now = new Date().toISOString();
164
176
 
165
177
  const stmt = db.prepare(`
166
- INSERT OR IGNORE INTO canonical_upload_staging
167
- (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
168
- VALUES (?, ?, ?, ?, ?, ?, ?)
178
+ INSERT INTO canonical_upload_staging
179
+ (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
180
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
181
+ ON CONFLICT(record_kind, record_id) DO UPDATE SET
182
+ content_sha256 = excluded.content_sha256
183
+ WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
169
184
  `);
170
185
 
171
186
  // 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
@@ -177,14 +192,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
177
192
  : readAndEnrichCanonicalRecords(logPath);
178
193
  for (const record of records) {
179
194
  const recordId = extractRecordId(record);
195
+ const recordJson = JSON.stringify(record);
180
196
  const result = stmt.run(
181
197
  record.record_kind,
182
198
  recordId,
183
- JSON.stringify(record),
199
+ recordJson,
184
200
  extractSessionId(record),
185
201
  extractPromptId(record),
186
202
  extractNormalizedAt(record),
187
203
  now,
204
+ computeContentSha256(recordJson),
188
205
  );
189
206
  if (result.changes > 0) staged++;
190
207
  }
@@ -222,6 +239,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
222
239
  null, // no prompt_id
223
240
  entry.timestamp,
224
241
  now,
242
+ computeContentSha256(recordJson),
225
243
  );
226
244
  if (result.changes > 0) staged++;
227
245
  }
@@ -258,6 +276,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
258
276
  null, // no prompt_id
259
277
  run.timestamp,
260
278
  now,
279
+ computeContentSha256(recordJson),
261
280
  );
262
281
  if (result.changes > 0) staged++;
263
282
  }
@@ -298,6 +317,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
298
317
  null, // no prompt_id
299
318
  gr.graded_at,
300
319
  now,
320
+ computeContentSha256(recordJson),
301
321
  );
302
322
  if (result.changes > 0) staged++;
303
323
  }
@@ -332,6 +352,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
332
352
  null, // no prompt_id
333
353
  sig.timestamp,
334
354
  now,
355
+ computeContentSha256(recordJson),
335
356
  );
336
357
  if (result.changes > 0) staged++;
337
358
  }
@@ -49,4 +49,13 @@ export interface FlushSummary {
49
49
  sent: number;
50
50
  failed: number;
51
51
  skipped: number;
52
+ /** Records skipped because a HEAD check confirmed they already exist unchanged. */
53
+ skipped_unchanged: number;
54
+ }
55
+
56
+ // -- HEAD check result --------------------------------------------------------
57
+
58
+ export interface HeadCheckResult {
59
+ exists: boolean;
60
+ unchanged: boolean;
52
61
  }
@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
41
41
  export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
42
42
  export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
43
43
 
44
+ /** Allow tests to override the orchestrate lock without mutating the host lock file. */
45
+ export function getOrchestrateLockPath(): string {
46
+ return process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH || ORCHESTRATE_LOCK;
47
+ }
48
+
44
49
  /** Evolution memory directory — human-readable session context that survives resets. */
45
50
  export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
46
51
  export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
@@ -162,17 +167,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
162
167
 
163
168
  /** Regex patterns for detecting secrets that must be redacted. */
164
169
  export const SECRET_PATTERNS = [
165
- /sk-[a-zA-Z0-9]{20,}/g, // OpenAI / Anthropic API keys
170
+ // -- API keys & tokens (platform-specific prefixes) --
171
+ /sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
172
+ /sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
166
173
  /ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
167
174
  /gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
168
175
  /github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
169
- /AKIA[A-Z0-9]{16}/g, // AWS access key IDs
176
+ /npm_[a-zA-Z0-9]{36}/g, // npm tokens
177
+ /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
178
+
179
+ // -- AWS --
180
+ /AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
181
+ /ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)
182
+
183
+ // -- GCP --
184
+ /AIza[0-9A-Za-z_-]{35}/g, // Google API key
185
+
186
+ // -- Stripe --
187
+ /(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys
188
+
189
+ // -- Twilio --
190
+ /SK[a-f0-9]{32}/g, // Twilio API key
191
+
192
+ // -- SendGrid --
193
+ /SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key
194
+
195
+ // -- Mailgun --
196
+ /key-[a-zA-Z0-9]{32}/g, // Mailgun API key
197
+
198
+ // -- Slack --
170
199
  /xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
171
200
  /xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
172
201
  /xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens
173
- /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JWTs
174
- /npm_[a-zA-Z0-9]{36}/g, // npm tokens
175
- /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
202
+
203
+ // -- JWTs --
204
+ /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens
205
+
206
+ // -- Private keys (PEM block headers) --
207
+ /-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)
208
+
209
+ // -- Database connection URIs --
210
+ /(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB URIs with credentials
211
+
212
+ // -- Azure --
213
+ /DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string
214
+
215
+ // -- Webhook URLs --
216
+ /https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
217
+ /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook
218
+
219
+ // -- SSH keys --
220
+ /ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material
221
+
222
+ // -- Generic high-confidence patterns --
223
+ /Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
224
+ /https?:\/\/[^:]+:[^@]+@[^\s"']+/g, // Basic auth embedded in URLs
225
+ /(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
176
226
  ] as const;
177
227
 
178
228
  /** Regex for file paths (Unix and Windows). */
@@ -184,6 +234,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
184
234
  /** Regex for IP addresses (v4). */
185
235
  export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
186
236
 
237
+ // ---------------------------------------------------------------------------
238
+ // PII patterns — high-confidence, low-false-positive personally identifiable info
239
+ // ---------------------------------------------------------------------------
240
+
241
+ export const PII_PATTERNS = [
242
+ // -- Phone numbers --
243
+ /\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
244
+ /\b\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567
245
+
246
+ // -- Credit card numbers (major networks, with optional separators) --
247
+ /\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
248
+ /\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
249
+ /\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
250
+ /\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)
251
+
252
+ // -- SSN / national IDs --
253
+ /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789
254
+
255
+ // -- IPv6 --
256
+ /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
257
+ /\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
258
+ /::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)
259
+
260
+ // -- Date of birth patterns (in structured contexts) --
261
+ /\b(?:dob|date\.of\.birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
262
+ ] as const;
263
+
187
264
  /** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
188
265
  export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;
189
266
 
@@ -15,6 +15,7 @@ import {
15
15
  IDENTIFIER_PATTERN,
16
16
  IP_PATTERN,
17
17
  MODULE_PATTERN,
18
+ PII_PATTERNS,
18
19
  SECRET_PATTERNS,
19
20
  } from "../constants.js";
20
21
  import type { ContributionBundle } from "../types.js";
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
26
27
  const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
27
28
  const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
28
29
 
30
+ /** Apply a set of regex patterns to text, replacing matches with a token. Clones each regex to reset lastIndex. */
31
+ function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
32
+ let result = text;
33
+ for (const pattern of patterns) {
34
+ result = result.replace(new RegExp(pattern.source, pattern.flags), token);
35
+ }
36
+ return result;
37
+ }
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
41
+ // ---------------------------------------------------------------------------
42
+
43
+ /**
44
+ * Apply only SECRET_PATTERNS redaction to a string.
45
+ * Lighter than sanitizeConservative — no path/email/IP/UUID replacement.
46
+ */
47
+ export function sanitizeSecrets(text: string): string {
48
+ if (!text) return text;
49
+ return applyPatterns(text, SECRET_PATTERNS, "[SECRET]");
50
+ }
51
+
52
+ /**
53
+ * Recursively traverse a value and redact secrets in all string leaves.
54
+ * Non-string primitives, Dates, and other non-plain objects pass through unchanged.
55
+ * Does NOT mutate the input — returns a new structure.
56
+ */
57
+ export function redactSecretsDeep<T>(value: T): T {
58
+ if (typeof value === "string") return sanitizeSecrets(value) as T;
59
+ if (Array.isArray(value)) return value.map((item) => redactSecretsDeep(item)) as T;
60
+ if (value && typeof value === "object" && !(value instanceof Date)) {
61
+ // Only recurse into plain objects — pass through Map, Set, RegExp, class instances, etc.
62
+ const proto = Object.getPrototypeOf(value);
63
+ if (proto !== null && proto !== Object.prototype) return value;
64
+ const result: Record<string, unknown> = {};
65
+ for (const [k, v] of Object.entries(value)) {
66
+ result[k] = redactSecretsDeep(v);
67
+ }
68
+ return result as T;
69
+ }
70
+ return value;
71
+ }
72
+
29
73
  // ---------------------------------------------------------------------------
30
74
  // Conservative sanitization
31
75
  // ---------------------------------------------------------------------------
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
36
80
  let result = text;
37
81
 
38
82
  // Secrets first (longest/most specific patterns)
39
- for (const pattern of SECRET_PATTERNS) {
40
- // Clone regex to reset lastIndex
41
- result = result.replace(new RegExp(pattern.source, pattern.flags), "[SECRET]");
42
- }
83
+ result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
84
+
85
+ // PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
86
+ result = applyPatterns(result, PII_PATTERNS, "[PII]");
43
87
 
44
88
  // File paths
45
89
  result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
@@ -123,7 +167,7 @@ export function sanitizeBundle(
123
167
  level: "conservative" | "aggressive",
124
168
  projectName?: string,
125
169
  ): ContributionBundle {
126
- return {
170
+ const fieldSanitized: ContributionBundle = {
127
171
  ...bundle,
128
172
  sanitization_level: level,
129
173
  positive_queries: bundle.positive_queries.map((q) => ({
@@ -151,6 +195,9 @@ export function sanitizeBundle(
151
195
  }
152
196
  : {}),
153
197
  };
198
+
199
+ // Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
200
+ return redactSecretsDeep(fieldSanitized);
154
201
  }
155
202
 
156
203
  // ---------------------------------------------------------------------------