selftune 0.2.15 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +24 -19
  2. package/bin/run-hook.cjs +36 -0
  3. package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
  4. package/cli/selftune/alpha-upload/client.ts +51 -1
  5. package/cli/selftune/alpha-upload/flush.ts +46 -5
  6. package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
  7. package/cli/selftune/alpha-upload-contract.ts +9 -0
  8. package/cli/selftune/constants.ts +82 -5
  9. package/cli/selftune/contribute/sanitize.ts +52 -5
  10. package/cli/selftune/dashboard-contract.ts +100 -0
  11. package/cli/selftune/dashboard-server.ts +2 -2
  12. package/cli/selftune/evolution/description-quality.ts +12 -11
  13. package/cli/selftune/evolution/evolve.ts +238 -53
  14. package/cli/selftune/evolution/unblock-suggestions.ts +159 -0
  15. package/cli/selftune/evolution/validate-proposal.ts +9 -6
  16. package/cli/selftune/grading/grade-session.ts +20 -0
  17. package/cli/selftune/hooks/commit-track.ts +188 -0
  18. package/cli/selftune/hooks/prompt-log.ts +10 -1
  19. package/cli/selftune/hooks/session-stop.ts +2 -2
  20. package/cli/selftune/hooks/skill-eval.ts +15 -1
  21. package/cli/selftune/hooks/stdin-preview.ts +32 -0
  22. package/cli/selftune/init.ts +198 -27
  23. package/cli/selftune/localdb/direct-write.ts +69 -6
  24. package/cli/selftune/localdb/queries.ts +552 -7
  25. package/cli/selftune/localdb/schema.ts +46 -0
  26. package/cli/selftune/orchestrate.ts +32 -4
  27. package/cli/selftune/routes/overview.ts +41 -3
  28. package/cli/selftune/routes/skill-report.ts +88 -17
  29. package/cli/selftune/types.ts +32 -0
  30. package/cli/selftune/utils/hooks.ts +12 -2
  31. package/cli/selftune/utils/transcript.ts +210 -1
  32. package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
  33. package/package.json +1 -1
  34. package/packages/telemetry-contract/src/types.ts +11 -0
  35. package/skill/SKILL.md +29 -1
  36. package/skill/Workflows/AutoActivation.md +1 -1
  37. package/skill/Workflows/Evolve.md +31 -13
  38. package/skill/Workflows/ExportCanonical.md +121 -0
  39. package/skill/Workflows/Hook.md +131 -0
  40. package/skill/Workflows/Initialize.md +9 -8
  41. package/skill/Workflows/Orchestrate.md +27 -5
  42. package/skill/Workflows/Quickstart.md +94 -0
  43. package/skill/Workflows/RepairSkillUsage.md +87 -0
  44. package/skill/Workflows/Uninstall.md +82 -0
  45. package/skill/settings_snippet.json +19 -8
package/README.md CHANGED
@@ -69,6 +69,8 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
69
69
 
70
70
  **I manage an agent setup with many skills** — You have 15+ skills installed. Some work. Some don't. Some conflict. Tell your agent "how are my skills doing?" and selftune gives you a health dashboard and automatically improves the skills that aren't keeping up.
71
71
 
72
+ **I use skills for non-coding work** — Marketing workflows, research pipelines, compliance checks, slide decks. You say "make me a presentation" and nothing happens. selftune learns that "slides", "deck", and "presentation for Monday" all mean the same skill — and fixes the routing automatically.
73
+
72
74
  ## How It Works
73
75
 
74
76
  <p align="center">
@@ -77,29 +79,27 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
77
79
 
78
80
  A continuous feedback loop that makes your skills learn and adapt. Automatically. Your agent runs everything — you just install the skill and talk naturally.
79
81
 
80
- **Observe** — Hooks capture every query and which skills fired. On Claude Code, hooks install automatically during `selftune init`. Backfill existing transcripts with `selftune ingest claude`.
82
+ **Observe** — Seven real-time hooks capture every query, every skill invocation, and every correction signal. Structured telemetry — not raw logs. On Claude Code, hooks install automatically during `selftune init`. Backfill existing transcripts with `selftune ingest claude`.
83
+
84
+ **Detect** — Finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch. Clusters missed queries by invocation type. Detects correction signals ("why didn't you use X?") and triggers immediate improvement.
85
+
86
+ **Evolve** — Generates multiple proposals biased toward different invocation types, validates each against your real eval set with majority voting, runs constitutional checks, then gates with an expensive model before deploying. Not guesswork — evidence. Automatic backup on every deploy.
81
87
 
82
- **Detect** — Finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch. Real-time correction signals ("why didn't you use X?") are detected and trigger immediate improvement.
88
+ **Watch** — After deploying changes, selftune monitors trigger rates, false negatives, and per-invocation-type scores. If anything regresses, it rolls back automatically. No manual monitoring needed.
83
89
 
84
- **Evolve** — Rewrites skill descriptions and full skill bodies — to match how you actually work. Cheap-loop mode uses haiku for the loop, sonnet for the gate (~80% cost reduction). Teacher-student body evolution with 3-gate validation. Automatic backup.
90
+ **Automate** — Run `selftune cron setup` to install OS-level scheduling. selftune syncs, grades, evolves, and watches on a schedule — fully autonomously.
85
91
 
86
- **Watch** After deploying changes, selftune monitors skill trigger rates. If anything regresses, it rolls back automatically.
92
+ ## How Is This Different from Agents That "Learn"?
87
93
 
88
- **Automate** Run `selftune cron setup` to install OS-level scheduling. selftune syncs, evaluates, evolves, and watches on a schedule no manual intervention needed.
94
+ Some agents claim self-improvement by saving notes about what worked. That's knowledge persistence — not a closed loop. There's no measurement, no validation, and no way to know if the saved notes are actually correct.
89
95
 
90
- ## What's New in v0.2.0
96
+ selftune is empirical. It observes real sessions, grades execution quality, detects missed triggers, proposes changes, validates them against eval sets, deploys with automatic backup, monitors for regressions, and rolls back on failure. Twelve interlocking mechanisms — not one background thread writing markdown.
91
97
 
92
- - **Full skill body evolution** Beyond descriptions: evolve routing tables and entire skill bodies using teacher-student model with structural, trigger, and quality gates
93
- - **Synthetic eval generation** `selftune eval generate --synthetic` generates eval sets from SKILL.md via LLM, no session logs needed. Solves cold-start: new skills get evals immediately.
94
- - **Cheap-loop evolution** `selftune evolve --cheap-loop` uses haiku for proposal generation and validation, sonnet only for the final deployment gate. ~80% cost reduction.
95
- - **Batch trigger validation** Validation now batches 10 queries per LLM call instead of one-per-query. ~10x faster evolution loops.
96
- - **Per-stage model control** — `--validation-model`, `--proposal-model`, and `--gate-model` flags give fine-grained control over which model runs each evolution stage.
97
- - **Auto-activation system** — Hooks detect when selftune should run and suggest actions
98
- - **Enforcement guardrails** — Blocks SKILL.md edits on monitored skills unless `selftune watch` has been run
99
- - **Live dashboard server** — `selftune dashboard --serve` with SSE auto-refresh and action buttons
100
- - **Evolution memory** — Persists context, plans, and decisions across context resets
101
- - **4 specialized agents** — Diagnosis analyst, pattern analyst, evolution reviewer, integration guide
102
- - **Sandbox test harness** — Comprehensive automated test coverage, including devcontainer-based LLM testing
98
+ | Approach | Measures quality? | Validates changes? | Detects regressions? | Rolls back? |
99
+ | ------------------------- | ----------------- | --------------------------- | ---------------------- | ----------- |
100
+ | Agent saves its own notes | No | No | No | No |
101
+ | Manual skill rewrites | No | No | No | No |
102
+ | **selftune** | 3-tier grading | Eval sets + majority voting | Post-deploy monitoring | Automatic |
103
103
 
104
104
  ## Commands
105
105
 
@@ -108,12 +108,15 @@ Your agent runs these — you just say what you want ("improve my skills", "show
108
108
  | Group | Command | What it does |
109
109
  | ---------- | -------------------------------------------- | ------------------------------------------------------------------------------------------- |
110
110
  | | `selftune status` | See which skills are undertriggering and why |
111
- | | `selftune orchestrate` | Run the full autonomous loop (sync → evolve → watch) |
111
+ | | `selftune last` | Quick insight from the most recent session |
112
+ | | `selftune orchestrate` | Run the full autonomous loop (sync → grade → evolve → watch) |
113
+ | | `selftune sync` | Refresh telemetry from source-truth transcripts |
112
114
  | | `selftune dashboard` | Open the visual skill health dashboard |
113
115
  | | `selftune doctor` | Health check: logs, hooks, config, permissions |
114
116
  | **ingest** | `selftune ingest claude` | Backfill from Claude Code transcripts |
115
117
  | | `selftune ingest codex` | Import Codex rollout logs (experimental) |
116
118
  | **grade** | `selftune grade --skill <name>` | Grade a skill session with evidence |
119
+ | | `selftune grade auto` | Auto-grade recent sessions for ungraded skills |
117
120
  | | `selftune grade baseline --skill <name>` | Measure skill value vs no-skill baseline |
118
121
  | **evolve** | `selftune evolve --skill <name>` | Propose, validate, and deploy improved descriptions |
119
122
  | | `selftune evolve body --skill <name>` | Evolve full skill body or routing table |
@@ -124,7 +127,9 @@ Your agent runs these — you just say what you want ("improve my skills", "show
124
127
  | | `selftune eval import` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) |
125
128
  | **auto** | `selftune cron setup` | Install OS-level scheduling (cron/launchd/systemd) |
126
129
  | | `selftune watch --skill <name>` | Monitor after deploy. Auto-rollback on regression. |
127
- | **other** | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) |
130
+ | **other** | `selftune workflows` | Discover and manage multi-skill workflows |
131
+ | | `selftune badge --skill <name>` | Generate a health badge for your skill's README |
132
+ | | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) |
128
133
  | | `selftune alpha upload` | Run a manual alpha upload cycle and emit a JSON send summary |
129
134
 
130
135
  Full command reference: `selftune --help`
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Hook runner — executes a TypeScript hook script via Bun.
4
+ *
5
+ * Usage: node run-hook.cjs <path-to-hook.ts>
6
+ *
7
+ * Stdin is piped through to the hook script (Claude Code sends JSON on stdin).
8
+ * Exit code is propagated from the hook. If bun is not found, exits 0
9
+ * (fail-open: hooks must never block Claude).
10
+ *
11
+ * Note: selftune hooks depend on Bun-specific APIs (Bun.stdin.text(),
12
+ * Bun.spawn()) and cannot run under tsx/node. The runner exists so that
13
+ * hook commands use `node run-hook.cjs` (universally available) as the
14
+ * entry point, avoiding a hard dependency on bun being in PATH for the
15
+ * shell that Claude Code invokes.
16
+ */
17
+
18
+ const { execFileSync } = require("child_process");
19
+ const hookScript = process.argv[2];
20
+
21
+ if (!hookScript) {
22
+ // No script specified — fail-open
23
+ process.exit(0);
24
+ }
25
+
26
+ try {
27
+ execFileSync("bun", ["run", hookScript], { stdio: "inherit" });
28
+ process.exit(0);
29
+ } catch (e) {
30
+ // Hook exited non-zero → propagate (e.g. exit 2 = block in PreToolUse)
31
+ if (e.status != null) {
32
+ process.exit(e.status);
33
+ }
34
+ // bun not found (ENOENT) — fail-open
35
+ process.exit(0);
36
+ }
@@ -58,7 +58,7 @@ export function buildV2PushPayload(
58
58
  const params = afterSeq !== undefined ? [afterSeq, limit] : [limit];
59
59
 
60
60
  const sql = `
61
- SELECT local_seq, record_kind, record_json
61
+ SELECT local_seq, record_kind, record_id, record_json, content_sha256
62
62
  FROM canonical_upload_staging
63
63
  ${whereClause}
64
64
  ORDER BY local_seq ASC
@@ -68,7 +68,9 @@ export function buildV2PushPayload(
68
68
  const rows = db.query(sql).all(...params) as Array<{
69
69
  local_seq: number;
70
70
  record_kind: string;
71
+ record_id: string;
71
72
  record_json: string;
73
+ content_sha256: string | null;
72
74
  }>;
73
75
 
74
76
  if (rows.length === 0) return null;
@@ -78,6 +80,7 @@ export function buildV2PushPayload(
78
80
  const orchestrateRuns: Record<string, unknown>[] = [];
79
81
  const gradingResults: Record<string, unknown>[] = [];
80
82
  const improvementSignals: Record<string, unknown>[] = [];
83
+ const contentHashes: Record<string, string> = {};
81
84
  let lastParsedSeq: number | null = null;
82
85
  let hitMalformedRow = false;
83
86
 
@@ -87,6 +90,10 @@ export function buildV2PushPayload(
87
90
  hitMalformedRow = true;
88
91
  break;
89
92
  }
93
+ // Collect content hashes for dedup — only after successful parse, keyed by kind:id
94
+ if (row.content_sha256) {
95
+ contentHashes[`${row.record_kind}:${row.record_id}`] = row.content_sha256;
96
+ }
90
97
 
91
98
  if (row.record_kind === "evolution_evidence") {
92
99
  const timestamp =
@@ -152,6 +159,12 @@ export function buildV2PushPayload(
152
159
  gradingResults,
153
160
  improvementSignals,
154
161
  );
162
+
163
+ // Attach content hashes for server-side dedup
164
+ if (Object.keys(contentHashes).length > 0) {
165
+ payload.content_hashes = contentHashes;
166
+ }
167
+
155
168
  if (lastParsedSeq === null) {
156
169
  return null;
157
170
  }
@@ -6,7 +6,7 @@
6
6
  * PushUploadResult indicating success or failure.
7
7
  */
8
8
 
9
- import type { PushUploadResult } from "../alpha-upload-contract.js";
9
+ import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
10
10
  import { getSelftuneVersion } from "../utils/selftune-meta.js";
11
11
 
12
12
  function isPushUploadResult(value: unknown): value is PushUploadResult {
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
111
111
  };
112
112
  }
113
113
  }
114
+
115
+ /**
116
+ * Lightweight HEAD check to see if a record already exists on the server.
117
+ *
118
+ * Sends HEAD {endpoint}/{recordId}. Optionally includes If-None-Match
119
+ * for content-hash comparison.
120
+ *
121
+ * Never throws -- returns { exists: false, unchanged: false } on any error
122
+ * (fail-open, matching the uploadPushPayload pattern).
123
+ */
124
+ export async function headRecord(
125
+ endpoint: string,
126
+ recordId: string,
127
+ sha256?: string,
128
+ apiKey?: string,
129
+ ): Promise<HeadCheckResult> {
130
+ const failOpen: HeadCheckResult = { exists: false, unchanged: false };
131
+ try {
132
+ const headers: Record<string, string> = {
133
+ "User-Agent": `selftune/${getSelftuneVersion()}`,
134
+ };
135
+
136
+ if (sha256) {
137
+ headers["If-None-Match"] = `"${sha256}"`;
138
+ }
139
+
140
+ if (apiKey) {
141
+ headers.Authorization = `Bearer ${apiKey}`;
142
+ }
143
+
144
+ const url = `${endpoint}/${encodeURIComponent(recordId)}`;
145
+ const response = await fetch(url, {
146
+ method: "HEAD",
147
+ headers,
148
+ signal: AbortSignal.timeout(10_000),
149
+ });
150
+
151
+ if (response.status === 200) {
152
+ return { exists: true, unchanged: false };
153
+ }
154
+ if (response.status === 304) {
155
+ return { exists: true, unchanged: true };
156
+ }
157
+ // 404 or any other status -- treat as not found
158
+ return failOpen;
159
+ } catch {
160
+ // Network error, timeout, etc. -- fail open
161
+ return failOpen;
162
+ }
163
+ }
@@ -12,7 +12,7 @@
12
12
  */
13
13
 
14
14
  import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
15
- import { uploadPushPayload } from "./client.js";
15
+ import { headRecord, uploadPushPayload } from "./client.js";
16
16
 
17
17
  // ---------------------------------------------------------------------------
18
18
  // Options
@@ -28,6 +28,8 @@ export interface FlushOptions {
28
28
  dryRun?: boolean;
29
29
  /** API key for Bearer auth on the cloud endpoint. */
30
30
  apiKey?: string;
31
+ /** When set, run HEAD checks against this endpoint before pushing. */
32
+ headCheckEndpoint?: string;
31
33
  }
32
34
 
33
35
  // ---------------------------------------------------------------------------
@@ -85,8 +87,9 @@ export async function flushQueue(
85
87
  const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
86
88
  const dryRun = options?.dryRun ?? false;
87
89
  const apiKey = options?.apiKey;
90
+ const headCheckEndpoint = options?.headCheckEndpoint;
88
91
 
89
- const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
92
+ const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
90
93
 
91
94
  const items = queue.getPending(batchSize);
92
95
 
@@ -94,7 +97,44 @@ export async function flushQueue(
94
97
  return summary;
95
98
  }
96
99
 
100
+ // -- HEAD check phase: identify records that already exist unchanged ------
101
+ const unchangedIds = new Set<number>();
102
+ if (headCheckEndpoint) {
103
+ const headChecks = items.map(async (item) => {
104
+ try {
105
+ const parsed = JSON.parse(item.payload_json) as { push_id?: string };
106
+ const pushId = parsed.push_id;
107
+ if (!pushId) return { id: item.id, skip: false };
108
+ const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
109
+ return { id: item.id, skip: result.exists && result.unchanged };
110
+ } catch {
111
+ // Fail-open: if HEAD check itself errors, don't skip
112
+ return { id: item.id, skip: false };
113
+ }
114
+ });
115
+
116
+ const results = await Promise.allSettled(headChecks);
117
+ for (const result of results) {
118
+ if (result.status === "fulfilled" && result.value.skip) {
119
+ unchangedIds.add(result.value.id);
120
+ }
121
+ }
122
+
123
+ // Mark unchanged items as sent in the queue without actually pushing
124
+ for (const item of items) {
125
+ if (unchangedIds.has(item.id)) {
126
+ if (!queue.markSending(item.id)) continue;
127
+ if (queue.markSent(item.id)) {
128
+ summary.skipped_unchanged++;
129
+ } else {
130
+ summary.failed++;
131
+ }
132
+ }
133
+ }
134
+ }
135
+
97
136
  for (const item of items) {
137
+ if (unchangedIds.has(item.id)) continue;
98
138
  const markFailedSafely = (message: string): void => {
99
139
  if (!queue.markFailed(item.id, message)) {
100
140
  console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
@@ -149,10 +189,11 @@ export async function flushQueue(
149
189
  break;
150
190
  }
151
191
 
152
- // 409 Conflict = duplicate push_id, treat as success
153
- if (status === 409) {
192
+ // 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
193
+ // Both are treated as success — the server already has this data.
194
+ if (status === 304 || status === 409) {
154
195
  if (!queue.markSent(item.id)) {
155
- markFailedSafely("local queue state update failed after duplicate upload");
196
+ markFailedSafely("local queue state update failed after duplicate/unchanged upload");
156
197
  summary.failed++;
157
198
  } else {
158
199
  summary.sent++;
@@ -146,6 +146,18 @@ function extractNormalizedAt(record: CanonicalRecord): string {
146
146
  return record.normalized_at;
147
147
  }
148
148
 
149
+ // -- Content hashing ----------------------------------------------------------
150
+
151
+ /**
152
+ * Compute SHA256 hex digest of a string (for upload dedup).
153
+ * Uses Bun's built-in CryptoHasher for zero-dependency hashing.
154
+ */
155
+ export function computeContentSha256(input: string): string {
156
+ const hasher = new Bun.CryptoHasher("sha256");
157
+ hasher.update(input);
158
+ return hasher.digest("hex");
159
+ }
160
+
149
161
  // -- Main staging function ----------------------------------------------------
150
162
 
151
163
  /**
@@ -163,9 +175,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
163
175
  const now = new Date().toISOString();
164
176
 
165
177
  const stmt = db.prepare(`
166
- INSERT OR IGNORE INTO canonical_upload_staging
167
- (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
168
- VALUES (?, ?, ?, ?, ?, ?, ?)
178
+ INSERT INTO canonical_upload_staging
179
+ (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
180
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
181
+ ON CONFLICT(record_kind, record_id) DO UPDATE SET
182
+ content_sha256 = excluded.content_sha256
183
+ WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
169
184
  `);
170
185
 
171
186
  // 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
@@ -177,14 +192,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
177
192
  : readAndEnrichCanonicalRecords(logPath);
178
193
  for (const record of records) {
179
194
  const recordId = extractRecordId(record);
195
+ const recordJson = JSON.stringify(record);
180
196
  const result = stmt.run(
181
197
  record.record_kind,
182
198
  recordId,
183
- JSON.stringify(record),
199
+ recordJson,
184
200
  extractSessionId(record),
185
201
  extractPromptId(record),
186
202
  extractNormalizedAt(record),
187
203
  now,
204
+ computeContentSha256(recordJson),
188
205
  );
189
206
  if (result.changes > 0) staged++;
190
207
  }
@@ -222,6 +239,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
222
239
  null, // no prompt_id
223
240
  entry.timestamp,
224
241
  now,
242
+ computeContentSha256(recordJson),
225
243
  );
226
244
  if (result.changes > 0) staged++;
227
245
  }
@@ -258,6 +276,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
258
276
  null, // no prompt_id
259
277
  run.timestamp,
260
278
  now,
279
+ computeContentSha256(recordJson),
261
280
  );
262
281
  if (result.changes > 0) staged++;
263
282
  }
@@ -298,6 +317,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
298
317
  null, // no prompt_id
299
318
  gr.graded_at,
300
319
  now,
320
+ computeContentSha256(recordJson),
301
321
  );
302
322
  if (result.changes > 0) staged++;
303
323
  }
@@ -332,6 +352,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
332
352
  null, // no prompt_id
333
353
  sig.timestamp,
334
354
  now,
355
+ computeContentSha256(recordJson),
335
356
  );
336
357
  if (result.changes > 0) staged++;
337
358
  }
@@ -49,4 +49,13 @@ export interface FlushSummary {
49
49
  sent: number;
50
50
  failed: number;
51
51
  skipped: number;
52
+ /** Records skipped because a HEAD check confirmed they already exist unchanged. */
53
+ skipped_unchanged: number;
54
+ }
55
+
56
+ // -- HEAD check result --------------------------------------------------------
57
+
58
+ export interface HeadCheckResult {
59
+ exists: boolean;
60
+ unchanged: boolean;
52
61
  }
@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
41
41
  export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
42
42
  export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
43
43
 
44
+ /** Allow tests to override the orchestrate lock without mutating the host lock file. */
45
+ export function getOrchestrateLockPath(): string {
46
+ return process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH || ORCHESTRATE_LOCK;
47
+ }
48
+
44
49
  /** Evolution memory directory — human-readable session context that survives resets. */
45
50
  export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
46
51
  export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
@@ -162,17 +167,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
162
167
 
163
168
  /** Regex patterns for detecting secrets that must be redacted. */
164
169
  export const SECRET_PATTERNS = [
165
- /sk-[a-zA-Z0-9]{20,}/g, // OpenAI / Anthropic API keys
170
+ // -- API keys & tokens (platform-specific prefixes) --
171
+ /sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
172
+ /sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
166
173
  /ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
167
174
  /gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
168
175
  /github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
169
- /AKIA[A-Z0-9]{16}/g, // AWS access key IDs
176
+ /npm_[a-zA-Z0-9]{36}/g, // npm tokens
177
+ /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
178
+
179
+ // -- AWS --
180
+ /AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
181
+ /ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)
182
+
183
+ // -- GCP --
184
+ /AIza[0-9A-Za-z_-]{35}/g, // Google API key
185
+
186
+ // -- Stripe --
187
+ /(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys
188
+
189
+ // -- Twilio --
190
+ /SK[a-f0-9]{32}/g, // Twilio API key
191
+
192
+ // -- SendGrid --
193
+ /SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key
194
+
195
+ // -- Mailgun --
196
+ /key-[a-zA-Z0-9]{32}/g, // Mailgun API key
197
+
198
+ // -- Slack --
170
199
  /xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
171
200
  /xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
172
201
  /xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens
173
- /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JWTs
174
- /npm_[a-zA-Z0-9]{36}/g, // npm tokens
175
- /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
202
+
203
+ // -- JWTs --
204
+ /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens
205
+
206
+ // -- Private keys (PEM block headers) --
207
+ /-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)
208
+
209
+ // -- Database connection URIs --
210
+ /(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB URIs with credentials
211
+
212
+ // -- Azure --
213
+ /DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string
214
+
215
+ // -- Webhook URLs --
216
+ /https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
217
+ /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook
218
+
219
+ // -- SSH keys --
220
+ /ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material
221
+
222
+ // -- Generic high-confidence patterns --
223
+ /Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
224
+ /https?:\/\/[^:]+:[^@]+@[^\s"']+/g, // Basic auth embedded in URLs
225
+ /(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
176
226
  ] as const;
177
227
 
178
228
  /** Regex for file paths (Unix and Windows). */
@@ -184,6 +234,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
184
234
  /** Regex for IP addresses (v4). */
185
235
  export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
186
236
 
237
+ // ---------------------------------------------------------------------------
238
+ // PII patterns — high-confidence, low-false-positive personally identifiable info
239
+ // ---------------------------------------------------------------------------
240
+
241
+ export const PII_PATTERNS = [
242
+ // -- Phone numbers --
243
+ /\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
244
+ /\b\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567
245
+
246
+ // -- Credit card numbers (major networks, with optional separators) --
247
+ /\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
248
+ /\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
249
+ /\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
250
+ /\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)
251
+
252
+ // -- SSN / national IDs --
253
+ /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789
254
+
255
+ // -- IPv6 --
256
+ /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
257
+ /\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
258
+ /::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)
259
+
260
+ // -- Date of birth patterns (in structured contexts) --
261
+ /\b(?:dob|date\.of\.birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
262
+ ] as const;
263
+
187
264
  /** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
188
265
  export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;
189
266
 
@@ -15,6 +15,7 @@ import {
15
15
  IDENTIFIER_PATTERN,
16
16
  IP_PATTERN,
17
17
  MODULE_PATTERN,
18
+ PII_PATTERNS,
18
19
  SECRET_PATTERNS,
19
20
  } from "../constants.js";
20
21
  import type { ContributionBundle } from "../types.js";
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
26
27
  const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
27
28
  const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
28
29
 
30
+ /** Apply a set of regex patterns to text, replacing matches with a token. Clones each regex to reset lastIndex. */
31
+ function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
32
+ let result = text;
33
+ for (const pattern of patterns) {
34
+ result = result.replace(new RegExp(pattern.source, pattern.flags), token);
35
+ }
36
+ return result;
37
+ }
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
41
+ // ---------------------------------------------------------------------------
42
+
43
+ /**
44
+ * Apply only SECRET_PATTERNS redaction to a string.
45
+ * Lighter than sanitizeConservative — no path/email/IP/UUID replacement.
46
+ */
47
+ export function sanitizeSecrets(text: string): string {
48
+ if (!text) return text;
49
+ return applyPatterns(text, SECRET_PATTERNS, "[SECRET]");
50
+ }
51
+
52
+ /**
53
+ * Recursively traverse a value and redact secrets in all string leaves.
54
+ * Non-string primitives, Dates, and other non-plain objects pass through unchanged.
55
+ * Does NOT mutate the input — returns a new structure.
56
+ */
57
+ export function redactSecretsDeep<T>(value: T): T {
58
+ if (typeof value === "string") return sanitizeSecrets(value) as T;
59
+ if (Array.isArray(value)) return value.map((item) => redactSecretsDeep(item)) as T;
60
+ if (value && typeof value === "object" && !(value instanceof Date)) {
61
+ // Only recurse into plain objects — pass through Map, Set, RegExp, class instances, etc.
62
+ const proto = Object.getPrototypeOf(value);
63
+ if (proto !== null && proto !== Object.prototype) return value;
64
+ const result: Record<string, unknown> = {};
65
+ for (const [k, v] of Object.entries(value)) {
66
+ result[k] = redactSecretsDeep(v);
67
+ }
68
+ return result as T;
69
+ }
70
+ return value;
71
+ }
72
+
29
73
  // ---------------------------------------------------------------------------
30
74
  // Conservative sanitization
31
75
  // ---------------------------------------------------------------------------
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
36
80
  let result = text;
37
81
 
38
82
  // Secrets first (longest/most specific patterns)
39
- for (const pattern of SECRET_PATTERNS) {
40
- // Clone regex to reset lastIndex
41
- result = result.replace(new RegExp(pattern.source, pattern.flags), "[SECRET]");
42
- }
83
+ result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
84
+
85
+ // PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
86
+ result = applyPatterns(result, PII_PATTERNS, "[PII]");
43
87
 
44
88
  // File paths
45
89
  result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
@@ -123,7 +167,7 @@ export function sanitizeBundle(
123
167
  level: "conservative" | "aggressive",
124
168
  projectName?: string,
125
169
  ): ContributionBundle {
126
- return {
170
+ const fieldSanitized: ContributionBundle = {
127
171
  ...bundle,
128
172
  sanitization_level: level,
129
173
  positive_queries: bundle.positive_queries.map((q) => ({
@@ -151,6 +195,9 @@ export function sanitizeBundle(
151
195
  }
152
196
  : {}),
153
197
  };
198
+
199
+ // Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
200
+ return redactSecretsDeep(fieldSanitized);
154
201
  }
155
202
 
156
203
  // ---------------------------------------------------------------------------