npm - selftune - Versions diffs - 0.2.16 → 0.2.19 - Mend

selftune 0.2.16 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/README.md +32 -22
package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
package/apps/local-dashboard/dist/index.html +5 -5
package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
package/cli/selftune/alpha-upload/client.ts +51 -1
package/cli/selftune/alpha-upload/flush.ts +46 -5
package/cli/selftune/alpha-upload/stage-canonical.ts +32 -10
package/cli/selftune/alpha-upload-contract.ts +9 -0
package/cli/selftune/constants.ts +92 -5
package/cli/selftune/contribute/contribute.ts +30 -2
package/cli/selftune/contribute/sanitize.ts +52 -5
package/cli/selftune/contribution-config.ts +249 -0
package/cli/selftune/contribution-relay.ts +177 -0
package/cli/selftune/contribution-signals.ts +219 -0
package/cli/selftune/contribution-staging.ts +147 -0
package/cli/selftune/contributions.ts +532 -0
package/cli/selftune/creator-contributions.ts +333 -0
package/cli/selftune/dashboard-contract.ts +305 -1
package/cli/selftune/dashboard-server.ts +47 -13
package/cli/selftune/eval/family-overlap.ts +395 -0
package/cli/selftune/eval/hooks-to-evals.ts +182 -28
package/cli/selftune/eval/synthetic-evals.ts +298 -11
package/cli/selftune/evolution/description-quality.ts +12 -11
package/cli/selftune/evolution/evolve.ts +214 -51
package/cli/selftune/evolution/validate-proposal.ts +9 -6
package/cli/selftune/export.ts +2 -2
package/cli/selftune/grading/grade-session.ts +20 -0
package/cli/selftune/hooks/commit-track.ts +188 -0
package/cli/selftune/hooks/prompt-log.ts +10 -1
package/cli/selftune/hooks/session-stop.ts +2 -2
package/cli/selftune/hooks/skill-eval.ts +15 -1
package/cli/selftune/hooks/stdin-preview.ts +32 -0
package/cli/selftune/index.ts +41 -5
package/cli/selftune/ingestors/codex-rollout.ts +31 -35
package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
package/cli/selftune/localdb/db.ts +2 -2
package/cli/selftune/localdb/direct-write.ts +69 -6
package/cli/selftune/localdb/queries.ts +1253 -37
package/cli/selftune/localdb/schema.ts +66 -0
package/cli/selftune/orchestrate.ts +32 -4
package/cli/selftune/recover.ts +153 -0
package/cli/selftune/repair/skill-usage.ts +363 -4
package/cli/selftune/routes/actions.ts +35 -1
package/cli/selftune/routes/analytics.ts +14 -0
package/cli/selftune/routes/index.ts +1 -0
package/cli/selftune/routes/overview.ts +150 -4
package/cli/selftune/routes/skill-report.ts +648 -18
package/cli/selftune/status.ts +81 -2
package/cli/selftune/sync.ts +56 -2
package/cli/selftune/trust-model.ts +66 -0
package/cli/selftune/types.ts +80 -0
package/cli/selftune/utils/skill-detection.ts +43 -0
package/cli/selftune/utils/transcript.ts +210 -1
package/cli/selftune/watchlist.ts +65 -0
package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
package/package.json +1 -1
package/packages/telemetry-contract/src/types.ts +11 -0
package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
package/packages/ui/src/components/section-cards.tsx +12 -9
package/packages/ui/src/primitives/card.tsx +1 -1
package/skill/SKILL.md +40 -2
package/skill/Workflows/AlphaUpload.md +4 -0
package/skill/Workflows/Composability.md +64 -0
package/skill/Workflows/Contribute.md +6 -3
package/skill/Workflows/Contributions.md +97 -0
package/skill/Workflows/CreatorContributions.md +74 -0
package/skill/Workflows/Dashboard.md +31 -0
package/skill/Workflows/Evals.md +57 -8
package/skill/Workflows/Evolve.md +31 -13
package/skill/Workflows/ExportCanonical.md +121 -0
package/skill/Workflows/Hook.md +131 -0
package/skill/Workflows/Ingest.md +7 -0
package/skill/Workflows/Initialize.md +29 -9
package/skill/Workflows/Orchestrate.md +27 -5
package/skill/Workflows/Quickstart.md +94 -0
package/skill/Workflows/Recover.md +84 -0
package/skill/Workflows/RepairSkillUsage.md +95 -0
package/skill/Workflows/Sync.md +18 -12
package/skill/Workflows/Uninstall.md +82 -0
package/skill/settings_snippet.json +11 -0
package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12

package/cli/selftune/alpha-upload/client.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  * PushUploadResult indicating success or failure.
  */
-import type { PushUploadResult } from "../alpha-upload-contract.js";
+import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
 import { getSelftuneVersion } from "../utils/selftune-meta.js";
 function isPushUploadResult(value: unknown): value is PushUploadResult {
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
     };
   }
 }
+/**
+ * Lightweight HEAD check to see if a record already exists on the server.
+ *
+ * Sends HEAD {endpoint}/{recordId} with a 10-second timeout. When `sha256` is
+ * given it is sent as a quoted `If-None-Match` value; when `apiKey` is given
+ * it is sent as a Bearer token.
+ *
+ * Never throws -- returns { exists: false, unchanged: false } on any error
+ * (fail-open, matching the uploadPushPayload pattern).
+ *
+ * @param endpoint - Base URL of the record collection; trailing slashes are ignored.
+ * @param recordId - Record identifier, URL-encoded into the final path segment.
+ * @param sha256 - Optional content hash used for the If-None-Match comparison.
+ * @param apiKey - Optional API key for the Authorization header.
+ * @returns `exists: true` for a 200 or 304 response, `unchanged: true` only for
+ *   304; any other status, an empty record id, or a thrown error yields the
+ *   fail-open result.
+ */
+export async function headRecord(
+  endpoint: string,
+  recordId: string,
+  sha256?: string,
+  apiKey?: string,
+): Promise<HeadCheckResult> {
+  const failOpen: HeadCheckResult = { exists: false, unchanged: false };
+  // An empty id would turn the request into HEAD {endpoint}/, and a 200 from
+  // that URL would be misread as "this record exists".
+  if (!recordId) return failOpen;
+  try {
+    const headers: Record<string, string> = {
+      "User-Agent": `selftune/${getSelftuneVersion()}`,
+    };
+    if (sha256) {
+      // Sent as a double-quoted value.
+      headers["If-None-Match"] = `"${sha256}"`;
+    }
+    if (apiKey) {
+      headers.Authorization = `Bearer ${apiKey}`;
+    }
+    // Strip trailing slashes so "https://host/records/" does not become
+    // "https://host/records//<id>".
+    const base = endpoint.replace(/\/+$/, "");
+    const url = `${base}/${encodeURIComponent(recordId)}`;
+    const response = await fetch(url, {
+      method: "HEAD",
+      headers,
+      signal: AbortSignal.timeout(10_000),
+    });
+    if (response.status === 200) {
+      return { exists: true, unchanged: false };
+    }
+    if (response.status === 304) {
+      return { exists: true, unchanged: true };
+    }
+    // 404 or any other status -- treat as not found
+    return failOpen;
+  } catch {
+    // Network error, timeout, etc. -- fail open
+    return failOpen;
+  }
+}

package/cli/selftune/alpha-upload/flush.ts CHANGED Viewed

@@ -12,7 +12,7 @@
  */
 import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
-import { uploadPushPayload } from "./client.js";
+import { headRecord, uploadPushPayload } from "./client.js";
 // ---------------------------------------------------------------------------
 // Options
@@ -28,6 +28,8 @@ export interface FlushOptions {
   dryRun?: boolean;
   /** API key for Bearer auth on the cloud endpoint. */
   apiKey?: string;
+  /** When set, run HEAD checks against this endpoint before pushing. */
+  headCheckEndpoint?: string;
 }
 // ---------------------------------------------------------------------------
@@ -85,8 +87,9 @@ export async function flushQueue(
   const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
   const dryRun = options?.dryRun ?? false;
   const apiKey = options?.apiKey;
+  const headCheckEndpoint = options?.headCheckEndpoint;
-  const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
+  const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
   const items = queue.getPending(batchSize);
@@ -94,7 +97,44 @@ export async function flushQueue(
     return summary;
   }
+  // -- HEAD check phase: identify records that already exist unchanged ------
+  const unchangedIds = new Set<number>();
+  if (headCheckEndpoint) {
+    const headChecks = items.map(async (item) => {
+      try {
+        const parsed = JSON.parse(item.payload_json) as { push_id?: string };
+        const pushId = parsed.push_id;
+        if (!pushId) return { id: item.id, skip: false };
+        const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
+        return { id: item.id, skip: result.exists && result.unchanged };
+      } catch {
+        // Fail-open: if HEAD check itself errors, don't skip
+        return { id: item.id, skip: false };
+      }
+    });
+    const results = await Promise.allSettled(headChecks);
+    for (const result of results) {
+      if (result.status === "fulfilled" && result.value.skip) {
+        unchangedIds.add(result.value.id);
+      }
+    }
+    // Mark unchanged items as sent in the queue without actually pushing
+    for (const item of items) {
+      if (unchangedIds.has(item.id)) {
+        if (!queue.markSending(item.id)) continue;
+        if (queue.markSent(item.id)) {
+          summary.skipped_unchanged++;
+        } else {
+          summary.failed++;
+        }
+      }
+    }
+  }
   for (const item of items) {
+    if (unchangedIds.has(item.id)) continue;
     const markFailedSafely = (message: string): void => {
       if (!queue.markFailed(item.id, message)) {
         console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
@@ -149,10 +189,11 @@ export async function flushQueue(
         break;
       }
-      // 409 Conflict = duplicate push_id, treat as success
-      if (status === 409) {
+      // 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
+      // Both are treated as success — the server already has this data.
+      if (status === 304 || status === 409) {
         if (!queue.markSent(item.id)) {
-          markFailedSafely("local queue state update failed after duplicate upload");
+          markFailedSafely("local queue state update failed after duplicate/unchanged upload");
           summary.failed++;
         } else {
           summary.sent++;

package/cli/selftune/alpha-upload/stage-canonical.ts CHANGED Viewed

@@ -1,9 +1,10 @@
 /**
  * Canonical upload staging writer.
  *
- * Reads canonical records from the JSONL source-of-truth log and evolution
- * evidence from SQLite, then inserts them into a single monotonic staging
- * table for lossless upload batching.
+ * Reads canonical records from SQLite by default (or from a JSONL override for
+ * explicit recovery/debugging) plus evolution evidence from SQLite, then
+ * inserts them into a single monotonic staging table for lossless upload
+ * batching.
  *
  * The staging table preserves the full canonical record JSON -- no field
  * dropping, no hardcoding of provenance fields.
@@ -146,16 +147,28 @@ function extractNormalizedAt(record: CanonicalRecord): string {
   return record.normalized_at;
 }
+// -- Content hashing ----------------------------------------------------------
+/**
+ * Hash `input` with SHA-256 using Bun's built-in CryptoHasher (no extra
+ * dependency) and return the digest as a hex string, for upload dedup.
+ *
+ * @param input - Text to hash.
+ * @returns Hex-encoded SHA-256 digest of `input`.
+ */
+export function computeContentSha256(input: string): string {
+  return new Bun.CryptoHasher("sha256").update(input).digest("hex");
+}
 // -- Main staging function ----------------------------------------------------
 /**
- * Stage canonical records from the JSONL log and evolution evidence from SQLite
- * into the canonical_upload_staging table.
+ * Stage canonical records from SQLite by default (or a custom JSONL log path
+ * override) and evolution evidence from SQLite into canonical_upload_staging.
  *
  * Upserts on (record_kind, record_id): an existing row is kept as-is, except that a missing content_sha256 is filled in.
  *
  * @param db - SQLite database handle
- * @param logPath - Path to canonical JSONL log (defaults to CANONICAL_LOG)
+ * @param logPath - Canonical JSONL override path (default sentinel keeps SQLite-backed staging)
  * @returns Number of newly staged records
  */
 export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_LOG): number {
@@ -163,9 +176,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
   const now = new Date().toISOString();
   const stmt = db.prepare(`
-    INSERT OR IGNORE INTO canonical_upload_staging
-      (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
-    VALUES (?, ?, ?, ?, ?, ?, ?)
+    INSERT INTO canonical_upload_staging
+      (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
+    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+    ON CONFLICT(record_kind, record_id) DO UPDATE SET
+      content_sha256 = excluded.content_sha256
+    WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
   `);
   // 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
@@ -177,14 +193,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
       : readAndEnrichCanonicalRecords(logPath);
   for (const record of records) {
     const recordId = extractRecordId(record);
+    const recordJson = JSON.stringify(record);
     const result = stmt.run(
       record.record_kind,
       recordId,
-      JSON.stringify(record),
+      recordJson,
       extractSessionId(record),
       extractPromptId(record),
       extractNormalizedAt(record),
       now,
+      computeContentSha256(recordJson),
     );
     if (result.changes > 0) staged++;
   }
@@ -222,6 +240,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
         null, // no prompt_id
         entry.timestamp,
         now,
+        computeContentSha256(recordJson),
       );
       if (result.changes > 0) staged++;
     }
@@ -258,6 +277,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
         null, // no prompt_id
         run.timestamp,
         now,
+        computeContentSha256(recordJson),
       );
       if (result.changes > 0) staged++;
     }
@@ -298,6 +318,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
         null, // no prompt_id
         gr.graded_at,
         now,
+        computeContentSha256(recordJson),
       );
       if (result.changes > 0) staged++;
     }
@@ -332,6 +353,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
         null, // no prompt_id
         sig.timestamp,
         now,
+        computeContentSha256(recordJson),
       );
       if (result.changes > 0) staged++;
     }

package/cli/selftune/alpha-upload-contract.ts CHANGED Viewed

@@ -49,4 +49,13 @@ export interface FlushSummary {
   sent: number;
   failed: number;
   skipped: number;
+  /** Records skipped because a HEAD check confirmed they already exist unchanged. */
+  skipped_unchanged: number;
+}
+// -- HEAD check result --------------------------------------------------------
+/** Outcome of a HEAD existence probe for a single record (see headRecord). */
+export interface HeadCheckResult {
+  /** True when headRecord received a 200 or 304 response for the record. */
+  exists: boolean;
+  /** True only for a 304 response; headRecord never sets it when `exists` is false. */
+  unchanged: boolean;
 }

package/cli/selftune/constants.ts CHANGED Viewed

@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
 export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
 export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
+/**
+ * Resolve the orchestrate lock file path.
+ *
+ * A non-empty SELFTUNE_ORCHESTRATE_LOCK_PATH environment variable takes
+ * precedence, so tests can avoid mutating the host lock file; otherwise
+ * ORCHESTRATE_LOCK is returned.
+ */
+export function getOrchestrateLockPath(): string {
+  const override = process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH;
+  return override ? override : ORCHESTRATE_LOCK;
+}
 /** Evolution memory directory — human-readable session context that survives resets. */
 export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
 export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
@@ -155,6 +160,16 @@ export const OPENCLAW_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "openclaw-ingest
 /** Default output directory for contribution bundles. */
 export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
+/**
+ * Creator-directed contribution preferences (per-skill opt-in state).
+ * Stored as "contribution-preferences.json" under SELFTUNE_CONFIG_DIR.
+ */
+export const CONTRIBUTION_PREFERENCES_PATH = join(
+  SELFTUNE_CONFIG_DIR,
+  "contribution-preferences.json",
+);
+/**
+ * Creator overview watchlist preference.
+ * Stored as "watched-skills.json" under SELFTUNE_CONFIG_DIR.
+ */
+export const WATCHED_SKILLS_PATH = join(SELFTUNE_CONFIG_DIR, "watched-skills.json");
+/**
+ * Creator-directed relay endpoint for staged contribution signals.
+ *
+ * Overridable through the SELFTUNE_CONTRIBUTION_RELAY_ENDPOINT environment
+ * variable, read once when this module is evaluated.
+ * NOTE(review): `??` keeps an empty-string override as the endpoint, whereas
+ * getOrchestrateLockPath falls back on empty values with `||` -- confirm which
+ * behaviour is intended here.
+ */
+export const CONTRIBUTION_RELAY_ENDPOINT =
+  process.env.SELFTUNE_CONTRIBUTION_RELAY_ENDPOINT ?? "https://api.selftune.dev/api/v1/signals";
 // ---------------------------------------------------------------------------
 // Sanitization constants (for contribute command)
@@ -162,17 +177,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
 /** Regex patterns for detecting secrets that must be redacted. */
 export const SECRET_PATTERNS = [
-  /sk-[a-zA-Z0-9]{20,}/g, // OpenAI / Anthropic API keys
+  // -- API keys & tokens (platform-specific prefixes) --
+  /sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
+  // The plain sk- pattern stops at the first "-" or "_", so project-scoped keys
+  // (sk-proj-...) need their own entry.
+  /sk-proj-[a-zA-Z0-9_-]{20,}/g, // OpenAI project-scoped API keys
+  /sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
   /ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
   /gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
   /github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
-  /AKIA[A-Z0-9]{16}/g, // AWS access key IDs
+  /npm_[a-zA-Z0-9]{36}/g, // npm tokens
+  /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
+  // -- AWS --
+  /AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
+  /ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)
+  // -- GCP --
+  /AIza[0-9A-Za-z_-]{35}/g, // Google API key
+  // -- Stripe --
+  /(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys
+  // -- Twilio --
+  /SK[a-f0-9]{32}/g, // Twilio API key
+  // -- SendGrid --
+  /SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key
+  // -- Mailgun --
+  /key-[a-zA-Z0-9]{32}/g, // Mailgun API key
+  // -- Slack --
   /xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
   /xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
   /xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens
-  /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JWTs
-  /npm_[a-zA-Z0-9]{36}/g, // npm tokens
-  /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
+  // -- JWTs --
+  /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens
+  // -- Private keys (PEM block headers) --
+  /-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)
+  // -- Database connection URIs --
+  /(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB URIs with credentials
+  // -- Azure --
+  /DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string
+  // -- Webhook URLs --
+  /https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
+  /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook
+  // -- SSH keys --
+  /ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material
+  // -- Generic high-confidence patterns --
+  /Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
+  // The user and password parts exclude whitespace and "/" so the match cannot
+  // run from a plain URL across the following text to an unrelated ":" and "@".
+  /https?:\/\/[^\s:\/@]+:[^\s\/@]+@[^\s"']+/g, // Basic auth embedded in URLs
+  /(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
 ] as const;
 /** Regex for file paths (Unix and Windows). */
@@ -184,6 +244,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
 /** Regex for IP addresses (v4). */
 export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
+// ---------------------------------------------------------------------------
+// PII patterns — high-confidence, low-false-positive personally identifiable info
+// ---------------------------------------------------------------------------
+export const PII_PATTERNS = [
+  // -- Phone numbers --
+  /\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
+  // Start on "(" or a word boundary: a bare \b before an optional "(" cannot
+  // match after whitespace, which left the opening parenthesis unredacted.
+  /(?:\(|\b)\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567
+  // -- Credit card numbers (major networks, with optional separators) --
+  /\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
+  /\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
+  /\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
+  /\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)
+  // -- SSN / national IDs --
+  /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789
+  // -- IPv6 --
+  /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
+  /\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
+  /::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)
+  // -- Date of birth patterns (in structured contexts) --
+  // Accept "date of birth", "date_of_birth", "date-of-birth", "dateofbirth" and
+  // "date.of.birth"; escaped dots alone only matched the dotted spelling.
+  /\b(?:dob|date[\s._-]?of[\s._-]?birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
+] as const;
 /** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
 export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;

package/cli/selftune/contribute/contribute.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env bun
 /**
- * selftune contribute — opt-in export of anonymized skill observability data.
+ * selftune contribute — community export of anonymized skill observability data.
  *
  * Usage:
  *   bun run cli/selftune/contribute/contribute.ts --skill selftune [--preview] [--output file.json]
@@ -31,10 +31,38 @@ export async function cliMain(): Promise<void> {
       submit: { type: "boolean", default: false },
       endpoint: { type: "string", default: "https://selftune-api.fly.dev" },
       github: { type: "boolean", default: false },
+      help: { type: "boolean", short: "h", default: false },
     },
     strict: true,
   });
+  if (values.help) {
+    console.log(`selftune contribute — Export an anonymized community bundle
+Usage:
+  selftune contribute --skill <name> [--preview] [--sanitize conservative|aggressive]
+  selftune contribute --skill <name> [--output <file>] [--submit]
+Purpose:
+  Build a sanitized community contribution bundle from local SQLite data.
+  This is separate from:
+    selftune contributions  Creator-directed sharing preferences
+    selftune alpha upload   Personal cloud upload cycle
+Options:
+  --skill <name>                    Skill to export
+  --preview                         Print the sanitized bundle instead of writing it
+  --sanitize conservative|aggressive
+                                    Choose the sanitization level
+  --output <file>                   Write the bundle to an explicit file path
+  --since <timestamp>               Only include records on or after this time
+  --submit                          Submit the bundle after writing it
+  --endpoint <url>                  Override the default service endpoint
+  --github                          Submit via GitHub flow instead of the service
+  -h, --help                        Show this help`);
+    return;
+  }
   const skillName = values.skill ?? "selftune";
   const sanitizationLevel = values.sanitize === "aggressive" ? "aggressive" : "conservative";
@@ -81,7 +109,7 @@ export async function cliMain(): Promise<void> {
   writeFileSync(outputPath, json, "utf-8");
   // 6. Summary
-  console.log(`Contribution bundle written to: ${outputPath}`);
+  console.log(`Community contribution bundle written to: ${outputPath}`);
   console.log(`  Queries:       ${bundle.positive_queries.length}`);
   console.log(`  Eval entries:  ${bundle.eval_entries.length}`);
   console.log(`  Sessions:      ${bundle.session_metrics.total_sessions}`);

package/cli/selftune/contribute/sanitize.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import {
   IDENTIFIER_PATTERN,
   IP_PATTERN,
   MODULE_PATTERN,
+  PII_PATTERNS,
   SECRET_PATTERNS,
 } from "../constants.js";
 import type { ContributionBundle } from "../types.js";
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
 const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
 const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
+/**
+ * Run every pattern over `text` in order, substituting `token` for each match.
+ * Each regex is re-created from its source and flags so no lastIndex state is
+ * carried over from the shared pattern objects.
+ */
+function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
+  return patterns.reduce(
+    (redacted, { source, flags }) => redacted.replace(new RegExp(source, flags), token),
+    text,
+  );
+}
+// ---------------------------------------------------------------------------
+// Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
+// ---------------------------------------------------------------------------
+/**
+ * Redact SECRET_PATTERNS matches in a string, replacing each with "[SECRET]".
+ * Narrower than sanitizeConservative: paths, emails, IPs and UUIDs are left
+ * alone. Empty input is returned as-is.
+ */
+export function sanitizeSecrets(text: string): string {
+  return text ? applyPatterns(text, SECRET_PATTERNS, "[SECRET]") : text;
+}
+/**
+ * Build a secret-redacted copy of `value`.
+ *
+ * Strings go through sanitizeSecrets; arrays and plain objects (prototype is
+ * Object.prototype or null) are rebuilt with every element/property value
+ * redacted recursively. Anything else -- non-string primitives, Dates, Map,
+ * Set, RegExp, class instances -- is returned as the same reference.
+ * The input itself is never mutated.
+ */
+export function redactSecretsDeep<T>(value: T): T {
+  if (typeof value === "string") {
+    return sanitizeSecrets(value) as T;
+  }
+  if (Array.isArray(value)) {
+    return value.map((element) => redactSecretsDeep(element)) as T;
+  }
+  // Falsy values, non-objects and Dates need no traversal.
+  if (!value || typeof value !== "object" || value instanceof Date) {
+    return value;
+  }
+  // Anything with a custom prototype is handed back untouched.
+  const prototype = Object.getPrototypeOf(value);
+  const isPlainObject = prototype === null || prototype === Object.prototype;
+  if (!isPlainObject) {
+    return value;
+  }
+  const redacted: Record<string, unknown> = {};
+  for (const [key, child] of Object.entries(value)) {
+    redacted[key] = redactSecretsDeep(child);
+  }
+  return redacted as T;
+}
 // ---------------------------------------------------------------------------
 // Conservative sanitization
 // ---------------------------------------------------------------------------
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
   let result = text;
   // Secrets first (longest/most specific patterns)
-  for (const pattern of SECRET_PATTERNS) {
-    // Clone regex to reset lastIndex
-    result = result.replace(new RegExp(pattern.source, pattern.flags), "[SECRET]");
-  }
+  result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
+  // PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
+  result = applyPatterns(result, PII_PATTERNS, "[PII]");
   // File paths
   result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
@@ -123,7 +167,7 @@ export function sanitizeBundle(
   level: "conservative" | "aggressive",
   projectName?: string,
 ): ContributionBundle {
-  return {
+  const fieldSanitized: ContributionBundle = {
     ...bundle,
     sanitization_level: level,
     positive_queries: bundle.positive_queries.map((q) => ({
@@ -151,6 +195,9 @@ export function sanitizeBundle(
         }
       : {}),
   };
+  // Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
+  return redactSecretsDeep(fieldSanitized);
 }
 // ---------------------------------------------------------------------------