selftune 0.2.16 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +32 -22
  2. package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
  8. package/cli/selftune/alpha-upload/client.ts +51 -1
  9. package/cli/selftune/alpha-upload/flush.ts +46 -5
  10. package/cli/selftune/alpha-upload/stage-canonical.ts +32 -10
  11. package/cli/selftune/alpha-upload-contract.ts +9 -0
  12. package/cli/selftune/constants.ts +92 -5
  13. package/cli/selftune/contribute/contribute.ts +30 -2
  14. package/cli/selftune/contribute/sanitize.ts +52 -5
  15. package/cli/selftune/contribution-config.ts +249 -0
  16. package/cli/selftune/contribution-relay.ts +177 -0
  17. package/cli/selftune/contribution-signals.ts +219 -0
  18. package/cli/selftune/contribution-staging.ts +147 -0
  19. package/cli/selftune/contributions.ts +532 -0
  20. package/cli/selftune/creator-contributions.ts +333 -0
  21. package/cli/selftune/dashboard-contract.ts +305 -1
  22. package/cli/selftune/dashboard-server.ts +47 -13
  23. package/cli/selftune/eval/family-overlap.ts +395 -0
  24. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  25. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  26. package/cli/selftune/evolution/description-quality.ts +12 -11
  27. package/cli/selftune/evolution/evolve.ts +214 -51
  28. package/cli/selftune/evolution/validate-proposal.ts +9 -6
  29. package/cli/selftune/export.ts +2 -2
  30. package/cli/selftune/grading/grade-session.ts +20 -0
  31. package/cli/selftune/hooks/commit-track.ts +188 -0
  32. package/cli/selftune/hooks/prompt-log.ts +10 -1
  33. package/cli/selftune/hooks/session-stop.ts +2 -2
  34. package/cli/selftune/hooks/skill-eval.ts +15 -1
  35. package/cli/selftune/hooks/stdin-preview.ts +32 -0
  36. package/cli/selftune/index.ts +41 -5
  37. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  38. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  39. package/cli/selftune/localdb/db.ts +2 -2
  40. package/cli/selftune/localdb/direct-write.ts +69 -6
  41. package/cli/selftune/localdb/queries.ts +1253 -37
  42. package/cli/selftune/localdb/schema.ts +66 -0
  43. package/cli/selftune/orchestrate.ts +32 -4
  44. package/cli/selftune/recover.ts +153 -0
  45. package/cli/selftune/repair/skill-usage.ts +363 -4
  46. package/cli/selftune/routes/actions.ts +35 -1
  47. package/cli/selftune/routes/analytics.ts +14 -0
  48. package/cli/selftune/routes/index.ts +1 -0
  49. package/cli/selftune/routes/overview.ts +150 -4
  50. package/cli/selftune/routes/skill-report.ts +648 -18
  51. package/cli/selftune/status.ts +81 -2
  52. package/cli/selftune/sync.ts +56 -2
  53. package/cli/selftune/trust-model.ts +66 -0
  54. package/cli/selftune/types.ts +80 -0
  55. package/cli/selftune/utils/skill-detection.ts +43 -0
  56. package/cli/selftune/utils/transcript.ts +210 -1
  57. package/cli/selftune/watchlist.ts +65 -0
  58. package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
  59. package/package.json +1 -1
  60. package/packages/telemetry-contract/src/types.ts +11 -0
  61. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  62. package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
  63. package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
  64. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  65. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  66. package/packages/ui/src/components/section-cards.tsx +12 -9
  67. package/packages/ui/src/primitives/card.tsx +1 -1
  68. package/skill/SKILL.md +40 -2
  69. package/skill/Workflows/AlphaUpload.md +4 -0
  70. package/skill/Workflows/Composability.md +64 -0
  71. package/skill/Workflows/Contribute.md +6 -3
  72. package/skill/Workflows/Contributions.md +97 -0
  73. package/skill/Workflows/CreatorContributions.md +74 -0
  74. package/skill/Workflows/Dashboard.md +31 -0
  75. package/skill/Workflows/Evals.md +57 -8
  76. package/skill/Workflows/Evolve.md +31 -13
  77. package/skill/Workflows/ExportCanonical.md +121 -0
  78. package/skill/Workflows/Hook.md +131 -0
  79. package/skill/Workflows/Ingest.md +7 -0
  80. package/skill/Workflows/Initialize.md +29 -9
  81. package/skill/Workflows/Orchestrate.md +27 -5
  82. package/skill/Workflows/Quickstart.md +94 -0
  83. package/skill/Workflows/Recover.md +84 -0
  84. package/skill/Workflows/RepairSkillUsage.md +95 -0
  85. package/skill/Workflows/Sync.md +18 -12
  86. package/skill/Workflows/Uninstall.md +82 -0
  87. package/skill/settings_snippet.json +11 -0
  88. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  89. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  90. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  91. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -6,7 +6,7 @@
6
6
  * PushUploadResult indicating success or failure.
7
7
  */
8
8
 
9
- import type { PushUploadResult } from "../alpha-upload-contract.js";
9
+ import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
10
10
  import { getSelftuneVersion } from "../utils/selftune-meta.js";
11
11
 
12
12
  function isPushUploadResult(value: unknown): value is PushUploadResult {
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
111
111
  };
112
112
  }
113
113
  }
114
+
115
/**
 * Lightweight HEAD check to see if a record already exists on the server.
 *
 * Sends `HEAD {endpoint}/{recordId}` with a 10 second timeout. Trailing
 * slashes on `endpoint` are stripped so the request never contains `//`, and
 * `recordId` is URL-encoded so it always occupies a single path segment.
 *
 * Status mapping:
 * - 200 -> `{ exists: true,  unchanged: false }`
 * - 304 -> `{ exists: true,  unchanged: true }`
 * - anything else -> `{ exists: false, unchanged: false }`
 *
 * Never throws: network errors, timeouts and an empty `recordId` all resolve
 * to `{ exists: false, unchanged: false }` (fail-open, matching the
 * uploadPushPayload pattern), so the caller simply proceeds with the upload.
 *
 * @param endpoint - Base URL of the record collection.
 * @param recordId - Identifier of the record to probe.
 * @param sha256 - Optional content hash, sent quoted in `If-None-Match`.
 *   NOTE(review): servers normally answer 304 only to a conditional request,
 *   so `unchanged` is presumably only meaningful when this is provided.
 * @param apiKey - Optional key sent as `Authorization: Bearer <apiKey>`.
 * @returns Whether the record exists and whether its content is unchanged.
 */
export async function headRecord(
  endpoint: string,
  recordId: string,
  sha256?: string,
  apiKey?: string,
): Promise<HeadCheckResult> {
  const failOpen: HeadCheckResult = { exists: false, unchanged: false };

  // An empty id would probe the collection root, whose 200 says nothing
  // about any individual record.
  if (!recordId) return failOpen;

  try {
    const headers: Record<string, string> = {
      "User-Agent": `selftune/${getSelftuneVersion()}`,
    };
    if (sha256) headers["If-None-Match"] = `"${sha256}"`;
    if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

    // Tolerate endpoints configured with a trailing slash.
    const base = endpoint.replace(/\/+$/, "");
    const response = await fetch(`${base}/${encodeURIComponent(recordId)}`, {
      method: "HEAD",
      headers,
      signal: AbortSignal.timeout(10_000),
    });

    if (response.status === 200) return { exists: true, unchanged: false };
    if (response.status === 304) return { exists: true, unchanged: true };

    // 404 or any other status -- treat as not found
    return failOpen;
  } catch {
    // Network error, timeout, etc. -- fail open
    return failOpen;
  }
}
@@ -12,7 +12,7 @@
12
12
  */
13
13
 
14
14
  import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
15
- import { uploadPushPayload } from "./client.js";
15
+ import { headRecord, uploadPushPayload } from "./client.js";
16
16
 
17
17
  // ---------------------------------------------------------------------------
18
18
  // Options
@@ -28,6 +28,8 @@ export interface FlushOptions {
28
28
  dryRun?: boolean;
29
29
  /** API key for Bearer auth on the cloud endpoint. */
30
30
  apiKey?: string;
31
+ /** When set, run HEAD checks against this endpoint before pushing. */
32
+ headCheckEndpoint?: string;
31
33
  }
32
34
 
33
35
  // ---------------------------------------------------------------------------
@@ -85,8 +87,9 @@ export async function flushQueue(
85
87
  const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
86
88
  const dryRun = options?.dryRun ?? false;
87
89
  const apiKey = options?.apiKey;
90
+ const headCheckEndpoint = options?.headCheckEndpoint;
88
91
 
89
- const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
92
+ const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
90
93
 
91
94
  const items = queue.getPending(batchSize);
92
95
 
@@ -94,7 +97,44 @@ export async function flushQueue(
94
97
  return summary;
95
98
  }
96
99
 
100
+ // -- HEAD check phase: identify records that already exist unchanged ------
101
+ const unchangedIds = new Set<number>();
102
+ if (headCheckEndpoint) {
103
+ const headChecks = items.map(async (item) => {
104
+ try {
105
+ const parsed = JSON.parse(item.payload_json) as { push_id?: string };
106
+ const pushId = parsed.push_id;
107
+ if (!pushId) return { id: item.id, skip: false };
108
+ const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
109
+ return { id: item.id, skip: result.exists && result.unchanged };
110
+ } catch {
111
+ // Fail-open: if HEAD check itself errors, don't skip
112
+ return { id: item.id, skip: false };
113
+ }
114
+ });
115
+
116
+ const results = await Promise.allSettled(headChecks);
117
+ for (const result of results) {
118
+ if (result.status === "fulfilled" && result.value.skip) {
119
+ unchangedIds.add(result.value.id);
120
+ }
121
+ }
122
+
123
+ // Mark unchanged items as sent in the queue without actually pushing
124
+ for (const item of items) {
125
+ if (unchangedIds.has(item.id)) {
126
+ if (!queue.markSending(item.id)) continue;
127
+ if (queue.markSent(item.id)) {
128
+ summary.skipped_unchanged++;
129
+ } else {
130
+ summary.failed++;
131
+ }
132
+ }
133
+ }
134
+ }
135
+
97
136
  for (const item of items) {
137
+ if (unchangedIds.has(item.id)) continue;
98
138
  const markFailedSafely = (message: string): void => {
99
139
  if (!queue.markFailed(item.id, message)) {
100
140
  console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
@@ -149,10 +189,11 @@ export async function flushQueue(
149
189
  break;
150
190
  }
151
191
 
152
- // 409 Conflict = duplicate push_id, treat as success
153
- if (status === 409) {
192
+ // 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
193
+ // Both are treated as success — the server already has this data.
194
+ if (status === 304 || status === 409) {
154
195
  if (!queue.markSent(item.id)) {
155
- markFailedSafely("local queue state update failed after duplicate upload");
196
+ markFailedSafely("local queue state update failed after duplicate/unchanged upload");
156
197
  summary.failed++;
157
198
  } else {
158
199
  summary.sent++;
@@ -1,9 +1,10 @@
1
1
  /**
2
2
  * Canonical upload staging writer.
3
3
  *
4
- * Reads canonical records from the JSONL source-of-truth log and evolution
5
- * evidence from SQLite, then inserts them into a single monotonic staging
6
- * table for lossless upload batching.
4
+ * Reads canonical records from SQLite by default (or from a JSONL override for
5
+ * explicit recovery/debugging) plus evolution evidence from SQLite, then
6
+ * inserts them into a single monotonic staging table for lossless upload
7
+ * batching.
7
8
  *
8
9
  * The staging table preserves the full canonical record JSON -- no field
9
10
  * dropping, no hardcoding of provenance fields.
@@ -146,16 +147,28 @@ function extractNormalizedAt(record: CanonicalRecord): string {
146
147
  return record.normalized_at;
147
148
  }
148
149
 
150
+ // -- Content hashing ----------------------------------------------------------
151
+
152
/**
 * Hash a string with SHA-256 and return the hex-encoded digest.
 *
 * The digest is stored alongside staged records as `content_sha256` for
 * upload dedup. Hashing goes through Bun's built-in CryptoHasher, so no
 * extra dependency is pulled in.
 *
 * @param input - Text to hash (callers pass the serialized record JSON).
 * @returns Hex string of the SHA-256 digest.
 */
export function computeContentSha256(input: string): string {
  return new Bun.CryptoHasher("sha256").update(input).digest("hex");
}
161
+
149
162
  // -- Main staging function ----------------------------------------------------
150
163
 
151
164
  /**
152
- * Stage canonical records from the JSONL log and evolution evidence from SQLite
153
- * into the canonical_upload_staging table.
165
+ * Stage canonical records from SQLite by default (or a custom JSONL log path
166
+ * override) and evolution evidence from SQLite into canonical_upload_staging.
154
167
  *
155
168
  * Dedups by (record_kind, record_id) via INSERT ... ON CONFLICT; an existing row is only updated to backfill a missing content_sha256.
156
169
  *
157
170
  * @param db - SQLite database handle
158
- * @param logPath - Path to canonical JSONL log (defaults to CANONICAL_LOG)
171
+ * @param logPath - Canonical JSONL override path (default sentinel keeps SQLite-backed staging)
159
172
  * @returns Number of newly staged records
160
173
  */
161
174
  export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_LOG): number {
@@ -163,9 +176,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
163
176
  const now = new Date().toISOString();
164
177
 
165
178
  const stmt = db.prepare(`
166
- INSERT OR IGNORE INTO canonical_upload_staging
167
- (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
168
- VALUES (?, ?, ?, ?, ?, ?, ?)
179
+ INSERT INTO canonical_upload_staging
180
+ (record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
181
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
182
+ ON CONFLICT(record_kind, record_id) DO UPDATE SET
183
+ content_sha256 = excluded.content_sha256
184
+ WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
169
185
  `);
170
186
 
171
187
  // 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
@@ -177,14 +193,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
177
193
  : readAndEnrichCanonicalRecords(logPath);
178
194
  for (const record of records) {
179
195
  const recordId = extractRecordId(record);
196
+ const recordJson = JSON.stringify(record);
180
197
  const result = stmt.run(
181
198
  record.record_kind,
182
199
  recordId,
183
- JSON.stringify(record),
200
+ recordJson,
184
201
  extractSessionId(record),
185
202
  extractPromptId(record),
186
203
  extractNormalizedAt(record),
187
204
  now,
205
+ computeContentSha256(recordJson),
188
206
  );
189
207
  if (result.changes > 0) staged++;
190
208
  }
@@ -222,6 +240,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
222
240
  null, // no prompt_id
223
241
  entry.timestamp,
224
242
  now,
243
+ computeContentSha256(recordJson),
225
244
  );
226
245
  if (result.changes > 0) staged++;
227
246
  }
@@ -258,6 +277,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
258
277
  null, // no prompt_id
259
278
  run.timestamp,
260
279
  now,
280
+ computeContentSha256(recordJson),
261
281
  );
262
282
  if (result.changes > 0) staged++;
263
283
  }
@@ -298,6 +318,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
298
318
  null, // no prompt_id
299
319
  gr.graded_at,
300
320
  now,
321
+ computeContentSha256(recordJson),
301
322
  );
302
323
  if (result.changes > 0) staged++;
303
324
  }
@@ -332,6 +353,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
332
353
  null, // no prompt_id
333
354
  sig.timestamp,
334
355
  now,
356
+ computeContentSha256(recordJson),
335
357
  );
336
358
  if (result.changes > 0) staged++;
337
359
  }
@@ -49,4 +49,13 @@ export interface FlushSummary {
49
49
  sent: number;
50
50
  failed: number;
51
51
  skipped: number;
52
+ /** Records skipped because a HEAD check confirmed they already exist unchanged. */
53
+ skipped_unchanged: number;
54
+ }
55
+
56
+ // -- HEAD check result --------------------------------------------------------
57
+
58
/**
 * Outcome of a HEAD existence probe for a single record.
 *
 * Both flags are false when the record was not found or the probe failed.
 */
export interface HeadCheckResult {
  /** True when the server answered the HEAD request with 200 or 304. */
  exists: boolean;
  /** True only when the server answered 304 (content matches what was sent). */
  unchanged: boolean;
}
@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
41
41
  export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
42
42
  export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
43
43
 
44
/**
 * Resolve the orchestrate lock file path.
 *
 * Returns SELFTUNE_ORCHESTRATE_LOCK_PATH when it is set to a non-empty value,
 * otherwise the default ORCHESTRATE_LOCK. This lets tests redirect the lock
 * without mutating the host lock file.
 */
export function getOrchestrateLockPath(): string {
  const override = process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH;
  return override ? override : ORCHESTRATE_LOCK;
}
48
+
44
49
  /** Evolution memory directory — human-readable session context that survives resets. */
45
50
  export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
46
51
  export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
@@ -155,6 +160,16 @@ export const OPENCLAW_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "openclaw-ingest
155
160
 
156
161
  /** Default output directory for contribution bundles. */
157
162
  export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
163
/** Path of the file holding creator-directed contribution preferences (per-skill opt-in state). */
export const CONTRIBUTION_PREFERENCES_PATH = join(SELFTUNE_CONFIG_DIR, "contribution-preferences.json");

/** Path of the file holding the creator overview watchlist preference. */
export const WATCHED_SKILLS_PATH = join(SELFTUNE_CONFIG_DIR, "watched-skills.json");

/**
 * Creator-directed relay endpoint for staged contribution signals.
 *
 * SELFTUNE_CONTRIBUTION_RELAY_ENDPOINT overrides the default whenever it is
 * defined; the fallback is nullish, so an empty string is kept as-is.
 */
export const CONTRIBUTION_RELAY_ENDPOINT =
  process.env.SELFTUNE_CONTRIBUTION_RELAY_ENDPOINT ?? "https://api.selftune.dev/api/v1/signals";
158
173
 
159
174
  // ---------------------------------------------------------------------------
160
175
  // Sanitization constants (for contribute command)
@@ -162,17 +177,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
162
177
 
163
178
  /** Regex patterns for detecting secrets that must be redacted. */
164
179
  export const SECRET_PATTERNS = [
165
- /sk-[a-zA-Z0-9]{20,}/g, // OpenAI / Anthropic API keys
180
+ // -- API keys & tokens (platform-specific prefixes) --
181
+ /sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
182
+ /sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
166
183
  /ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
167
184
  /gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
168
185
  /github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
169
- /AKIA[A-Z0-9]{16}/g, // AWS access key IDs
186
+ /npm_[a-zA-Z0-9]{36}/g, // npm tokens
187
+ /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
188
+
189
+ // -- AWS --
190
+ /AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
191
+ /ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)
192
+
193
+ // -- GCP --
194
+ /AIza[0-9A-Za-z_-]{35}/g, // Google API key
195
+
196
+ // -- Stripe --
197
+ /(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys
198
+
199
+ // -- Twilio --
200
+ /SK[a-f0-9]{32}/g, // Twilio API key
201
+
202
+ // -- SendGrid --
203
+ /SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key
204
+
205
+ // -- Mailgun --
206
+ /key-[a-zA-Z0-9]{32}/g, // Mailgun API key
207
+
208
+ // -- Slack --
170
209
  /xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
171
210
  /xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
172
211
  /xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens
173
- /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JWTs
174
- /npm_[a-zA-Z0-9]{36}/g, // npm tokens
175
- /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
212
+
213
+ // -- JWTs --
214
+ /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens
215
+
216
+ // -- Private keys (PEM block headers) --
217
+ /-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)
218
+
219
+ // -- Database connection URIs --
220
+ /(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB URIs with credentials
221
+
222
+ // -- Azure --
223
+ /DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string
224
+
225
+ // -- Webhook URLs --
226
+ /https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
227
+ /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook
228
+
229
+ // -- SSH keys --
230
+ /ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material
231
+
232
+ // -- Generic high-confidence patterns --
233
+ /Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
234
+ /https?:\/\/[^:]+:[^@]+@[^\s"']+/g, // Basic auth embedded in URLs
235
+ /(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
176
236
  ] as const;
177
237
 
178
238
  /** Regex for file paths (Unix and Windows). */
@@ -184,6 +244,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
184
244
  /** Regex for IP addresses (v4). */
185
245
  export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
186
246
 
247
+ // ---------------------------------------------------------------------------
248
+ // PII patterns — high-confidence, low-false-positive personally identifiable info
249
+ // ---------------------------------------------------------------------------
250
+
251
/**
 * Patterns for personally identifiable information: phone numbers, payment
 * card numbers, US SSNs, IPv6 addresses and dates of birth in key-value form.
 *
 * Every pattern carries the `g` flag. sanitizeConservative applies them after
 * SECRET_PATTERNS and replaces each match with "[PII]".
 */
export const PII_PATTERNS = [
  // -- Phone numbers --
  /\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
  // A lookbehind is used instead of \b so that a leading "(" is part of the
  // match; \b can never sit directly before "(" after whitespace.
  /(?<!\w)\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567

  // -- Credit card numbers (major networks, with optional separators) --
  /\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
  /\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
  /\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
  /\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)

  // -- SSN / national IDs --
  /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789

  // -- IPv6 --
  /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
  /\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
  /::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)

  // -- Date of birth patterns (in structured contexts) --
  // The key accepts "date.of.birth", "date_of_birth", "date-of-birth",
  // "date of birth" and "dateOfBirth" (case-insensitive).
  /\b(?:dob|date[\s._-]?of[\s._-]?birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
] as const;
273
+
187
274
  /** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
188
275
  export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;
189
276
 
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env bun
2
2
  /**
3
- * selftune contribute — opt-in export of anonymized skill observability data.
3
+ * selftune contribute — community export of anonymized skill observability data.
4
4
  *
5
5
  * Usage:
6
6
  * bun run cli/selftune/contribute/contribute.ts --skill selftune [--preview] [--output file.json]
@@ -31,10 +31,38 @@ export async function cliMain(): Promise<void> {
31
31
  submit: { type: "boolean", default: false },
32
32
  endpoint: { type: "string", default: "https://selftune-api.fly.dev" },
33
33
  github: { type: "boolean", default: false },
34
+ help: { type: "boolean", short: "h", default: false },
34
35
  },
35
36
  strict: true,
36
37
  });
37
38
 
39
+ if (values.help) {
40
+ console.log(`selftune contribute — Export an anonymized community bundle
41
+
42
+ Usage:
43
+ selftune contribute --skill <name> [--preview] [--sanitize conservative|aggressive]
44
+ selftune contribute --skill <name> [--output <file>] [--submit]
45
+
46
+ Purpose:
47
+ Build a sanitized community contribution bundle from local SQLite data.
48
+ This is separate from:
49
+ selftune contributions Creator-directed sharing preferences
50
+ selftune alpha upload Personal cloud upload cycle
51
+
52
+ Options:
53
+ --skill <name> Skill to export
54
+ --preview Print the sanitized bundle instead of writing it
55
+ --sanitize conservative|aggressive
56
+ Choose the sanitization level
57
+ --output <file> Write the bundle to an explicit file path
58
+ --since <timestamp> Only include records on or after this time
59
+ --submit Submit the bundle after writing it
60
+ --endpoint <url> Override the default service endpoint
61
+ --github Submit via GitHub flow instead of the service
62
+ -h, --help Show this help`);
63
+ return;
64
+ }
65
+
38
66
  const skillName = values.skill ?? "selftune";
39
67
  const sanitizationLevel = values.sanitize === "aggressive" ? "aggressive" : "conservative";
40
68
 
@@ -81,7 +109,7 @@ export async function cliMain(): Promise<void> {
81
109
  writeFileSync(outputPath, json, "utf-8");
82
110
 
83
111
  // 6. Summary
84
- console.log(`Contribution bundle written to: ${outputPath}`);
112
+ console.log(`Community contribution bundle written to: ${outputPath}`);
85
113
  console.log(` Queries: ${bundle.positive_queries.length}`);
86
114
  console.log(` Eval entries: ${bundle.eval_entries.length}`);
87
115
  console.log(` Sessions: ${bundle.session_metrics.total_sessions}`);
@@ -15,6 +15,7 @@ import {
15
15
  IDENTIFIER_PATTERN,
16
16
  IP_PATTERN,
17
17
  MODULE_PATTERN,
18
+ PII_PATTERNS,
18
19
  SECRET_PATTERNS,
19
20
  } from "../constants.js";
20
21
  import type { ContributionBundle } from "../types.js";
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
26
27
  const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
27
28
  const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
28
29
 
30
/**
 * Replace every match of each pattern in `text` with `token`.
 *
 * Each pattern is re-created before use, so the shared module-level regex
 * objects (and their `lastIndex`) are never touched. The global flag is
 * forced on the copy: a pattern declared without `g` still has every
 * occurrence replaced rather than only the first one. The token is inserted
 * verbatim -- replacement patterns such as `$&` or `$1` are not expanded.
 *
 * @param text - Input string to scrub.
 * @param patterns - Patterns applied in order; later patterns see the output of earlier ones.
 * @param token - Literal replacement text.
 * @returns The scrubbed string.
 */
function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
  return patterns.reduce((scrubbed, pattern) => {
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    // A replacer function keeps `$` sequences in the token literal.
    return scrubbed.replace(new RegExp(pattern.source, flags), () => token);
  }, text);
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
41
+ // ---------------------------------------------------------------------------
42
+
43
/**
 * Redact everything matching SECRET_PATTERNS with "[SECRET]".
 *
 * Lighter than sanitizeConservative: paths, emails, IPs and UUIDs are left
 * untouched. Empty input is returned as-is.
 */
export function sanitizeSecrets(text: string): string {
  return text ? applyPatterns(text, SECRET_PATTERNS, "[SECRET]") : text;
}
51
+
52
/**
 * Recursively copy a value, redacting secrets in every string leaf.
 *
 * - Strings go through sanitizeSecrets.
 * - Arrays and plain objects (prototype `Object.prototype` or `null`) are
 *   rebuilt with their elements/values processed recursively. Property names
 *   are copied unchanged; only own enumerable string-keyed properties are kept.
 * - Everything else (numbers, booleans, null, undefined, Date, Map, Set,
 *   RegExp, class instances, ...) is returned as the same reference.
 *
 * The input is never mutated. Copies are built with Object.fromEntries so
 * that an own `"__proto__"` key (as produced by JSON.parse) stays an ordinary
 * data property instead of replacing the copy's prototype.
 *
 * @param value - Any JSON-like value.
 * @returns A redacted copy with the same shape as `value`.
 */
export function redactSecretsDeep<T>(value: T): T {
  if (typeof value === "string") return sanitizeSecrets(value) as T;
  if (Array.isArray(value)) return value.map((item) => redactSecretsDeep(item)) as T;
  if (value === null || typeof value !== "object") return value;

  // Only recurse into plain objects -- pass through Date, Map, Set, RegExp, class instances, etc.
  const proto = Object.getPrototypeOf(value);
  if (proto !== null && proto !== Object.prototype) return value;

  const redactedEntries = Object.entries(value as Record<string, unknown>).map(
    ([key, child]): [string, unknown] => [key, redactSecretsDeep(child)],
  );
  return Object.fromEntries(redactedEntries) as T;
}
72
+
29
73
  // ---------------------------------------------------------------------------
30
74
  // Conservative sanitization
31
75
  // ---------------------------------------------------------------------------
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
36
80
  let result = text;
37
81
 
38
82
  // Secrets first (longest/most specific patterns)
39
- for (const pattern of SECRET_PATTERNS) {
40
- // Clone regex to reset lastIndex
41
- result = result.replace(new RegExp(pattern.source, pattern.flags), "[SECRET]");
42
- }
83
+ result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
84
+
85
+ // PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
86
+ result = applyPatterns(result, PII_PATTERNS, "[PII]");
43
87
 
44
88
  // File paths
45
89
  result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
@@ -123,7 +167,7 @@ export function sanitizeBundle(
123
167
  level: "conservative" | "aggressive",
124
168
  projectName?: string,
125
169
  ): ContributionBundle {
126
- return {
170
+ const fieldSanitized: ContributionBundle = {
127
171
  ...bundle,
128
172
  sanitization_level: level,
129
173
  positive_queries: bundle.positive_queries.map((q) => ({
@@ -151,6 +195,9 @@ export function sanitizeBundle(
151
195
  }
152
196
  : {}),
153
197
  };
198
+
199
+ // Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
200
+ return redactSecretsDeep(fieldSanitized);
154
201
  }
155
202
 
156
203
  // ---------------------------------------------------------------------------