selftune 0.2.16 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -22
- package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
- package/cli/selftune/alpha-upload/client.ts +51 -1
- package/cli/selftune/alpha-upload/flush.ts +46 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +32 -10
- package/cli/selftune/alpha-upload-contract.ts +9 -0
- package/cli/selftune/constants.ts +92 -5
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribute/sanitize.ts +52 -5
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +305 -1
- package/cli/selftune/dashboard-server.ts +47 -13
- package/cli/selftune/eval/family-overlap.ts +395 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/evolution/description-quality.ts +12 -11
- package/cli/selftune/evolution/evolve.ts +214 -51
- package/cli/selftune/evolution/validate-proposal.ts +9 -6
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/grading/grade-session.ts +20 -0
- package/cli/selftune/hooks/commit-track.ts +188 -0
- package/cli/selftune/hooks/prompt-log.ts +10 -1
- package/cli/selftune/hooks/session-stop.ts +2 -2
- package/cli/selftune/hooks/skill-eval.ts +15 -1
- package/cli/selftune/hooks/stdin-preview.ts +32 -0
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/direct-write.ts +69 -6
- package/cli/selftune/localdb/queries.ts +1253 -37
- package/cli/selftune/localdb/schema.ts +66 -0
- package/cli/selftune/orchestrate.ts +32 -4
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +150 -4
- package/cli/selftune/routes/skill-report.ts +648 -18
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +80 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/utils/transcript.ts +210 -1
- package/cli/selftune/watchlist.ts +65 -0
- package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
- package/package.json +1 -1
- package/packages/telemetry-contract/src/types.ts +11 -0
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
- package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/skill/SKILL.md +40 -2
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +64 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Evolve.md +31 -13
- package/skill/Workflows/ExportCanonical.md +121 -0
- package/skill/Workflows/Hook.md +131 -0
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +29 -9
- package/skill/Workflows/Orchestrate.md +27 -5
- package/skill/Workflows/Quickstart.md +94 -0
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +95 -0
- package/skill/Workflows/Sync.md +18 -12
- package/skill/Workflows/Uninstall.md +82 -0
- package/skill/settings_snippet.json +11 -0
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* PushUploadResult indicating success or failure.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import type { PushUploadResult } from "../alpha-upload-contract.js";
|
|
9
|
+
import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
|
|
10
10
|
import { getSelftuneVersion } from "../utils/selftune-meta.js";
|
|
11
11
|
|
|
12
12
|
function isPushUploadResult(value: unknown): value is PushUploadResult {
|
|
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
|
|
|
111
111
|
};
|
|
112
112
|
}
|
|
113
113
|
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Lightweight HEAD check to see if a record already exists on the server.
|
|
117
|
+
*
|
|
118
|
+
* Sends HEAD {endpoint}/{recordId}. Optionally includes If-None-Match
|
|
119
|
+
* for content-hash comparison.
|
|
120
|
+
*
|
|
121
|
+
* Never throws -- returns { exists: false, unchanged: false } on any error
|
|
122
|
+
* (fail-open, matching the uploadPushPayload pattern).
|
|
123
|
+
*/
|
|
124
|
+
export async function headRecord(
|
|
125
|
+
endpoint: string,
|
|
126
|
+
recordId: string,
|
|
127
|
+
sha256?: string,
|
|
128
|
+
apiKey?: string,
|
|
129
|
+
): Promise<HeadCheckResult> {
|
|
130
|
+
const failOpen: HeadCheckResult = { exists: false, unchanged: false };
|
|
131
|
+
try {
|
|
132
|
+
const headers: Record<string, string> = {
|
|
133
|
+
"User-Agent": `selftune/${getSelftuneVersion()}`,
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
if (sha256) {
|
|
137
|
+
headers["If-None-Match"] = `"${sha256}"`;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
if (apiKey) {
|
|
141
|
+
headers.Authorization = `Bearer ${apiKey}`;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
const url = `${endpoint}/${encodeURIComponent(recordId)}`;
|
|
145
|
+
const response = await fetch(url, {
|
|
146
|
+
method: "HEAD",
|
|
147
|
+
headers,
|
|
148
|
+
signal: AbortSignal.timeout(10_000),
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
if (response.status === 200) {
|
|
152
|
+
return { exists: true, unchanged: false };
|
|
153
|
+
}
|
|
154
|
+
if (response.status === 304) {
|
|
155
|
+
return { exists: true, unchanged: true };
|
|
156
|
+
}
|
|
157
|
+
// 404 or any other status -- treat as not found
|
|
158
|
+
return failOpen;
|
|
159
|
+
} catch {
|
|
160
|
+
// Network error, timeout, etc. -- fail open
|
|
161
|
+
return failOpen;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
|
|
15
|
-
import { uploadPushPayload } from "./client.js";
|
|
15
|
+
import { headRecord, uploadPushPayload } from "./client.js";
|
|
16
16
|
|
|
17
17
|
// ---------------------------------------------------------------------------
|
|
18
18
|
// Options
|
|
@@ -28,6 +28,8 @@ export interface FlushOptions {
|
|
|
28
28
|
dryRun?: boolean;
|
|
29
29
|
/** API key for Bearer auth on the cloud endpoint. */
|
|
30
30
|
apiKey?: string;
|
|
31
|
+
/** When set, run HEAD checks against this endpoint before pushing. */
|
|
32
|
+
headCheckEndpoint?: string;
|
|
31
33
|
}
|
|
32
34
|
|
|
33
35
|
// ---------------------------------------------------------------------------
|
|
@@ -85,8 +87,9 @@ export async function flushQueue(
|
|
|
85
87
|
const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
86
88
|
const dryRun = options?.dryRun ?? false;
|
|
87
89
|
const apiKey = options?.apiKey;
|
|
90
|
+
const headCheckEndpoint = options?.headCheckEndpoint;
|
|
88
91
|
|
|
89
|
-
const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
|
|
92
|
+
const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
|
|
90
93
|
|
|
91
94
|
const items = queue.getPending(batchSize);
|
|
92
95
|
|
|
@@ -94,7 +97,44 @@ export async function flushQueue(
|
|
|
94
97
|
return summary;
|
|
95
98
|
}
|
|
96
99
|
|
|
100
|
+
// -- HEAD check phase: identify records that already exist unchanged ------
|
|
101
|
+
const unchangedIds = new Set<number>();
|
|
102
|
+
if (headCheckEndpoint) {
|
|
103
|
+
const headChecks = items.map(async (item) => {
|
|
104
|
+
try {
|
|
105
|
+
const parsed = JSON.parse(item.payload_json) as { push_id?: string };
|
|
106
|
+
const pushId = parsed.push_id;
|
|
107
|
+
if (!pushId) return { id: item.id, skip: false };
|
|
108
|
+
const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
|
|
109
|
+
return { id: item.id, skip: result.exists && result.unchanged };
|
|
110
|
+
} catch {
|
|
111
|
+
// Fail-open: if HEAD check itself errors, don't skip
|
|
112
|
+
return { id: item.id, skip: false };
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
const results = await Promise.allSettled(headChecks);
|
|
117
|
+
for (const result of results) {
|
|
118
|
+
if (result.status === "fulfilled" && result.value.skip) {
|
|
119
|
+
unchangedIds.add(result.value.id);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Mark unchanged items as sent in the queue without actually pushing
|
|
124
|
+
for (const item of items) {
|
|
125
|
+
if (unchangedIds.has(item.id)) {
|
|
126
|
+
if (!queue.markSending(item.id)) continue;
|
|
127
|
+
if (queue.markSent(item.id)) {
|
|
128
|
+
summary.skipped_unchanged++;
|
|
129
|
+
} else {
|
|
130
|
+
summary.failed++;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
97
136
|
for (const item of items) {
|
|
137
|
+
if (unchangedIds.has(item.id)) continue;
|
|
98
138
|
const markFailedSafely = (message: string): void => {
|
|
99
139
|
if (!queue.markFailed(item.id, message)) {
|
|
100
140
|
console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
|
|
@@ -149,10 +189,11 @@ export async function flushQueue(
|
|
|
149
189
|
break;
|
|
150
190
|
}
|
|
151
191
|
|
|
152
|
-
// 409 Conflict = duplicate push_id
|
|
153
|
-
|
|
192
|
+
// 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
|
|
193
|
+
// Both are treated as success — the server already has this data.
|
|
194
|
+
if (status === 304 || status === 409) {
|
|
154
195
|
if (!queue.markSent(item.id)) {
|
|
155
|
-
markFailedSafely("local queue state update failed after duplicate upload");
|
|
196
|
+
markFailedSafely("local queue state update failed after duplicate/unchanged upload");
|
|
156
197
|
summary.failed++;
|
|
157
198
|
} else {
|
|
158
199
|
summary.sent++;
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Canonical upload staging writer.
|
|
3
3
|
*
|
|
4
|
-
* Reads canonical records from
|
|
5
|
-
* evidence from SQLite, then
|
|
6
|
-
* table for lossless upload
|
|
4
|
+
* Reads canonical records from SQLite by default (or from a JSONL override for
|
|
5
|
+
* explicit recovery/debugging) plus evolution evidence from SQLite, then
|
|
6
|
+
* inserts them into a single monotonic staging table for lossless upload
|
|
7
|
+
* batching.
|
|
7
8
|
*
|
|
8
9
|
* The staging table preserves the full canonical record JSON -- no field
|
|
9
10
|
* dropping, no hardcoding of provenance fields.
|
|
@@ -146,16 +147,28 @@ function extractNormalizedAt(record: CanonicalRecord): string {
|
|
|
146
147
|
return record.normalized_at;
|
|
147
148
|
}
|
|
148
149
|
|
|
150
|
+
// -- Content hashing ----------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* Compute SHA256 hex digest of a string (for upload dedup).
|
|
154
|
+
* Uses Bun's built-in CryptoHasher for zero-dependency hashing.
|
|
155
|
+
*/
|
|
156
|
+
export function computeContentSha256(input: string): string {
|
|
157
|
+
const hasher = new Bun.CryptoHasher("sha256");
|
|
158
|
+
hasher.update(input);
|
|
159
|
+
return hasher.digest("hex");
|
|
160
|
+
}
|
|
161
|
+
|
|
149
162
|
// -- Main staging function ----------------------------------------------------
|
|
150
163
|
|
|
151
164
|
/**
|
|
152
|
-
* Stage canonical records from
|
|
153
|
-
* into
|
|
165
|
+
* Stage canonical records from SQLite by default (or a custom JSONL log path
|
|
166
|
+
* override) and evolution evidence from SQLite into canonical_upload_staging.
|
|
154
167
|
*
|
|
155
168
|
* Uses INSERT OR IGNORE for dedup by (record_kind, record_id).
|
|
156
169
|
*
|
|
157
170
|
* @param db - SQLite database handle
|
|
158
|
-
* @param logPath -
|
|
171
|
+
* @param logPath - Canonical JSONL override path (default sentinel keeps SQLite-backed staging)
|
|
159
172
|
* @returns Number of newly staged records
|
|
160
173
|
*/
|
|
161
174
|
export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_LOG): number {
|
|
@@ -163,9 +176,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
163
176
|
const now = new Date().toISOString();
|
|
164
177
|
|
|
165
178
|
const stmt = db.prepare(`
|
|
166
|
-
INSERT
|
|
167
|
-
(record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
|
|
168
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
179
|
+
INSERT INTO canonical_upload_staging
|
|
180
|
+
(record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
|
|
181
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
182
|
+
ON CONFLICT(record_kind, record_id) DO UPDATE SET
|
|
183
|
+
content_sha256 = excluded.content_sha256
|
|
184
|
+
WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
|
|
169
185
|
`);
|
|
170
186
|
|
|
171
187
|
// 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
|
|
@@ -177,14 +193,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
177
193
|
: readAndEnrichCanonicalRecords(logPath);
|
|
178
194
|
for (const record of records) {
|
|
179
195
|
const recordId = extractRecordId(record);
|
|
196
|
+
const recordJson = JSON.stringify(record);
|
|
180
197
|
const result = stmt.run(
|
|
181
198
|
record.record_kind,
|
|
182
199
|
recordId,
|
|
183
|
-
|
|
200
|
+
recordJson,
|
|
184
201
|
extractSessionId(record),
|
|
185
202
|
extractPromptId(record),
|
|
186
203
|
extractNormalizedAt(record),
|
|
187
204
|
now,
|
|
205
|
+
computeContentSha256(recordJson),
|
|
188
206
|
);
|
|
189
207
|
if (result.changes > 0) staged++;
|
|
190
208
|
}
|
|
@@ -222,6 +240,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
222
240
|
null, // no prompt_id
|
|
223
241
|
entry.timestamp,
|
|
224
242
|
now,
|
|
243
|
+
computeContentSha256(recordJson),
|
|
225
244
|
);
|
|
226
245
|
if (result.changes > 0) staged++;
|
|
227
246
|
}
|
|
@@ -258,6 +277,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
258
277
|
null, // no prompt_id
|
|
259
278
|
run.timestamp,
|
|
260
279
|
now,
|
|
280
|
+
computeContentSha256(recordJson),
|
|
261
281
|
);
|
|
262
282
|
if (result.changes > 0) staged++;
|
|
263
283
|
}
|
|
@@ -298,6 +318,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
298
318
|
null, // no prompt_id
|
|
299
319
|
gr.graded_at,
|
|
300
320
|
now,
|
|
321
|
+
computeContentSha256(recordJson),
|
|
301
322
|
);
|
|
302
323
|
if (result.changes > 0) staged++;
|
|
303
324
|
}
|
|
@@ -332,6 +353,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
332
353
|
null, // no prompt_id
|
|
333
354
|
sig.timestamp,
|
|
334
355
|
now,
|
|
356
|
+
computeContentSha256(recordJson),
|
|
335
357
|
);
|
|
336
358
|
if (result.changes > 0) staged++;
|
|
337
359
|
}
|
|
@@ -49,4 +49,13 @@ export interface FlushSummary {
|
|
|
49
49
|
sent: number;
|
|
50
50
|
failed: number;
|
|
51
51
|
skipped: number;
|
|
52
|
+
/** Records skipped because a HEAD check confirmed they already exist unchanged. */
|
|
53
|
+
skipped_unchanged: number;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// -- HEAD check result --------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
export interface HeadCheckResult {
|
|
59
|
+
exists: boolean;
|
|
60
|
+
unchanged: boolean;
|
|
52
61
|
}
|
|
@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
|
|
|
41
41
|
export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
|
|
42
42
|
export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
|
|
43
43
|
|
|
44
|
+
/** Allow tests to override the orchestrate lock without mutating the host lock file. */
|
|
45
|
+
export function getOrchestrateLockPath(): string {
|
|
46
|
+
return process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH || ORCHESTRATE_LOCK;
|
|
47
|
+
}
|
|
48
|
+
|
|
44
49
|
/** Evolution memory directory — human-readable session context that survives resets. */
|
|
45
50
|
export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
|
|
46
51
|
export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
|
|
@@ -155,6 +160,16 @@ export const OPENCLAW_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "openclaw-ingest
|
|
|
155
160
|
|
|
156
161
|
/** Default output directory for contribution bundles. */
|
|
157
162
|
export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
|
|
163
|
+
/** Creator-directed contribution preferences (per-skill opt-in state). */
|
|
164
|
+
export const CONTRIBUTION_PREFERENCES_PATH = join(
|
|
165
|
+
SELFTUNE_CONFIG_DIR,
|
|
166
|
+
"contribution-preferences.json",
|
|
167
|
+
);
|
|
168
|
+
/** Creator overview watchlist preference. */
|
|
169
|
+
export const WATCHED_SKILLS_PATH = join(SELFTUNE_CONFIG_DIR, "watched-skills.json");
|
|
170
|
+
/** Creator-directed relay endpoint for staged contribution signals. */
|
|
171
|
+
export const CONTRIBUTION_RELAY_ENDPOINT =
|
|
172
|
+
process.env.SELFTUNE_CONTRIBUTION_RELAY_ENDPOINT ?? "https://api.selftune.dev/api/v1/signals";
|
|
158
173
|
|
|
159
174
|
// ---------------------------------------------------------------------------
|
|
160
175
|
// Sanitization constants (for contribute command)
|
|
@@ -162,17 +177,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
|
|
|
162
177
|
|
|
163
178
|
/** Regex patterns for detecting secrets that must be redacted. */
|
|
164
179
|
export const SECRET_PATTERNS = [
|
|
165
|
-
|
|
180
|
+
// -- API keys & tokens (platform-specific prefixes) --
|
|
181
|
+
/sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
|
|
182
|
+
/sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
|
|
166
183
|
/ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
|
|
167
184
|
/gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
|
|
168
185
|
/github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
|
|
169
|
-
/
|
|
186
|
+
/npm_[a-zA-Z0-9]{36}/g, // npm tokens
|
|
187
|
+
/pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens
|
|
188
|
+
|
|
189
|
+
// -- AWS --
|
|
190
|
+
/AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
|
|
191
|
+
/ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)
|
|
192
|
+
|
|
193
|
+
// -- GCP --
|
|
194
|
+
/AIza[0-9A-Za-z_-]{35}/g, // Google API key
|
|
195
|
+
|
|
196
|
+
// -- Stripe --
|
|
197
|
+
/(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys
|
|
198
|
+
|
|
199
|
+
// -- Twilio --
|
|
200
|
+
/SK[a-f0-9]{32}/g, // Twilio API key
|
|
201
|
+
|
|
202
|
+
// -- SendGrid --
|
|
203
|
+
/SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key
|
|
204
|
+
|
|
205
|
+
// -- Mailgun --
|
|
206
|
+
/key-[a-zA-Z0-9]{32}/g, // Mailgun API key
|
|
207
|
+
|
|
208
|
+
// -- Slack --
|
|
170
209
|
/xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
|
|
171
210
|
/xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
|
|
172
211
|
/xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
/
|
|
212
|
+
|
|
213
|
+
// -- JWTs --
|
|
214
|
+
/eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens
|
|
215
|
+
|
|
216
|
+
// -- Private keys (PEM block headers) --
|
|
217
|
+
/-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)
|
|
218
|
+
|
|
219
|
+
// -- Database connection URIs --
|
|
220
|
+
/(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB URIs with credentials
|
|
221
|
+
|
|
222
|
+
// -- Azure --
|
|
223
|
+
/DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string
|
|
224
|
+
|
|
225
|
+
// -- Webhook URLs --
|
|
226
|
+
/https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
|
|
227
|
+
/https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook
|
|
228
|
+
|
|
229
|
+
// -- SSH keys --
|
|
230
|
+
/ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material
|
|
231
|
+
|
|
232
|
+
// -- Generic high-confidence patterns --
|
|
233
|
+
/Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
|
|
234
|
+
/https?:\/\/[^:]+:[^@]+@[^\s"']+/g, // Basic auth embedded in URLs
|
|
235
|
+
/(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
|
|
176
236
|
] as const;
|
|
177
237
|
|
|
178
238
|
/** Regex for file paths (Unix and Windows). */
|
|
@@ -184,6 +244,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
|
|
|
184
244
|
/** Regex for IP addresses (v4). */
|
|
185
245
|
export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
|
|
186
246
|
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
// PII patterns — high-confidence, low-false-positive personally identifiable info
|
|
249
|
+
// ---------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+
export const PII_PATTERNS = [
|
|
252
|
+
// -- Phone numbers --
|
|
253
|
+
/\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
|
|
254
|
+
/\b\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567
|
|
255
|
+
|
|
256
|
+
// -- Credit card numbers (major networks, with optional separators) --
|
|
257
|
+
/\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
|
|
258
|
+
/\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
|
|
259
|
+
/\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
|
|
260
|
+
/\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)
|
|
261
|
+
|
|
262
|
+
// -- SSN / national IDs --
|
|
263
|
+
/\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789
|
|
264
|
+
|
|
265
|
+
// -- IPv6 --
|
|
266
|
+
/\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
|
|
267
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
|
|
268
|
+
/::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)
|
|
269
|
+
|
|
270
|
+
// -- Date of birth patterns (in structured contexts) --
|
|
271
|
+
/\b(?:dob|date\.of\.birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
|
|
272
|
+
] as const;
|
|
273
|
+
|
|
187
274
|
/** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
|
|
188
275
|
export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;
|
|
189
276
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
/**
|
|
3
|
-
* selftune contribute —
|
|
3
|
+
* selftune contribute — community export of anonymized skill observability data.
|
|
4
4
|
*
|
|
5
5
|
* Usage:
|
|
6
6
|
* bun run cli/selftune/contribute/contribute.ts --skill selftune [--preview] [--output file.json]
|
|
@@ -31,10 +31,38 @@ export async function cliMain(): Promise<void> {
|
|
|
31
31
|
submit: { type: "boolean", default: false },
|
|
32
32
|
endpoint: { type: "string", default: "https://selftune-api.fly.dev" },
|
|
33
33
|
github: { type: "boolean", default: false },
|
|
34
|
+
help: { type: "boolean", short: "h", default: false },
|
|
34
35
|
},
|
|
35
36
|
strict: true,
|
|
36
37
|
});
|
|
37
38
|
|
|
39
|
+
if (values.help) {
|
|
40
|
+
console.log(`selftune contribute — Export an anonymized community bundle
|
|
41
|
+
|
|
42
|
+
Usage:
|
|
43
|
+
selftune contribute --skill <name> [--preview] [--sanitize conservative|aggressive]
|
|
44
|
+
selftune contribute --skill <name> [--output <file>] [--submit]
|
|
45
|
+
|
|
46
|
+
Purpose:
|
|
47
|
+
Build a sanitized community contribution bundle from local SQLite data.
|
|
48
|
+
This is separate from:
|
|
49
|
+
selftune contributions Creator-directed sharing preferences
|
|
50
|
+
selftune alpha upload Personal cloud upload cycle
|
|
51
|
+
|
|
52
|
+
Options:
|
|
53
|
+
--skill <name> Skill to export
|
|
54
|
+
--preview Print the sanitized bundle instead of writing it
|
|
55
|
+
--sanitize conservative|aggressive
|
|
56
|
+
Choose the sanitization level
|
|
57
|
+
--output <file> Write the bundle to an explicit file path
|
|
58
|
+
--since <timestamp> Only include records on or after this time
|
|
59
|
+
--submit Submit the bundle after writing it
|
|
60
|
+
--endpoint <url> Override the default service endpoint
|
|
61
|
+
--github Submit via GitHub flow instead of the service
|
|
62
|
+
-h, --help Show this help`);
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
38
66
|
const skillName = values.skill ?? "selftune";
|
|
39
67
|
const sanitizationLevel = values.sanitize === "aggressive" ? "aggressive" : "conservative";
|
|
40
68
|
|
|
@@ -81,7 +109,7 @@ export async function cliMain(): Promise<void> {
|
|
|
81
109
|
writeFileSync(outputPath, json, "utf-8");
|
|
82
110
|
|
|
83
111
|
// 6. Summary
|
|
84
|
-
console.log(`
|
|
112
|
+
console.log(`Community contribution bundle written to: ${outputPath}`);
|
|
85
113
|
console.log(` Queries: ${bundle.positive_queries.length}`);
|
|
86
114
|
console.log(` Eval entries: ${bundle.eval_entries.length}`);
|
|
87
115
|
console.log(` Sessions: ${bundle.session_metrics.total_sessions}`);
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
IDENTIFIER_PATTERN,
|
|
16
16
|
IP_PATTERN,
|
|
17
17
|
MODULE_PATTERN,
|
|
18
|
+
PII_PATTERNS,
|
|
18
19
|
SECRET_PATTERNS,
|
|
19
20
|
} from "../constants.js";
|
|
20
21
|
import type { ContributionBundle } from "../types.js";
|
|
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
|
|
|
26
27
|
const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
|
|
27
28
|
const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
|
|
28
29
|
|
|
30
|
+
/** Apply a set of regex patterns to text, replacing matches with a token. Clones each regex to reset lastIndex. */
|
|
31
|
+
function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
|
|
32
|
+
let result = text;
|
|
33
|
+
for (const pattern of patterns) {
|
|
34
|
+
result = result.replace(new RegExp(pattern.source, pattern.flags), token);
|
|
35
|
+
}
|
|
36
|
+
return result;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Apply only SECRET_PATTERNS redaction to a string.
|
|
45
|
+
* Lighter than sanitizeConservative — no path/email/IP/UUID replacement.
|
|
46
|
+
*/
|
|
47
|
+
export function sanitizeSecrets(text: string): string {
|
|
48
|
+
if (!text) return text;
|
|
49
|
+
return applyPatterns(text, SECRET_PATTERNS, "[SECRET]");
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Recursively traverse a value and redact secrets in all string leaves.
|
|
54
|
+
* Non-string primitives, Dates, and other non-plain objects pass through unchanged.
|
|
55
|
+
* Does NOT mutate the input — returns a new structure.
|
|
56
|
+
*/
|
|
57
|
+
export function redactSecretsDeep<T>(value: T): T {
|
|
58
|
+
if (typeof value === "string") return sanitizeSecrets(value) as T;
|
|
59
|
+
if (Array.isArray(value)) return value.map((item) => redactSecretsDeep(item)) as T;
|
|
60
|
+
if (value && typeof value === "object" && !(value instanceof Date)) {
|
|
61
|
+
// Only recurse into plain objects — pass through Map, Set, RegExp, class instances, etc.
|
|
62
|
+
const proto = Object.getPrototypeOf(value);
|
|
63
|
+
if (proto !== null && proto !== Object.prototype) return value;
|
|
64
|
+
const result: Record<string, unknown> = {};
|
|
65
|
+
for (const [k, v] of Object.entries(value)) {
|
|
66
|
+
result[k] = redactSecretsDeep(v);
|
|
67
|
+
}
|
|
68
|
+
return result as T;
|
|
69
|
+
}
|
|
70
|
+
return value;
|
|
71
|
+
}
|
|
72
|
+
|
|
29
73
|
// ---------------------------------------------------------------------------
|
|
30
74
|
// Conservative sanitization
|
|
31
75
|
// ---------------------------------------------------------------------------
|
|
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
|
|
|
36
80
|
let result = text;
|
|
37
81
|
|
|
38
82
|
// Secrets first (longest/most specific patterns)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
83
|
+
result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
|
|
84
|
+
|
|
85
|
+
// PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
|
|
86
|
+
result = applyPatterns(result, PII_PATTERNS, "[PII]");
|
|
43
87
|
|
|
44
88
|
// File paths
|
|
45
89
|
result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
|
|
@@ -123,7 +167,7 @@ export function sanitizeBundle(
|
|
|
123
167
|
level: "conservative" | "aggressive",
|
|
124
168
|
projectName?: string,
|
|
125
169
|
): ContributionBundle {
|
|
126
|
-
|
|
170
|
+
const fieldSanitized: ContributionBundle = {
|
|
127
171
|
...bundle,
|
|
128
172
|
sanitization_level: level,
|
|
129
173
|
positive_queries: bundle.positive_queries.map((q) => ({
|
|
@@ -151,6 +195,9 @@ export function sanitizeBundle(
|
|
|
151
195
|
}
|
|
152
196
|
: {}),
|
|
153
197
|
};
|
|
198
|
+
|
|
199
|
+
// Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
|
|
200
|
+
return redactSecretsDeep(fieldSanitized);
|
|
154
201
|
}
|
|
155
202
|
|
|
156
203
|
// ---------------------------------------------------------------------------
|