selftune 0.2.16 → 0.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -19
- package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
- package/cli/selftune/alpha-upload/client.ts +51 -1
- package/cli/selftune/alpha-upload/flush.ts +46 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
- package/cli/selftune/alpha-upload-contract.ts +9 -0
- package/cli/selftune/constants.ts +82 -5
- package/cli/selftune/contribute/sanitize.ts +52 -5
- package/cli/selftune/dashboard-contract.ts +100 -0
- package/cli/selftune/dashboard-server.ts +2 -2
- package/cli/selftune/evolution/description-quality.ts +12 -11
- package/cli/selftune/evolution/evolve.ts +214 -51
- package/cli/selftune/evolution/validate-proposal.ts +9 -6
- package/cli/selftune/grading/grade-session.ts +20 -0
- package/cli/selftune/hooks/commit-track.ts +188 -0
- package/cli/selftune/hooks/prompt-log.ts +10 -1
- package/cli/selftune/hooks/session-stop.ts +2 -2
- package/cli/selftune/hooks/skill-eval.ts +15 -1
- package/cli/selftune/hooks/stdin-preview.ts +32 -0
- package/cli/selftune/localdb/direct-write.ts +69 -6
- package/cli/selftune/localdb/queries.ts +552 -7
- package/cli/selftune/localdb/schema.ts +46 -0
- package/cli/selftune/orchestrate.ts +32 -4
- package/cli/selftune/routes/overview.ts +41 -3
- package/cli/selftune/routes/skill-report.ts +88 -17
- package/cli/selftune/types.ts +31 -0
- package/cli/selftune/utils/transcript.ts +210 -1
- package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
- package/package.json +1 -1
- package/packages/telemetry-contract/src/types.ts +11 -0
- package/skill/SKILL.md +29 -1
- package/skill/Workflows/Evolve.md +31 -13
- package/skill/Workflows/ExportCanonical.md +121 -0
- package/skill/Workflows/Hook.md +131 -0
- package/skill/Workflows/Initialize.md +9 -8
- package/skill/Workflows/Orchestrate.md +27 -5
- package/skill/Workflows/Quickstart.md +94 -0
- package/skill/Workflows/RepairSkillUsage.md +87 -0
- package/skill/Workflows/Uninstall.md +82 -0
- package/skill/settings_snippet.json +11 -0
package/README.md
CHANGED
|
@@ -69,6 +69,8 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
|
|
|
69
69
|
|
|
70
70
|
**I manage an agent setup with many skills** — You have 15+ skills installed. Some work. Some don't. Some conflict. Tell your agent "how are my skills doing?" and selftune gives you a health dashboard and automatically improves the skills that aren't keeping up.
|
|
71
71
|
|
|
72
|
+
**I use skills for non-coding work** — Marketing workflows, research pipelines, compliance checks, slide decks. You say "make me a presentation" and nothing happens. selftune learns that "slides", "deck", and "presentation for Monday" all mean the same skill — and fixes the routing automatically.
|
|
73
|
+
|
|
72
74
|
## How It Works
|
|
73
75
|
|
|
74
76
|
<p align="center">
|
|
@@ -77,29 +79,27 @@ selftune learned that real users say "slides", "deck", "presentation for Monday"
|
|
|
77
79
|
|
|
78
80
|
A continuous feedback loop that makes your skills learn and adapt. Automatically. Your agent runs everything — you just install the skill and talk naturally.
|
|
79
81
|
|
|
80
|
-
**Observe** —
|
|
82
|
+
**Observe** — Seven real-time hooks capture every query, every skill invocation, and every correction signal. Structured telemetry — not raw logs. On Claude Code, hooks install automatically during `selftune init`. Backfill existing transcripts with `selftune ingest claude`.
|
|
83
|
+
|
|
84
|
+
**Detect** — Finds the gap between how you talk and how your skills are described. You say "make me a slide deck" and your pptx skill stays silent — selftune catches that mismatch. Clusters missed queries by invocation type. Detects correction signals ("why didn't you use X?") and triggers immediate improvement.
|
|
85
|
+
|
|
86
|
+
**Evolve** — Generates multiple proposals biased toward different invocation types, validates each against your real eval set with majority voting, runs constitutional checks, then gates with an expensive model before deploying. Not guesswork — evidence. Automatic backup on every deploy.
|
|
81
87
|
|
|
82
|
-
**
|
|
88
|
+
**Watch** — After deploying changes, selftune monitors trigger rates, false negatives, and per-invocation-type scores. If anything regresses, it rolls back automatically. No manual monitoring needed.
|
|
83
89
|
|
|
84
|
-
**
|
|
90
|
+
**Automate** — Run `selftune cron setup` to install OS-level scheduling. selftune syncs, grades, evolves, and watches on a schedule — fully autonomous.
|
|
85
91
|
|
|
86
|
-
|
|
92
|
+
## How Is This Different from Agents That "Learn"?
|
|
87
93
|
|
|
88
|
-
|
|
94
|
+
Some agents claim self-improvement by saving notes about what worked. That's knowledge persistence — not a closed loop. There's no measurement, no validation, and no way to know if the saved notes are actually correct.
|
|
89
95
|
|
|
90
|
-
|
|
96
|
+
selftune is empirical. It observes real sessions, grades execution quality, detects missed triggers, proposes changes, validates them against eval sets, deploys with automatic backup, monitors for regressions, and rolls back on failure. Twelve interlocking mechanisms — not one background thread writing markdown.
|
|
91
97
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
- **Auto-activation system** — Hooks detect when selftune should run and suggest actions
|
|
98
|
-
- **Enforcement guardrails** — Blocks SKILL.md edits on monitored skills unless `selftune watch` has been run
|
|
99
|
-
- **Live dashboard server** — `selftune dashboard --serve` with SSE auto-refresh and action buttons
|
|
100
|
-
- **Evolution memory** — Persists context, plans, and decisions across context resets
|
|
101
|
-
- **4 specialized agents** — Diagnosis analyst, pattern analyst, evolution reviewer, integration guide
|
|
102
|
-
- **Sandbox test harness** — Comprehensive automated test coverage, including devcontainer-based LLM testing
|
|
98
|
+
| Approach | Measures quality? | Validates changes? | Detects regressions? | Rolls back? |
|
|
99
|
+
| ------------------------- | ----------------- | --------------------------- | ---------------------- | ----------- |
|
|
100
|
+
| Agent saves its own notes | No | No | No | No |
|
|
101
|
+
| Manual skill rewrites | No | No | No | No |
|
|
102
|
+
| **selftune** | 3-tier grading | Eval sets + majority voting | Post-deploy monitoring | Automatic |
|
|
103
103
|
|
|
104
104
|
## Commands
|
|
105
105
|
|
|
@@ -108,12 +108,15 @@ Your agent runs these — you just say what you want ("improve my skills", "show
|
|
|
108
108
|
| Group | Command | What it does |
|
|
109
109
|
| ---------- | -------------------------------------------- | ------------------------------------------------------------------------------------------- |
|
|
110
110
|
| | `selftune status` | See which skills are undertriggering and why |
|
|
111
|
-
| | `selftune
|
|
111
|
+
| | `selftune last` | Quick insight from the most recent session |
|
|
112
|
+
| | `selftune orchestrate` | Run the full autonomous loop (sync → grade → evolve → watch) |
|
|
113
|
+
| | `selftune sync` | Refresh telemetry from source-truth transcripts |
|
|
112
114
|
| | `selftune dashboard` | Open the visual skill health dashboard |
|
|
113
115
|
| | `selftune doctor` | Health check: logs, hooks, config, permissions |
|
|
114
116
|
| **ingest** | `selftune ingest claude` | Backfill from Claude Code transcripts |
|
|
115
117
|
| | `selftune ingest codex` | Import Codex rollout logs (experimental) |
|
|
116
118
|
| **grade** | `selftune grade --skill <name>` | Grade a skill session with evidence |
|
|
119
|
+
| | `selftune grade auto` | Auto-grade recent sessions for ungraded skills |
|
|
117
120
|
| | `selftune grade baseline --skill <name>` | Measure skill value vs no-skill baseline |
|
|
118
121
|
| **evolve** | `selftune evolve --skill <name>` | Propose, validate, and deploy improved descriptions |
|
|
119
122
|
| | `selftune evolve body --skill <name>` | Evolve full skill body or routing table |
|
|
@@ -124,7 +127,9 @@ Your agent runs these — you just say what you want ("improve my skills", "show
|
|
|
124
127
|
| | `selftune eval import` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) |
|
|
125
128
|
| **auto** | `selftune cron setup` | Install OS-level scheduling (cron/launchd/systemd) |
|
|
126
129
|
| | `selftune watch --skill <name>` | Monitor after deploy. Auto-rollback on regression. |
|
|
127
|
-
| **other** | `selftune
|
|
130
|
+
| **other** | `selftune workflows` | Discover and manage multi-skill workflows |
|
|
131
|
+
| | `selftune badge --skill <name>` | Generate a health badge for your skill's README |
|
|
132
|
+
| | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) |
|
|
128
133
|
| | `selftune alpha upload` | Run a manual alpha upload cycle and emit a JSON send summary |
|
|
129
134
|
|
|
130
135
|
Full command reference: `selftune --help`
|
|
@@ -58,7 +58,7 @@ export function buildV2PushPayload(
|
|
|
58
58
|
const params = afterSeq !== undefined ? [afterSeq, limit] : [limit];
|
|
59
59
|
|
|
60
60
|
const sql = `
|
|
61
|
-
SELECT local_seq, record_kind, record_json
|
|
61
|
+
SELECT local_seq, record_kind, record_id, record_json, content_sha256
|
|
62
62
|
FROM canonical_upload_staging
|
|
63
63
|
${whereClause}
|
|
64
64
|
ORDER BY local_seq ASC
|
|
@@ -68,7 +68,9 @@ export function buildV2PushPayload(
|
|
|
68
68
|
const rows = db.query(sql).all(...params) as Array<{
|
|
69
69
|
local_seq: number;
|
|
70
70
|
record_kind: string;
|
|
71
|
+
record_id: string;
|
|
71
72
|
record_json: string;
|
|
73
|
+
content_sha256: string | null;
|
|
72
74
|
}>;
|
|
73
75
|
|
|
74
76
|
if (rows.length === 0) return null;
|
|
@@ -78,6 +80,7 @@ export function buildV2PushPayload(
|
|
|
78
80
|
const orchestrateRuns: Record<string, unknown>[] = [];
|
|
79
81
|
const gradingResults: Record<string, unknown>[] = [];
|
|
80
82
|
const improvementSignals: Record<string, unknown>[] = [];
|
|
83
|
+
const contentHashes: Record<string, string> = {};
|
|
81
84
|
let lastParsedSeq: number | null = null;
|
|
82
85
|
let hitMalformedRow = false;
|
|
83
86
|
|
|
@@ -87,6 +90,10 @@ export function buildV2PushPayload(
|
|
|
87
90
|
hitMalformedRow = true;
|
|
88
91
|
break;
|
|
89
92
|
}
|
|
93
|
+
// Collect content hashes for dedup — only after successful parse, keyed by kind:id
|
|
94
|
+
if (row.content_sha256) {
|
|
95
|
+
contentHashes[`${row.record_kind}:${row.record_id}`] = row.content_sha256;
|
|
96
|
+
}
|
|
90
97
|
|
|
91
98
|
if (row.record_kind === "evolution_evidence") {
|
|
92
99
|
const timestamp =
|
|
@@ -152,6 +159,12 @@ export function buildV2PushPayload(
|
|
|
152
159
|
gradingResults,
|
|
153
160
|
improvementSignals,
|
|
154
161
|
);
|
|
162
|
+
|
|
163
|
+
// Attach content hashes for server-side dedup
|
|
164
|
+
if (Object.keys(contentHashes).length > 0) {
|
|
165
|
+
payload.content_hashes = contentHashes;
|
|
166
|
+
}
|
|
167
|
+
|
|
155
168
|
if (lastParsedSeq === null) {
|
|
156
169
|
return null;
|
|
157
170
|
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* PushUploadResult indicating success or failure.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import type { PushUploadResult } from "../alpha-upload-contract.js";
|
|
9
|
+
import type { HeadCheckResult, PushUploadResult } from "../alpha-upload-contract.js";
|
|
10
10
|
import { getSelftuneVersion } from "../utils/selftune-meta.js";
|
|
11
11
|
|
|
12
12
|
function isPushUploadResult(value: unknown): value is PushUploadResult {
|
|
@@ -111,3 +111,53 @@ export async function uploadPushPayload(
|
|
|
111
111
|
};
|
|
112
112
|
}
|
|
113
113
|
}
|
|
114
|
+
|
|
115
|
+
/**
 * Lightweight HEAD check to see if a record already exists on the server.
 *
 * Sends `HEAD {endpoint}/{recordId}` with the record id URI-encoded as a single
 * path segment. When `sha256` is given it is sent as a quoted `If-None-Match`
 * value for content-hash comparison; when `apiKey` is given it is sent as a
 * Bearer token.
 *
 * Never throws -- returns `{ exists: false, unchanged: false }` on any error
 * (fail-open, matching the uploadPushPayload pattern).
 *
 * @param endpoint - Base URL the record id is appended to; trailing slashes are ignored.
 * @param recordId - Identifier of the record to look up.
 * @param sha256 - Optional content hash for the `If-None-Match` header.
 * @param apiKey - Optional API key for the `Authorization` header.
 * @returns 200 -> `{ exists: true, unchanged: false }`, 304 -> `{ exists: true, unchanged: true }`,
 *   any other status or any thrown error -> `{ exists: false, unchanged: false }`.
 */
export async function headRecord(
  endpoint: string,
  recordId: string,
  sha256?: string,
  apiKey?: string,
): Promise<HeadCheckResult> {
  const failOpen: HeadCheckResult = { exists: false, unchanged: false };
  try {
    const headers: Record<string, string> = {
      "User-Agent": `selftune/${getSelftuneVersion()}`,
    };

    if (sha256) {
      headers["If-None-Match"] = `"${sha256}"`;
    }

    if (apiKey) {
      headers.Authorization = `Bearer ${apiKey}`;
    }

    // Drop trailing slashes so an endpoint configured as ".../records/" does not
    // produce ".../records//<id>", which many servers route differently.
    const base = endpoint.replace(/\/+$/, "");
    const url = `${base}/${encodeURIComponent(recordId)}`;
    const response = await fetch(url, {
      method: "HEAD",
      headers,
      signal: AbortSignal.timeout(10_000),
    });

    if (response.status === 200) {
      return { exists: true, unchanged: false };
    }
    if (response.status === 304) {
      return { exists: true, unchanged: true };
    }
    // 404 or any other status -- treat as not found
    return failOpen;
  } catch {
    // Network error, timeout, etc. -- fail open
    return failOpen;
  }
}
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
import type { FlushSummary, QueueOperations } from "../alpha-upload-contract.js";
|
|
15
|
-
import { uploadPushPayload } from "./client.js";
|
|
15
|
+
import { headRecord, uploadPushPayload } from "./client.js";
|
|
16
16
|
|
|
17
17
|
// ---------------------------------------------------------------------------
|
|
18
18
|
// Options
|
|
@@ -28,6 +28,8 @@ export interface FlushOptions {
|
|
|
28
28
|
dryRun?: boolean;
|
|
29
29
|
/** API key for Bearer auth on the cloud endpoint. */
|
|
30
30
|
apiKey?: string;
|
|
31
|
+
/** When set, run HEAD checks against this endpoint before pushing. */
|
|
32
|
+
headCheckEndpoint?: string;
|
|
31
33
|
}
|
|
32
34
|
|
|
33
35
|
// ---------------------------------------------------------------------------
|
|
@@ -85,8 +87,9 @@ export async function flushQueue(
|
|
|
85
87
|
const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
86
88
|
const dryRun = options?.dryRun ?? false;
|
|
87
89
|
const apiKey = options?.apiKey;
|
|
90
|
+
const headCheckEndpoint = options?.headCheckEndpoint;
|
|
88
91
|
|
|
89
|
-
const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0 };
|
|
92
|
+
const summary: FlushSummary = { sent: 0, failed: 0, skipped: 0, skipped_unchanged: 0 };
|
|
90
93
|
|
|
91
94
|
const items = queue.getPending(batchSize);
|
|
92
95
|
|
|
@@ -94,7 +97,44 @@ export async function flushQueue(
|
|
|
94
97
|
return summary;
|
|
95
98
|
}
|
|
96
99
|
|
|
100
|
+
// -- HEAD check phase: identify records that already exist unchanged ------
|
|
101
|
+
const unchangedIds = new Set<number>();
|
|
102
|
+
if (headCheckEndpoint) {
|
|
103
|
+
const headChecks = items.map(async (item) => {
|
|
104
|
+
try {
|
|
105
|
+
const parsed = JSON.parse(item.payload_json) as { push_id?: string };
|
|
106
|
+
const pushId = parsed.push_id;
|
|
107
|
+
if (!pushId) return { id: item.id, skip: false };
|
|
108
|
+
const result = await headRecord(headCheckEndpoint, pushId, undefined, apiKey);
|
|
109
|
+
return { id: item.id, skip: result.exists && result.unchanged };
|
|
110
|
+
} catch {
|
|
111
|
+
// Fail-open: if HEAD check itself errors, don't skip
|
|
112
|
+
return { id: item.id, skip: false };
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
const results = await Promise.allSettled(headChecks);
|
|
117
|
+
for (const result of results) {
|
|
118
|
+
if (result.status === "fulfilled" && result.value.skip) {
|
|
119
|
+
unchangedIds.add(result.value.id);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Mark unchanged items as sent in the queue without actually pushing
|
|
124
|
+
for (const item of items) {
|
|
125
|
+
if (unchangedIds.has(item.id)) {
|
|
126
|
+
if (!queue.markSending(item.id)) continue;
|
|
127
|
+
if (queue.markSent(item.id)) {
|
|
128
|
+
summary.skipped_unchanged++;
|
|
129
|
+
} else {
|
|
130
|
+
summary.failed++;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
97
136
|
for (const item of items) {
|
|
137
|
+
if (unchangedIds.has(item.id)) continue;
|
|
98
138
|
const markFailedSafely = (message: string): void => {
|
|
99
139
|
if (!queue.markFailed(item.id, message)) {
|
|
100
140
|
console.error(`[alpha upload] Failed to persist queue failure state for item ${item.id}`);
|
|
@@ -149,10 +189,11 @@ export async function flushQueue(
|
|
|
149
189
|
break;
|
|
150
190
|
}
|
|
151
191
|
|
|
152
|
-
// 409 Conflict = duplicate push_id
|
|
153
|
-
|
|
192
|
+
// 304 Not Modified = content unchanged (dedup), 409 Conflict = duplicate push_id
|
|
193
|
+
// Both are treated as success — the server already has this data.
|
|
194
|
+
if (status === 304 || status === 409) {
|
|
154
195
|
if (!queue.markSent(item.id)) {
|
|
155
|
-
markFailedSafely("local queue state update failed after duplicate upload");
|
|
196
|
+
markFailedSafely("local queue state update failed after duplicate/unchanged upload");
|
|
156
197
|
summary.failed++;
|
|
157
198
|
} else {
|
|
158
199
|
summary.sent++;
|
|
@@ -146,6 +146,18 @@ function extractNormalizedAt(record: CanonicalRecord): string {
|
|
|
146
146
|
return record.normalized_at;
|
|
147
147
|
}
|
|
148
148
|
|
|
149
|
+
// -- Content hashing ----------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Hash a string with SHA-256 and return the digest as a hex string
 * (used for upload dedup).
 * Relies on Bun's built-in CryptoHasher, so no extra dependency is needed.
 */
export function computeContentSha256(input: string): string {
  return new Bun.CryptoHasher("sha256").update(input).digest("hex");
}
|
|
160
|
+
|
|
149
161
|
// -- Main staging function ----------------------------------------------------
|
|
150
162
|
|
|
151
163
|
/**
|
|
@@ -163,9 +175,12 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
163
175
|
const now = new Date().toISOString();
|
|
164
176
|
|
|
165
177
|
const stmt = db.prepare(`
|
|
166
|
-
INSERT
|
|
167
|
-
(record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at)
|
|
168
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
178
|
+
INSERT INTO canonical_upload_staging
|
|
179
|
+
(record_kind, record_id, record_json, session_id, prompt_id, normalized_at, staged_at, content_sha256)
|
|
180
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
181
|
+
ON CONFLICT(record_kind, record_id) DO UPDATE SET
|
|
182
|
+
content_sha256 = excluded.content_sha256
|
|
183
|
+
WHERE canonical_upload_staging.content_sha256 IS NULL AND excluded.content_sha256 IS NOT NULL
|
|
169
184
|
`);
|
|
170
185
|
|
|
171
186
|
// 1. Stage canonical records from SQLite (default) or JSONL (custom logPath override)
|
|
@@ -177,14 +192,16 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
177
192
|
: readAndEnrichCanonicalRecords(logPath);
|
|
178
193
|
for (const record of records) {
|
|
179
194
|
const recordId = extractRecordId(record);
|
|
195
|
+
const recordJson = JSON.stringify(record);
|
|
180
196
|
const result = stmt.run(
|
|
181
197
|
record.record_kind,
|
|
182
198
|
recordId,
|
|
183
|
-
|
|
199
|
+
recordJson,
|
|
184
200
|
extractSessionId(record),
|
|
185
201
|
extractPromptId(record),
|
|
186
202
|
extractNormalizedAt(record),
|
|
187
203
|
now,
|
|
204
|
+
computeContentSha256(recordJson),
|
|
188
205
|
);
|
|
189
206
|
if (result.changes > 0) staged++;
|
|
190
207
|
}
|
|
@@ -222,6 +239,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
222
239
|
null, // no prompt_id
|
|
223
240
|
entry.timestamp,
|
|
224
241
|
now,
|
|
242
|
+
computeContentSha256(recordJson),
|
|
225
243
|
);
|
|
226
244
|
if (result.changes > 0) staged++;
|
|
227
245
|
}
|
|
@@ -258,6 +276,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
258
276
|
null, // no prompt_id
|
|
259
277
|
run.timestamp,
|
|
260
278
|
now,
|
|
279
|
+
computeContentSha256(recordJson),
|
|
261
280
|
);
|
|
262
281
|
if (result.changes > 0) staged++;
|
|
263
282
|
}
|
|
@@ -298,6 +317,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
298
317
|
null, // no prompt_id
|
|
299
318
|
gr.graded_at,
|
|
300
319
|
now,
|
|
320
|
+
computeContentSha256(recordJson),
|
|
301
321
|
);
|
|
302
322
|
if (result.changes > 0) staged++;
|
|
303
323
|
}
|
|
@@ -332,6 +352,7 @@ export function stageCanonicalRecords(db: Database, logPath: string = CANONICAL_
|
|
|
332
352
|
null, // no prompt_id
|
|
333
353
|
sig.timestamp,
|
|
334
354
|
now,
|
|
355
|
+
computeContentSha256(recordJson),
|
|
335
356
|
);
|
|
336
357
|
if (result.changes > 0) staged++;
|
|
337
358
|
}
|
|
@@ -49,4 +49,13 @@ export interface FlushSummary {
|
|
|
49
49
|
sent: number;
|
|
50
50
|
failed: number;
|
|
51
51
|
skipped: number;
|
|
52
|
+
/** Records skipped because a HEAD check confirmed they already exist unchanged. */
|
|
53
|
+
skipped_unchanged: number;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// -- HEAD check result --------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
export interface HeadCheckResult {
  /** True when the HEAD check found the record on the server. */
  exists: boolean;
  /** True when the server additionally reported the record's content as unchanged. */
  unchanged: boolean;
}
|
|
@@ -41,6 +41,11 @@ export const ORCHESTRATE_RUN_LOG = join(LOG_DIR, "orchestrate_runs.jsonl");
|
|
|
41
41
|
export const SIGNAL_LOG = join(LOG_DIR, "improvement_signals.jsonl");
|
|
42
42
|
export const ORCHESTRATE_LOCK = join(LOG_DIR, ".orchestrate.lock");
|
|
43
43
|
|
|
44
|
+
/**
 * Resolve the orchestrate lock file path.
 * Tests can point SELFTUNE_ORCHESTRATE_LOCK_PATH elsewhere so the host lock file is never touched.
 */
export function getOrchestrateLockPath(): string {
  const override = process.env.SELFTUNE_ORCHESTRATE_LOCK_PATH;
  // Truthiness check on purpose: an unset or empty override falls back to the default path
  return override ? override : ORCHESTRATE_LOCK;
}
|
|
48
|
+
|
|
44
49
|
/** Evolution memory directory — human-readable session context that survives resets. */
|
|
45
50
|
export const MEMORY_DIR = join(SELFTUNE_CONFIG_DIR, "memory");
|
|
46
51
|
export const CONTEXT_PATH = join(MEMORY_DIR, "context.md");
|
|
@@ -162,17 +167,62 @@ export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
|
|
|
162
167
|
|
|
163
168
|
/**
 * Regex patterns for detecting secrets that must be redacted.
 *
 * Every pattern carries the global flag. Patterns are listed from
 * platform-specific prefixes down to generic high-confidence shapes.
 */
export const SECRET_PATTERNS = [
  // -- API keys & tokens (platform-specific prefixes) --
  /sk-[a-zA-Z0-9]{20,}/g, // OpenAI API keys
  /sk-ant-[a-zA-Z0-9_-]{20,}/g, // Anthropic API keys
  /ghp_[a-zA-Z0-9]{36,}/g, // GitHub personal access tokens
  /gho_[a-zA-Z0-9]{36,}/g, // GitHub OAuth tokens
  /github_pat_[a-zA-Z0-9_]{22,}/g, // GitHub fine-grained PATs
  /npm_[a-zA-Z0-9]{36}/g, // npm tokens
  /pypi-[a-zA-Z0-9]{36,}/g, // PyPI tokens

  // -- AWS --
  /AKIA[A-Z0-9]{16}/g, // AWS access key IDs (permanent)
  /ASIA[A-Z0-9]{16}/g, // AWS temporary credentials (STS)

  // -- GCP --
  /AIza[0-9A-Za-z_-]{35}/g, // Google API key

  // -- Stripe --
  /(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{24,}/g, // Stripe secret/publishable/restricted keys

  // -- Twilio --
  /SK[a-f0-9]{32}/g, // Twilio API key

  // -- SendGrid --
  /SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}/g, // SendGrid API key

  // -- Mailgun --
  /key-[a-zA-Z0-9]{32}/g, // Mailgun API key

  // -- Slack --
  /xoxb-[a-zA-Z0-9-]+/g, // Slack bot tokens
  /xoxp-[a-zA-Z0-9-]+/g, // Slack user tokens
  /xoxs-[a-zA-Z0-9-]+/g, // Slack session tokens

  // -- JWTs --
  /eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/g, // JSON Web Tokens

  // -- Private keys (PEM block headers) --
  /-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----[\s\S]*?-----END (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----/g, // PEM private key blocks (full multiline)

  // -- Database connection URIs --
  /(mongodb(\+srv)?|postgres(ql)?|mysql|mariadb|redis|rediss|amqp|amqps):\/\/[^\s"')]+/g, // DB/broker connection URIs (redacted whole, with or without credentials)

  // -- Azure --
  /DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+/g, // Azure storage connection string

  // -- Webhook URLs --
  /https:\/\/discord(app)?\.com\/api\/webhooks\/[0-9]+\/[a-zA-Z0-9_-]+/g, // Discord webhook
  /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g, // Slack webhook

  // -- SSH keys --
  /ssh-(rsa|ed25519|ecdsa|dsa)\s+[A-Za-z0-9+/]{40,}[=]{0,3}/g, // SSH public key material

  // -- Generic high-confidence patterns --
  /Bearer\s+[a-zA-Z0-9_-]{20,}/g, // Bearer tokens in auth headers
  // Basic auth embedded in URLs. The user and password parts exclude whitespace so the
  // match stays inside a single URL token; without that, a plain URL followed much later
  // by any ":" and "@" would swallow (and redact) all the text in between.
  /https?:\/\/[^\s:]+:[^\s@]+@[^\s"']+/g,
  /(?<![a-fA-F0-9])[a-fA-F0-9]{64,}(?![a-fA-F0-9])/g, // Long hex strings (64+ chars, likely secrets)
] as const;
|
|
177
227
|
|
|
178
228
|
/** Regex for file paths (Unix and Windows). */
|
|
@@ -184,6 +234,33 @@ export const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\
|
|
|
184
234
|
/** Regex for IP addresses (v4). */
|
|
185
235
|
export const IP_PATTERN = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g;
|
|
186
236
|
|
|
237
|
+
// ---------------------------------------------------------------------------
|
|
238
|
+
// PII patterns — high-confidence, low-false-positive personally identifiable info
|
|
239
|
+
// ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
/**
 * Regex patterns for personally identifiable information: phone numbers,
 * payment card numbers, US SSNs, IPv6 addresses, and dates of birth that
 * appear in a key-value context. Every pattern carries the global flag.
 */
export const PII_PATTERNS = [
  // -- Phone numbers --
  /\+\d{1,3}\s?\d{1,4}\s?\d{1,4}\s?\d{1,9}/g, // E.164 intl: +1 555 123 4567, +44 20 7946 0958
  /\b\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g, // US/CA phone: (555) 123-4567, 555-123-4567, 555.123.4567

  // -- Credit card numbers (major networks, with optional separators) --
  /\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Visa (starts with 4)
  /\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Mastercard (51-55)
  /\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b/g, // Amex (34/37)
  /\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, // Discover (6011/65)

  // -- SSN / national IDs --
  /\b\d{3}-\d{2}-\d{4}\b/g, // US SSN: 123-45-6789

  // -- IPv6 --
  /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g, // Full IPv6
  /\b(?:[0-9a-fA-F]{1,4}:){1,7}:(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?(?!\w)/g, // Abbreviated IPv6 (with ::)
  /::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g, // Abbreviated IPv6 (leading ::1, ::ffff:...)

  // -- Date of birth patterns (in structured contexts) --
  // The words of "date of birth" may be joined by a space, dot, underscore, hyphen, or
  // nothing, so date.of.birth, date_of_birth, "date of birth" and dateOfBirth all match.
  /\b(?:dob|date[\s._-]?of[\s._-]?birth|birthday|born)\s*[:=]\s*\d{1,4}[-/]\d{1,2}[-/]\d{1,4}\b/gi, // DOB in key-value context
] as const;
|
|
263
|
+
|
|
187
264
|
/** Regex for camelCase/PascalCase identifiers longer than 8 chars (aggressive mode). */
|
|
188
265
|
export const IDENTIFIER_PATTERN = /\b[a-z][a-zA-Z0-9]{8,}\b|\b[A-Z][a-zA-Z0-9]{8,}\b/g;
|
|
189
266
|
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
IDENTIFIER_PATTERN,
|
|
16
16
|
IP_PATTERN,
|
|
17
17
|
MODULE_PATTERN,
|
|
18
|
+
PII_PATTERNS,
|
|
18
19
|
SECRET_PATTERNS,
|
|
19
20
|
} from "../constants.js";
|
|
20
21
|
import type { ContributionBundle } from "../types.js";
|
|
@@ -26,6 +27,49 @@ const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]
|
|
|
26
27
|
const DOUBLE_QUOTED_PATTERN = /"[^"]*"/g;
|
|
27
28
|
const SINGLE_QUOTED_PATTERN = /'[^']*'/g;
|
|
28
29
|
|
|
30
|
+
/**
 * Apply a set of regex patterns to text, replacing every match with a token.
 *
 * Each regex is cloned before use so the caller's (shared, module-level) pattern
 * objects never have their `lastIndex` touched. The clone always carries the global
 * flag, so a pattern declared without `g` still redacts all of its matches rather
 * than only the first. The token is inserted literally: sequences such as `$&` or
 * `$1` in it are not interpreted as replacement patterns.
 *
 * @param text - Input string to scrub.
 * @param patterns - Patterns applied in order; later patterns see earlier replacements.
 * @param token - Literal replacement text for every match.
 * @returns The text with all matches replaced.
 */
function applyPatterns(text: string, patterns: readonly RegExp[], token: string): string {
  let result = text;
  for (const pattern of patterns) {
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    // Function replacer keeps the token literal (no "$" expansion)
    result = result.replace(new RegExp(pattern.source, flags), () => token);
  }
  return result;
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Secret-only sanitization (used by redactSecretsDeep for defense-in-depth)
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/**
 * Redact secrets only: runs SECRET_PATTERNS over the string and replaces each
 * match with "[SECRET]".
 * Lighter than sanitizeConservative — no path/email/IP/UUID replacement.
 */
export function sanitizeSecrets(text: string): string {
  // Empty (falsy) input is handed back as-is without running any pattern
  return text ? applyPatterns(text, SECRET_PATTERNS, "[SECRET]") : text;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Recursively traverse a value and redact secrets in all string leaves.
 *
 * - Strings are passed through sanitizeSecrets.
 * - Arrays are mapped element by element into a new array.
 * - Plain objects (prototype is Object.prototype or null) are rebuilt key by key
 *   from their own enumerable string-keyed properties.
 * - Non-string primitives, Dates, and other non-plain objects (Map, Set, RegExp,
 *   class instances, ...) pass through unchanged.
 *
 * Does NOT mutate the input — returns a new structure.
 *
 * @param value - Any value; typically a JSON-like bundle.
 * @returns A copy of `value` with secrets redacted in every reachable string.
 */
export function redactSecretsDeep<T>(value: T): T {
  if (typeof value === "string") return sanitizeSecrets(value) as T;
  if (Array.isArray(value)) return value.map((item) => redactSecretsDeep(item)) as T;
  if (value && typeof value === "object" && !(value instanceof Date)) {
    // Only recurse into plain objects — pass through Map, Set, RegExp, class instances, etc.
    const proto = Object.getPrototypeOf(value);
    if (proto !== null && proto !== Object.prototype) return value;
    // Object.fromEntries defines each key as an own data property. Plain assignment
    // (`result[k] = v`) would route an own "__proto__" key (e.g. from JSON.parse) through
    // the prototype setter, dropping the key and possibly swapping the result's prototype.
    const redacted: Record<string, unknown> = Object.fromEntries(
      Object.entries(value).map(([key, child]) => [key, redactSecretsDeep(child)]),
    );
    return redacted as T;
  }
  return value;
}
|
|
72
|
+
|
|
29
73
|
// ---------------------------------------------------------------------------
|
|
30
74
|
// Conservative sanitization
|
|
31
75
|
// ---------------------------------------------------------------------------
|
|
@@ -36,10 +80,10 @@ export function sanitizeConservative(text: string, projectName?: string): string
|
|
|
36
80
|
let result = text;
|
|
37
81
|
|
|
38
82
|
// Secrets first (longest/most specific patterns)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
83
|
+
result = applyPatterns(result, SECRET_PATTERNS, "[SECRET]");
|
|
84
|
+
|
|
85
|
+
// PII (phone numbers, credit cards, SSNs, IPv6, DOBs)
|
|
86
|
+
result = applyPatterns(result, PII_PATTERNS, "[PII]");
|
|
43
87
|
|
|
44
88
|
// File paths
|
|
45
89
|
result = result.replace(new RegExp(FILE_PATH_PATTERN.source, FILE_PATH_PATTERN.flags), "[PATH]");
|
|
@@ -123,7 +167,7 @@ export function sanitizeBundle(
|
|
|
123
167
|
level: "conservative" | "aggressive",
|
|
124
168
|
projectName?: string,
|
|
125
169
|
): ContributionBundle {
|
|
126
|
-
|
|
170
|
+
const fieldSanitized: ContributionBundle = {
|
|
127
171
|
...bundle,
|
|
128
172
|
sanitization_level: level,
|
|
129
173
|
positive_queries: bundle.positive_queries.map((q) => ({
|
|
@@ -151,6 +195,9 @@ export function sanitizeBundle(
|
|
|
151
195
|
}
|
|
152
196
|
: {}),
|
|
153
197
|
};
|
|
198
|
+
|
|
199
|
+
// Defense-in-depth: recursively redact any secrets that slipped through field-level sanitization
|
|
200
|
+
return redactSecretsDeep(fieldSanitized);
|
|
154
201
|
}
|
|
155
202
|
|
|
156
203
|
// ---------------------------------------------------------------------------
|