selftune 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +20 -10
- package/.claude/agents/evolution-reviewer.md +14 -1
- package/.claude/agents/integration-guide.md +18 -6
- package/.claude/agents/pattern-analyst.md +18 -5
- package/CHANGELOG.md +12 -4
- package/README.md +43 -35
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/cli/selftune/badge/badge-data.ts +1 -1
- package/cli/selftune/badge/badge.ts +4 -8
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +28 -0
- package/cli/selftune/contribute/contribute.ts +1 -1
- package/cli/selftune/cron/setup.ts +17 -17
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +653 -186
- package/cli/selftune/dashboard.ts +41 -176
- package/cli/selftune/eval/baseline.ts +5 -4
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/hooks-to-evals.ts +34 -15
- package/cli/selftune/eval/unit-test-cli.ts +1 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +105 -11
- package/cli/selftune/evolution/evolve.ts +371 -25
- package/cli/selftune/evolution/extract-patterns.ts +87 -29
- package/cli/selftune/evolution/rollback.ts +2 -2
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +448 -97
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +395 -116
- package/cli/selftune/ingestors/claude-replay.ts +140 -114
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +227 -14
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/monitoring/watch.ts +66 -15
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +48 -26
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +148 -0
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +78 -20
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +272 -26
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +21 -8
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +84 -53
- package/skill/Workflows/AutoActivation.md +17 -16
- package/skill/Workflows/Badge.md +6 -0
- package/skill/Workflows/Baseline.md +46 -23
- package/skill/Workflows/Composability.md +12 -5
- package/skill/Workflows/Contribute.md +17 -14
- package/skill/Workflows/Cron.md +56 -79
- package/skill/Workflows/Dashboard.md +45 -34
- package/skill/Workflows/Doctor.md +30 -17
- package/skill/Workflows/Evals.md +64 -40
- package/skill/Workflows/EvolutionMemory.md +2 -0
- package/skill/Workflows/Evolve.md +102 -47
- package/skill/Workflows/EvolveBody.md +6 -6
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +11 -5
- package/skill/Workflows/Ingest.md +43 -36
- package/skill/Workflows/Initialize.md +44 -30
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +39 -18
- package/skill/Workflows/Rollback.md +3 -3
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +34 -22
- package/skill/Workflows/Watch.md +14 -4
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +1 -1
- package/templates/multi-skill-settings.json +7 -7
- package/templates/single-skill-settings.json +6 -6
- package/dashboard/index.html +0 -1680
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@selftune/telemetry-contract",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"private": true,
|
|
5
|
+
"description": "Canonical telemetry schema, types, and validators for selftune",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"author": "Daniel Petro",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+https://github.com/selftune-dev/selftune.git",
|
|
12
|
+
"directory": "packages/telemetry-contract"
|
|
13
|
+
},
|
|
14
|
+
"exports": {
|
|
15
|
+
".": "./index.ts",
|
|
16
|
+
"./types": "./src/types.ts",
|
|
17
|
+
"./validators": "./src/validators.ts"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
export const CANONICAL_SCHEMA_VERSION = "2.0" as const;
|
|
2
|
+
export type CanonicalSchemaVersion = typeof CANONICAL_SCHEMA_VERSION;
|
|
3
|
+
|
|
4
|
+
export const CANONICAL_PLATFORMS = ["claude_code", "codex", "opencode", "openclaw"] as const;
|
|
5
|
+
export type CanonicalPlatform = (typeof CANONICAL_PLATFORMS)[number];
|
|
6
|
+
|
|
7
|
+
export const CANONICAL_CAPTURE_MODES = [
|
|
8
|
+
"hook",
|
|
9
|
+
"replay",
|
|
10
|
+
"wrapper",
|
|
11
|
+
"batch_ingest",
|
|
12
|
+
"repair",
|
|
13
|
+
] as const;
|
|
14
|
+
export type CanonicalCaptureMode = (typeof CANONICAL_CAPTURE_MODES)[number];
|
|
15
|
+
|
|
16
|
+
export const CANONICAL_SOURCE_SESSION_KINDS = [
|
|
17
|
+
"interactive",
|
|
18
|
+
"replayed",
|
|
19
|
+
"synthetic",
|
|
20
|
+
"repaired",
|
|
21
|
+
] as const;
|
|
22
|
+
export type CanonicalSourceSessionKind = (typeof CANONICAL_SOURCE_SESSION_KINDS)[number];
|
|
23
|
+
|
|
24
|
+
export const CANONICAL_PROMPT_KINDS = [
|
|
25
|
+
"user",
|
|
26
|
+
"continuation",
|
|
27
|
+
"task_notification",
|
|
28
|
+
"teammate_message",
|
|
29
|
+
"system_instruction",
|
|
30
|
+
"tool_output",
|
|
31
|
+
"meta",
|
|
32
|
+
"unknown",
|
|
33
|
+
] as const;
|
|
34
|
+
export type CanonicalPromptKind = (typeof CANONICAL_PROMPT_KINDS)[number];
|
|
35
|
+
|
|
36
|
+
export const CANONICAL_INVOCATION_MODES = ["explicit", "implicit", "inferred", "repaired"] as const;
|
|
37
|
+
export type CanonicalInvocationMode = (typeof CANONICAL_INVOCATION_MODES)[number];
|
|
38
|
+
|
|
39
|
+
export const CANONICAL_COMPLETION_STATUSES = [
|
|
40
|
+
"completed",
|
|
41
|
+
"failed",
|
|
42
|
+
"interrupted",
|
|
43
|
+
"cancelled",
|
|
44
|
+
"unknown",
|
|
45
|
+
] as const;
|
|
46
|
+
export type CanonicalCompletionStatus = (typeof CANONICAL_COMPLETION_STATUSES)[number];
|
|
47
|
+
|
|
48
|
+
export const CANONICAL_RECORD_KINDS = [
|
|
49
|
+
"session",
|
|
50
|
+
"prompt",
|
|
51
|
+
"skill_invocation",
|
|
52
|
+
"execution_fact",
|
|
53
|
+
"normalization_run",
|
|
54
|
+
] as const;
|
|
55
|
+
export type CanonicalRecordKind = (typeof CANONICAL_RECORD_KINDS)[number];
|
|
56
|
+
|
|
57
|
+
export interface CanonicalRawSourceRef {
|
|
58
|
+
path?: string;
|
|
59
|
+
line?: number;
|
|
60
|
+
event_type?: string;
|
|
61
|
+
raw_id?: string;
|
|
62
|
+
metadata?: Record<string, unknown>;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export interface CanonicalRecordBase {
|
|
66
|
+
record_kind: CanonicalRecordKind;
|
|
67
|
+
schema_version: CanonicalSchemaVersion;
|
|
68
|
+
normalizer_version: string;
|
|
69
|
+
normalized_at: string;
|
|
70
|
+
platform: CanonicalPlatform;
|
|
71
|
+
capture_mode: CanonicalCaptureMode;
|
|
72
|
+
raw_source_ref: CanonicalRawSourceRef;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export interface CanonicalSessionRecordBase extends CanonicalRecordBase {
|
|
76
|
+
source_session_kind: CanonicalSourceSessionKind;
|
|
77
|
+
session_id: string;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export interface CanonicalSessionRecord extends CanonicalSessionRecordBase {
|
|
81
|
+
record_kind: "session";
|
|
82
|
+
started_at?: string;
|
|
83
|
+
ended_at?: string;
|
|
84
|
+
external_session_id?: string;
|
|
85
|
+
parent_session_id?: string;
|
|
86
|
+
agent_id?: string;
|
|
87
|
+
agent_type?: string;
|
|
88
|
+
agent_cli?: string;
|
|
89
|
+
session_key?: string;
|
|
90
|
+
channel?: string;
|
|
91
|
+
workspace_path?: string;
|
|
92
|
+
repo_root?: string;
|
|
93
|
+
repo_remote?: string;
|
|
94
|
+
branch?: string;
|
|
95
|
+
commit_sha?: string;
|
|
96
|
+
permission_mode?: string;
|
|
97
|
+
approval_policy?: string;
|
|
98
|
+
sandbox_policy?: string;
|
|
99
|
+
provider?: string;
|
|
100
|
+
model?: string;
|
|
101
|
+
completion_status?: CanonicalCompletionStatus;
|
|
102
|
+
end_reason?: string;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
export interface CanonicalPromptRecord extends CanonicalSessionRecordBase {
|
|
106
|
+
record_kind: "prompt";
|
|
107
|
+
prompt_id: string;
|
|
108
|
+
occurred_at: string;
|
|
109
|
+
prompt_text: string;
|
|
110
|
+
prompt_hash?: string;
|
|
111
|
+
prompt_kind: CanonicalPromptKind;
|
|
112
|
+
is_actionable: boolean;
|
|
113
|
+
prompt_index?: number;
|
|
114
|
+
parent_prompt_id?: string;
|
|
115
|
+
source_message_id?: string;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export interface CanonicalSkillInvocationRecord extends CanonicalSessionRecordBase {
|
|
119
|
+
record_kind: "skill_invocation";
|
|
120
|
+
skill_invocation_id: string;
|
|
121
|
+
occurred_at: string;
|
|
122
|
+
matched_prompt_id?: string;
|
|
123
|
+
skill_name: string;
|
|
124
|
+
skill_path?: string;
|
|
125
|
+
skill_version_hash?: string;
|
|
126
|
+
invocation_mode: CanonicalInvocationMode;
|
|
127
|
+
triggered: boolean;
|
|
128
|
+
confidence: number;
|
|
129
|
+
tool_name?: string;
|
|
130
|
+
tool_call_id?: string;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase {
|
|
134
|
+
record_kind: "execution_fact";
|
|
135
|
+
occurred_at: string;
|
|
136
|
+
prompt_id?: string;
|
|
137
|
+
tool_calls_json: Record<string, number>;
|
|
138
|
+
total_tool_calls: number;
|
|
139
|
+
bash_commands_redacted: string[];
|
|
140
|
+
assistant_turns: number;
|
|
141
|
+
errors_encountered: number;
|
|
142
|
+
input_tokens?: number;
|
|
143
|
+
output_tokens?: number;
|
|
144
|
+
duration_ms?: number;
|
|
145
|
+
completion_status?: CanonicalCompletionStatus;
|
|
146
|
+
end_reason?: string;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
export interface CanonicalNormalizationRunRecord extends CanonicalRecordBase {
|
|
150
|
+
record_kind: "normalization_run";
|
|
151
|
+
run_id: string;
|
|
152
|
+
run_at: string;
|
|
153
|
+
raw_records_seen: number;
|
|
154
|
+
canonical_records_written: number;
|
|
155
|
+
repair_applied: boolean;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
export type CanonicalRecord =
|
|
159
|
+
| CanonicalSessionRecord
|
|
160
|
+
| CanonicalPromptRecord
|
|
161
|
+
| CanonicalSkillInvocationRecord
|
|
162
|
+
| CanonicalExecutionFactRecord
|
|
163
|
+
| CanonicalNormalizationRunRecord;
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import {
|
|
2
|
+
CANONICAL_CAPTURE_MODES,
|
|
3
|
+
CANONICAL_COMPLETION_STATUSES,
|
|
4
|
+
CANONICAL_INVOCATION_MODES,
|
|
5
|
+
CANONICAL_PLATFORMS,
|
|
6
|
+
CANONICAL_PROMPT_KINDS,
|
|
7
|
+
CANONICAL_RECORD_KINDS,
|
|
8
|
+
CANONICAL_SCHEMA_VERSION,
|
|
9
|
+
CANONICAL_SOURCE_SESSION_KINDS,
|
|
10
|
+
type CanonicalRawSourceRef,
|
|
11
|
+
type CanonicalRecord,
|
|
12
|
+
} from "./types.js";
|
|
13
|
+
|
|
14
|
+
function isObject(value: unknown): value is Record<string, unknown> {
|
|
15
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
 * Reports whether `value[key]` holds a non-empty string.
 *
 * The property is read once into a local so the `typeof` check narrows a
 * plain variable. Narrowing `value[key]` directly (an `unknown` indexed
 * access through a non-literal key) is only understood by recent TypeScript
 * releases; the local keeps this helper compiling on older ones and avoids
 * evaluating the property twice.
 *
 * @param value - Object to inspect.
 * @param key - Property name to look up on `value`.
 * @returns `true` only when the property is a string of length >= 1;
 *          `false` for a missing key, a non-string, or the empty string.
 */
function hasString(value: Record<string, unknown>, key: string): boolean {
  const candidate: unknown = value[key];
  return typeof candidate === "string" && candidate.length > 0;
}
|
|
21
|
+
|
|
22
|
+
function includesValue<T extends readonly string[]>(values: T, value: unknown): value is T[number] {
|
|
23
|
+
return typeof value === "string" && values.includes(value);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function isFiniteNumber(value: unknown): value is number {
|
|
27
|
+
return typeof value === "number" && Number.isFinite(value);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function isStringArray(value: unknown): value is string[] {
|
|
31
|
+
return Array.isArray(value) && value.every((item) => typeof item === "string");
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function isNumberRecord(value: unknown): value is Record<string, number> {
|
|
35
|
+
return isObject(value) && Object.values(value).every(isFiniteNumber);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function hasSessionScope(value: Record<string, unknown>): boolean {
|
|
39
|
+
return (
|
|
40
|
+
includesValue(CANONICAL_SOURCE_SESSION_KINDS, value.source_session_kind) &&
|
|
41
|
+
hasString(value, "session_id")
|
|
42
|
+
);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
 * Type guard for `CanonicalRawSourceRef`.
 *
 * Every field of the interface is optional, so an empty object is valid.
 * A field that IS present must match its declared type; otherwise the guard
 * would assert a type the value does not actually satisfy (e.g. `{ path: 5 }`).
 *
 * @param value - Arbitrary value, typically parsed from JSON.
 * @returns `true` when `value` is a non-array object whose `path`,
 *          `event_type` and `raw_id` are strings when present, whose `line`
 *          is a finite number when present, and whose `metadata` is a
 *          non-array object when present.
 */
export function isCanonicalRawSourceRef(value: unknown): value is CanonicalRawSourceRef {
  if (!isObject(value)) return false;

  // Bind the narrowed object to a const so the closure below sees the
  // narrowed type regardless of compiler version.
  const ref: Record<string, unknown> = value;
  const isOptionalString = (key: string): boolean =>
    ref[key] === undefined || typeof ref[key] === "string";

  return (
    isOptionalString("path") &&
    isOptionalString("event_type") &&
    isOptionalString("raw_id") &&
    (ref.line === undefined || isFiniteNumber(ref.line)) &&
    (ref.metadata === undefined || isObject(ref.metadata))
  );
}
|
|
48
|
+
|
|
49
|
+
export function isCanonicalRecord(value: unknown): value is CanonicalRecord {
|
|
50
|
+
if (!isObject(value)) return false;
|
|
51
|
+
if (value.schema_version !== CANONICAL_SCHEMA_VERSION) return false;
|
|
52
|
+
if (!includesValue(CANONICAL_RECORD_KINDS, value.record_kind)) return false;
|
|
53
|
+
if (!includesValue(CANONICAL_PLATFORMS, value.platform)) return false;
|
|
54
|
+
if (!includesValue(CANONICAL_CAPTURE_MODES, value.capture_mode)) return false;
|
|
55
|
+
if (!hasString(value, "normalizer_version")) return false;
|
|
56
|
+
if (!hasString(value, "normalized_at")) return false;
|
|
57
|
+
if (!isCanonicalRawSourceRef(value.raw_source_ref)) return false;
|
|
58
|
+
|
|
59
|
+
switch (value.record_kind) {
|
|
60
|
+
case "session":
|
|
61
|
+
return (
|
|
62
|
+
hasSessionScope(value) &&
|
|
63
|
+
(value.completion_status === undefined ||
|
|
64
|
+
includesValue(CANONICAL_COMPLETION_STATUSES, value.completion_status))
|
|
65
|
+
);
|
|
66
|
+
case "prompt":
|
|
67
|
+
return (
|
|
68
|
+
hasSessionScope(value) &&
|
|
69
|
+
hasString(value, "prompt_id") &&
|
|
70
|
+
hasString(value, "occurred_at") &&
|
|
71
|
+
hasString(value, "prompt_text") &&
|
|
72
|
+
includesValue(CANONICAL_PROMPT_KINDS, value.prompt_kind) &&
|
|
73
|
+
typeof value.is_actionable === "boolean"
|
|
74
|
+
);
|
|
75
|
+
case "skill_invocation":
|
|
76
|
+
return (
|
|
77
|
+
hasSessionScope(value) &&
|
|
78
|
+
hasString(value, "skill_invocation_id") &&
|
|
79
|
+
hasString(value, "occurred_at") &&
|
|
80
|
+
(value.matched_prompt_id === undefined || hasString(value, "matched_prompt_id")) &&
|
|
81
|
+
hasString(value, "skill_name") &&
|
|
82
|
+
includesValue(CANONICAL_INVOCATION_MODES, value.invocation_mode) &&
|
|
83
|
+
typeof value.triggered === "boolean" &&
|
|
84
|
+
isFiniteNumber(value.confidence)
|
|
85
|
+
);
|
|
86
|
+
case "execution_fact":
|
|
87
|
+
return (
|
|
88
|
+
hasSessionScope(value) &&
|
|
89
|
+
hasString(value, "occurred_at") &&
|
|
90
|
+
isNumberRecord(value.tool_calls_json) &&
|
|
91
|
+
isFiniteNumber(value.total_tool_calls) &&
|
|
92
|
+
isStringArray(value.bash_commands_redacted) &&
|
|
93
|
+
isFiniteNumber(value.assistant_turns) &&
|
|
94
|
+
isFiniteNumber(value.errors_encountered) &&
|
|
95
|
+
(value.completion_status === undefined ||
|
|
96
|
+
includesValue(CANONICAL_COMPLETION_STATUSES, value.completion_status))
|
|
97
|
+
);
|
|
98
|
+
case "normalization_run":
|
|
99
|
+
return (
|
|
100
|
+
hasString(value, "run_id") &&
|
|
101
|
+
hasString(value, "run_at") &&
|
|
102
|
+
isFiniteNumber(value.raw_records_seen) &&
|
|
103
|
+
isFiniteNumber(value.canonical_records_written) &&
|
|
104
|
+
typeof value.repair_applied === "boolean"
|
|
105
|
+
);
|
|
106
|
+
default:
|
|
107
|
+
return false;
|
|
108
|
+
}
|
|
109
|
+
}
|
package/skill/SKILL.md
CHANGED
|
@@ -19,6 +19,11 @@ description: >
|
|
|
19
19
|
Observe real agent sessions, detect missed triggers, grade execution quality,
|
|
20
20
|
and evolve skill descriptions toward the language real users actually use.
|
|
21
21
|
|
|
22
|
+
**You are the operator.** The user installed this skill so YOU can manage their
|
|
23
|
+
skill health autonomously. They will say things like "set up selftune",
|
|
24
|
+
"improve my skills", or "how are my skills doing?" — and you route to the
|
|
25
|
+
correct workflow below. The user does not run CLI commands directly; you do.
|
|
26
|
+
|
|
22
27
|
## Bootstrap
|
|
23
28
|
|
|
24
29
|
If `~/.selftune/config.json` does not exist, read `Workflows/Initialize.md`
|
|
@@ -32,63 +37,75 @@ selftune <command> [options]
|
|
|
32
37
|
```
|
|
33
38
|
|
|
34
39
|
Most commands output deterministic JSON. Parse JSON output for machine-readable commands.
|
|
35
|
-
`selftune dashboard` is an exception:
|
|
36
|
-
informational progress lines.
|
|
40
|
+
`selftune dashboard` is an exception: `--export` generates an HTML artifact, while
|
|
41
|
+
`--serve` starts a local server; both may print informational progress lines.
|
|
37
42
|
|
|
38
43
|
## Quick Reference
|
|
39
44
|
|
|
40
45
|
```bash
|
|
41
|
-
|
|
42
|
-
selftune
|
|
43
|
-
selftune
|
|
44
|
-
selftune
|
|
46
|
+
# Ingest group
|
|
47
|
+
selftune ingest claude [--since DATE] [--dry-run] [--force] [--verbose]
|
|
48
|
+
selftune ingest codex # (experimental)
|
|
49
|
+
selftune ingest opencode # (experimental)
|
|
50
|
+
selftune ingest openclaw [--agents-dir PATH] [--since DATE] [--dry-run] [--force] [--verbose] # (experimental)
|
|
51
|
+
selftune ingest wrap-codex -- <codex args> # (experimental)
|
|
52
|
+
|
|
53
|
+
# Grade group
|
|
54
|
+
selftune grade auto --skill <name> [--expectations "..."] [--agent <name>]
|
|
55
|
+
selftune grade baseline --skill <name> --skill-path <path> [--eval-set <path>] [--agent <name>]
|
|
56
|
+
|
|
57
|
+
# Evolve group
|
|
58
|
+
selftune evolve --skill <name> --skill-path <path> [--dry-run]
|
|
59
|
+
selftune evolve body --skill <name> --skill-path <path> --target <routing_table|full_body> [--dry-run]
|
|
60
|
+
selftune evolve rollback --skill <name> --skill-path <path> [--proposal-id <id>]
|
|
61
|
+
|
|
62
|
+
# Eval group
|
|
63
|
+
selftune eval generate --skill <name> [--list-skills] [--stats] [--max N]
|
|
64
|
+
selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
|
|
65
|
+
selftune eval import --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
|
|
66
|
+
selftune eval composability --skill <name> [--window N] [--telemetry-log <path>]
|
|
67
|
+
|
|
68
|
+
# Other commands
|
|
45
69
|
selftune watch --skill <name> --skill-path <path> [--auto-rollback]
|
|
46
70
|
selftune status
|
|
47
71
|
selftune last
|
|
48
72
|
selftune doctor
|
|
49
73
|
selftune dashboard [--export] [--out FILE] [--serve]
|
|
50
|
-
selftune
|
|
51
|
-
selftune ingest-opencode
|
|
52
|
-
selftune ingest-openclaw [--agents-dir PATH] [--since DATE] [--dry-run] [--force] [--verbose]
|
|
53
|
-
selftune wrap-codex -- <codex args>
|
|
54
|
-
selftune replay [--since DATE] [--dry-run] [--force] [--verbose]
|
|
74
|
+
selftune dashboard --serve [--port <port>]
|
|
55
75
|
selftune contribute [--skill NAME] [--preview] [--sanitize LEVEL] [--submit]
|
|
56
|
-
selftune cron setup [--dry-run]
|
|
76
|
+
selftune cron setup [--dry-run] # auto-detect platform (cron/launchd/systemd)
|
|
77
|
+
selftune cron setup --platform openclaw [--dry-run] [--tz <timezone>] # OpenClaw-specific
|
|
57
78
|
selftune cron list
|
|
58
79
|
selftune cron remove [--dry-run]
|
|
59
|
-
selftune dashboard --serve [--port <port>]
|
|
60
|
-
selftune evolve-body --skill <name> --skill-path <path> --target <routing_table|full_body> [--dry-run]
|
|
61
|
-
selftune baseline --skill <name> --skill-path <path> [--eval-set <path>] [--agent <name>]
|
|
62
|
-
selftune unit-test --skill <name> --tests <path> [--run-agent] [--generate]
|
|
63
|
-
selftune composability --skill <name> [--window N] [--telemetry-log <path>]
|
|
64
|
-
selftune import-skillsbench --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
|
|
65
80
|
```
|
|
66
81
|
|
|
67
82
|
## Workflow Routing
|
|
68
83
|
|
|
69
84
|
| Trigger keywords | Workflow | File |
|
|
70
85
|
|------------------|----------|------|
|
|
71
|
-
| grade, score, evaluate, assess session | Grade | Workflows/Grade.md |
|
|
72
|
-
| evals, eval set, undertriggering, skill stats | Evals | Workflows/Evals.md |
|
|
73
|
-
| evolve, improve, triggers, catch more queries | Evolve | Workflows/Evolve.md |
|
|
74
|
-
| rollback, undo, restore, revert evolution | Rollback | Workflows/Rollback.md |
|
|
75
|
-
| watch, monitor, regression, post-deploy, performing | Watch | Workflows/Watch.md |
|
|
76
|
-
| doctor, health, hooks, broken, diagnose | Doctor | Workflows/Doctor.md |
|
|
77
|
-
| ingest, import, codex logs, opencode, openclaw, wrap codex | Ingest | Workflows/Ingest.md |
|
|
78
|
-
|
|
|
79
|
-
| contribute, share, community, export data, anonymized | Contribute | Workflows/Contribute.md |
|
|
80
|
-
| init, setup, bootstrap, first time | Initialize | Workflows/Initialize.md |
|
|
81
|
-
| cron, schedule, autonomous, automate evolution | Cron | Workflows/Cron.md |
|
|
86
|
+
| grade, score, evaluate, assess session, auto-grade | Grade † | Workflows/Grade.md |
|
|
87
|
+
| evals, eval set, undertriggering, skill stats, eval generate | Evals | Workflows/Evals.md |
|
|
88
|
+
| evolve, improve, optimize skills, make skills better, triggers, catch more queries | Evolve † | Workflows/Evolve.md |
|
|
89
|
+
| evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | Workflows/Rollback.md |
|
|
90
|
+
| watch, monitor, regression, post-deploy, performing, keep an eye on | Watch † | Workflows/Watch.md |
|
|
91
|
+
| doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | Workflows/Doctor.md |
|
|
92
|
+
| ingest, import, codex logs, opencode, openclaw, wrap codex | Ingest † | Workflows/Ingest.md |
|
|
93
|
+
| ingest claude, backfill, claude transcripts, historical sessions | Replay | Workflows/Replay.md |
|
|
94
|
+
| contribute, share, community, export data, anonymized, give back, help others | Contribute | Workflows/Contribute.md |
|
|
95
|
+
| init, setup, set up, bootstrap, first time, install, configure selftune | Initialize | Workflows/Initialize.md |
|
|
96
|
+
| cron, schedule, autonomous, automate evolution, run automatically, run on its own | Cron | Workflows/Cron.md |
|
|
82
97
|
| auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | Workflows/AutoActivation.md |
|
|
83
|
-
| dashboard, visual, open dashboard, skill grid, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
|
|
98
|
+
| dashboard, visual, open dashboard, show dashboard, skill grid, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
|
|
84
99
|
| evolution memory, context memory, session continuity, what happened last | EvolutionMemory | Workflows/EvolutionMemory.md |
|
|
85
100
|
| evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | Workflows/EvolveBody.md |
|
|
86
|
-
| baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
|
|
87
|
-
| unit
|
|
88
|
-
| composability, co-occurrence, skill conflicts, skills together, conflict score | Composability | Workflows/Composability.md |
|
|
89
|
-
| import
|
|
90
|
-
| status, health summary, skill health, pass rates, how are skills | Status | *(direct command — no workflow file)* |
|
|
91
|
-
| last, last session, recent session, what happened | Last | *(direct command — no workflow file)* |
|
|
101
|
+
| grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
|
|
102
|
+
| eval unit-test, skill test, test skill, generate tests, run tests, assertions | UnitTest | Workflows/UnitTest.md |
|
|
103
|
+
| eval composability, co-occurrence, skill conflicts, skills together, conflict score | Composability | Workflows/Composability.md |
|
|
104
|
+
| eval import, skillsbench, external evals, benchmark tasks, import corpus | ImportSkillsBench | Workflows/ImportSkillsBench.md |
|
|
105
|
+
| status, health summary, skill health, pass rates, how are skills, skills working, skills doing, run selftune, start selftune | Status | *(direct command — no workflow file)* |
|
|
106
|
+
| last, last session, recent session, what happened, what changed, what did selftune do | Last | *(direct command — no workflow file)* |
|
|
107
|
+
|
|
108
|
+
Workflows marked with † also run autonomously via `selftune orchestrate` without user interaction.
|
|
92
109
|
|
|
93
110
|
## Interactive Configuration
|
|
94
111
|
|
|
@@ -124,25 +141,27 @@ not a mandatory gate.
|
|
|
124
141
|
### Workflows That Skip Pre-Flight
|
|
125
142
|
|
|
126
143
|
These read-only or simple workflows run immediately without prompting:
|
|
127
|
-
`status`, `last`, `doctor`, `dashboard`, `watch`, `rollback`,
|
|
128
|
-
`
|
|
129
|
-
`import
|
|
144
|
+
`status`, `last`, `doctor`, `dashboard`, `watch`, `evolve rollback`,
|
|
145
|
+
`grade auto`, `ingest *`, `contribute`, `cron`, `eval composability`,
|
|
146
|
+
`eval unit-test`, `eval import`.
|
|
130
147
|
|
|
131
148
|
## The Feedback Loop
|
|
132
149
|
|
|
133
|
-
```
|
|
134
|
-
Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
|
|
150
|
+
```text
|
|
151
|
+
Observe --> Detect --> Diagnose --> Propose --> Validate --> Audit --> Deploy --> Watch --> Rollback
|
|
135
152
|
| |
|
|
136
153
|
+--------------------------------------------------------------------+
|
|
137
154
|
```
|
|
138
155
|
|
|
139
156
|
1. **Observe** -- Hooks capture every session (queries, triggers, metrics)
|
|
140
|
-
2. **Detect** -- `
|
|
141
|
-
3. **Diagnose** -- `grade` evaluates session quality with evidence
|
|
142
|
-
4. **Propose** -- `evolve` generates description improvements
|
|
157
|
+
2. **Detect** -- `selftune eval generate` extracts missed-trigger patterns across invocation types
|
|
158
|
+
3. **Diagnose** -- `selftune grade` evaluates session quality with evidence
|
|
159
|
+
4. **Propose** -- `selftune evolve` generates description improvements
|
|
143
160
|
5. **Validate** -- Evolution is tested against the eval set
|
|
144
|
-
6. **
|
|
145
|
-
7. **
|
|
161
|
+
6. **Audit** -- Persist proposal, evidence, and decision metadata for traceability
|
|
162
|
+
7. **Deploy** -- Updated description replaces the original (with backup)
|
|
163
|
+
8. **Watch** -- `selftune watch` monitors for regressions post-deploy
|
|
164
|
+
9. **Rollback** -- `selftune evolve rollback` restores the previous version when regressions are detected
|
|
146
165
|
|
|
147
166
|
## Resource Index
|
|
148
167
|
|
|
@@ -163,7 +182,7 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
|
|
|
163
182
|
| `Workflows/Ingest.md` | Import sessions from Codex, OpenCode, and OpenClaw |
|
|
164
183
|
| `Workflows/Replay.md` | Backfill logs from Claude Code transcripts |
|
|
165
184
|
| `Workflows/Contribute.md` | Export anonymized data for community contribution |
|
|
166
|
-
| `Workflows/Cron.md` |
|
|
185
|
+
| `Workflows/Cron.md` | Scheduling & automation (cron/launchd/systemd/OpenClaw) |
|
|
167
186
|
| `Workflows/AutoActivation.md` | Auto-activation hook behavior and rules |
|
|
168
187
|
| `Workflows/Dashboard.md` | Dashboard modes: static, export, live server |
|
|
169
188
|
| `Workflows/EvolutionMemory.md` | Evolution memory system for session continuity |
|
|
@@ -178,12 +197,12 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
|
|
|
178
197
|
selftune provides focused agents for deeper analysis. These live in
|
|
179
198
|
`.claude/agents/` and can be spawned as subagents for specialized tasks.
|
|
180
199
|
|
|
181
|
-
| Trigger keywords | Agent | Purpose |
|
|
182
|
-
|
|
183
|
-
| diagnose, root cause, why failing, skill failure, debug performance | diagnosis-analyst | Deep-dive analysis of underperforming skills |
|
|
184
|
-
| patterns, conflicts, cross-skill, overlap, trigger conflicts, optimize skills | pattern-analyst | Cross-skill pattern analysis and conflict detection |
|
|
185
|
-
| review evolution, check proposal, safe to deploy, approve evolution | evolution-reviewer | Safety gate review of pending evolution proposals |
|
|
186
|
-
| set up selftune, integrate, configure project, install selftune | integration-guide | Guided interactive setup for specific project types |
|
|
200
|
+
| Trigger keywords | Agent | Purpose | When to spawn |
|
|
201
|
+
|------------------|-------|---------|---------------|
|
|
202
|
+
| diagnose, root cause, why failing, skill failure, debug performance | diagnosis-analyst | Deep-dive analysis of underperforming skills | After doctor finds persistent issues, grades are consistently low, or status shows CRITICAL/WARNING |
|
|
203
|
+
| patterns, conflicts, cross-skill, overlap, trigger conflicts, optimize skills | pattern-analyst | Cross-skill pattern analysis and conflict detection | When user asks about cross-skill conflicts or composability scores indicate moderate-to-severe conflicts |
|
|
204
|
+
| review evolution, check proposal, safe to deploy, approve evolution | evolution-reviewer | Safety gate review of pending evolution proposals | Before deploying an evolution in interactive mode, especially for high-stakes or low-confidence proposals |
|
|
205
|
+
| set up selftune, integrate, configure project, install selftune | integration-guide | Guided interactive setup for specific project types | For complex project structures (monorepo, multi-skill, mixed agent platforms) |
|
|
187
206
|
|
|
188
207
|
## Examples
|
|
189
208
|
|
|
@@ -227,6 +246,18 @@ selftune provides focused agents for deeper analysis. These live in
|
|
|
227
246
|
- "Which skills conflict with each other?"
|
|
228
247
|
- "Analyze composability for the Research skill"
|
|
229
248
|
- "Import SkillsBench tasks for my skill"
|
|
249
|
+
- "Install selftune"
|
|
250
|
+
- "Configure selftune for this project"
|
|
251
|
+
- "Make my skills better"
|
|
252
|
+
- "Optimize my skills"
|
|
253
|
+
- "Are my skills working?"
|
|
254
|
+
- "Show me the dashboard"
|
|
255
|
+
- "What changed since last time?"
|
|
256
|
+
- "What did selftune do?"
|
|
257
|
+
- "Run selftune"
|
|
258
|
+
- "Start selftune"
|
|
259
|
+
- "Go back to the previous version"
|
|
260
|
+
- "Undo the last change"
|
|
230
261
|
|
|
231
262
|
## Negative Examples
|
|
232
263
|
|
|
@@ -40,7 +40,7 @@ Detection scans all hook entries in settings for any command containing
|
|
|
40
40
|
| `post-session-diagnostic` | Suggest diagnostic review | >2 unmatched queries in current session | `selftune last` |
|
|
41
41
|
| `grading-threshold-breach` | Suggest evolution | Session pass rate < 0.6 (60%) | `selftune evolve` |
|
|
42
42
|
| `stale-evolution` | Suggest evolution | >7 days since last evolution AND pending false negatives exist | `selftune evolve` |
|
|
43
|
-
| `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune rollback` |
|
|
43
|
+
| `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune evolve rollback` |
|
|
44
44
|
|
|
45
45
|
### Rule Details
|
|
46
46
|
|
|
@@ -121,24 +121,25 @@ Delete or comment out the entry to disable all auto-activation suggestions.
|
|
|
121
121
|
|
|
122
122
|
## Common Patterns
|
|
123
123
|
|
|
124
|
-
**
|
|
125
|
-
> Remove the auto-activate hook from settings
|
|
126
|
-
>
|
|
124
|
+
**User wants to disable auto-suggestions**
|
|
125
|
+
> Remove the auto-activate hook entry from `~/.claude/settings.json`
|
|
126
|
+
> (see Disabling section above). Each rule fires at most once per session.
|
|
127
127
|
|
|
128
|
-
**
|
|
129
|
-
>
|
|
130
|
-
>
|
|
128
|
+
**User asks why selftune suggestions appear**
|
|
129
|
+
> Explain that the auto-activate hook detected an actionable condition.
|
|
130
|
+
> Parse the suggestion text to identify which rule fired and report the
|
|
131
|
+
> recommended action.
|
|
131
132
|
|
|
132
|
-
**
|
|
133
|
+
**Suggestions are not appearing when expected**
|
|
133
134
|
> Run `selftune doctor` to verify the hook is installed. Check that
|
|
134
135
|
> `UserPromptSubmit` includes the auto-activate hook in settings.
|
|
135
136
|
|
|
136
|
-
**
|
|
137
|
-
> Verify PAI's `skill-activation-prompt` hook is in settings.
|
|
138
|
-
>
|
|
137
|
+
**PAI coexistence conflict**
|
|
138
|
+
> Verify PAI's `skill-activation-prompt` hook is in `~/.claude/settings.json`.
|
|
139
|
+
> If present, selftune skips all suggestions automatically. If the user
|
|
140
|
+
> sees duplicates, one of the two hooks is misconfigured.
|
|
139
141
|
|
|
140
|
-
**
|
|
141
|
-
>
|
|
142
|
-
>
|
|
143
|
-
>
|
|
144
|
-
> rules file.
|
|
142
|
+
**User wants custom activation rules**
|
|
143
|
+
> Direct the user to `cli/selftune/activation-rules.ts`. New rules must
|
|
144
|
+
> conform to the `ActivationRule` interface: pure filesystem readers with
|
|
145
|
+
> no network calls or heavy imports.
|