selftune 0.2.22 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +95 -15
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/adapters/codex/install.ts +310 -78
- package/cli/selftune/adapters/opencode/install.ts +3 -4
- package/cli/selftune/adapters/pi/hook.ts +273 -0
- package/cli/selftune/adapters/pi/install.ts +207 -0
- package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
- package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
- package/cli/selftune/auto-update.ts +200 -8
- package/cli/selftune/canonical-export.ts +55 -25
- package/cli/selftune/command-surface.ts +397 -0
- package/cli/selftune/constants.ts +10 -1
- package/cli/selftune/contribute/contribute.ts +64 -13
- package/cli/selftune/contribution-config.ts +57 -3
- package/cli/selftune/contribution-preferences.ts +117 -0
- package/cli/selftune/contribution-signals.ts +8 -4
- package/cli/selftune/contribution-staging.ts +13 -2
- package/cli/selftune/contributions.ts +55 -121
- package/cli/selftune/creator-contributions.ts +29 -10
- package/cli/selftune/cron/setup.ts +7 -3
- package/cli/selftune/dashboard-contract.ts +87 -0
- package/cli/selftune/dashboard-server.ts +168 -17
- package/cli/selftune/dashboard.ts +350 -17
- package/cli/selftune/eval/baseline.ts +21 -5
- package/cli/selftune/eval/execution-eval.ts +170 -0
- package/cli/selftune/eval/family-overlap.ts +2 -2
- package/cli/selftune/eval/hooks-to-evals.ts +228 -82
- package/cli/selftune/eval/import-skillsbench.ts +2 -2
- package/cli/selftune/eval/invocation-classifier.ts +56 -0
- package/cli/selftune/eval/synthetic-evals.ts +5 -3
- package/cli/selftune/eval/unit-test-cli.ts +7 -4
- package/cli/selftune/evolution/apply-proposal.ts +295 -0
- package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
- package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
- package/cli/selftune/evolution/evidence.ts +2 -6
- package/cli/selftune/evolution/evolve-body.ts +152 -38
- package/cli/selftune/evolution/evolve.ts +244 -52
- package/cli/selftune/evolution/rollback.ts +0 -1
- package/cli/selftune/evolution/validate-body.ts +111 -49
- package/cli/selftune/evolution/validate-host-replay.ts +510 -60
- package/cli/selftune/evolution/validate-proposal.ts +11 -150
- package/cli/selftune/evolution/validate-routing.ts +51 -108
- package/cli/selftune/evolution/validation-contract.ts +91 -0
- package/cli/selftune/grading/auto-grade.ts +11 -7
- package/cli/selftune/grading/grade-session.ts +10 -16
- package/cli/selftune/hooks/skill-eval.ts +2 -1
- package/cli/selftune/hooks-shared/types.ts +1 -0
- package/cli/selftune/index.ts +58 -15
- package/cli/selftune/ingestors/claude-replay.ts +15 -10
- package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/ingestors/pi-ingest.ts +727 -0
- package/cli/selftune/init.ts +38 -4
- package/cli/selftune/localdb/direct-write.ts +120 -1
- package/cli/selftune/localdb/materialize.ts +6 -7
- package/cli/selftune/localdb/queries/cron.ts +34 -0
- package/cli/selftune/localdb/queries/dashboard.ts +834 -0
- package/cli/selftune/localdb/queries/evolution.ts +158 -0
- package/cli/selftune/localdb/queries/execution.ts +133 -0
- package/cli/selftune/localdb/queries/json.ts +18 -0
- package/cli/selftune/localdb/queries/monitoring.ts +263 -0
- package/cli/selftune/localdb/queries/raw.ts +95 -0
- package/cli/selftune/localdb/queries/staging.ts +270 -0
- package/cli/selftune/localdb/queries/trust.ts +392 -0
- package/cli/selftune/localdb/queries.ts +60 -2162
- package/cli/selftune/localdb/schema.ts +59 -0
- package/cli/selftune/monitoring/watch.ts +96 -29
- package/cli/selftune/normalization.ts +3 -0
- package/cli/selftune/observability.ts +12 -3
- package/cli/selftune/orchestrate/cli.ts +161 -0
- package/cli/selftune/orchestrate/execute.ts +295 -0
- package/cli/selftune/orchestrate/finalize.ts +157 -0
- package/cli/selftune/orchestrate/locks.ts +40 -0
- package/cli/selftune/orchestrate/plan.ts +131 -0
- package/cli/selftune/orchestrate/post-run.ts +59 -0
- package/cli/selftune/orchestrate/prepare.ts +334 -0
- package/cli/selftune/orchestrate/report.ts +182 -0
- package/cli/selftune/orchestrate/runtime.ts +120 -0
- package/cli/selftune/orchestrate/signals.ts +48 -0
- package/cli/selftune/orchestrate.ts +162 -1142
- package/cli/selftune/registry/client.ts +74 -0
- package/cli/selftune/registry/history.ts +54 -0
- package/cli/selftune/registry/index.ts +90 -0
- package/cli/selftune/registry/install.ts +141 -0
- package/cli/selftune/registry/list.ts +44 -0
- package/cli/selftune/registry/push.ts +171 -0
- package/cli/selftune/registry/rollback.ts +49 -0
- package/cli/selftune/registry/status.ts +62 -0
- package/cli/selftune/registry/sync.ts +125 -0
- package/cli/selftune/repair/skill-usage.ts +9 -3
- package/cli/selftune/routes/overview.ts +5 -2
- package/cli/selftune/routes/skill-report.ts +15 -2
- package/cli/selftune/schedule.ts +5 -5
- package/cli/selftune/status.ts +70 -2
- package/cli/selftune/sync.ts +127 -23
- package/cli/selftune/testing-readiness.ts +597 -0
- package/cli/selftune/types.ts +46 -5
- package/cli/selftune/uninstall.ts +2 -1
- package/cli/selftune/utils/canonical-log.ts +1 -9
- package/cli/selftune/utils/cli-error.ts +9 -0
- package/cli/selftune/utils/jsonl.ts +1 -30
- package/cli/selftune/utils/llm-call.ts +126 -6
- package/cli/selftune/utils/skill-discovery.ts +24 -0
- package/cli/selftune/workflows/proposals.ts +184 -0
- package/cli/selftune/workflows/skill-scaffold.ts +241 -0
- package/cli/selftune/workflows/workflows.ts +100 -26
- package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
- package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
- package/node_modules/@selftune/telemetry-contract/package.json +1 -1
- package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
- package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
- package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
- package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
- package/package.json +25 -9
- package/packages/dashboard-core/AGENTS.md +18 -0
- package/packages/dashboard-core/README.md +30 -0
- package/packages/dashboard-core/index.ts +3 -0
- package/packages/dashboard-core/package.json +39 -0
- package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
- package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
- package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
- package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
- package/packages/dashboard-core/src/chrome/index.ts +14 -0
- package/packages/dashboard-core/src/chrome/types.ts +81 -0
- package/packages/dashboard-core/src/chrome/utils.ts +23 -0
- package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
- package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
- package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
- package/packages/dashboard-core/src/gates/index.ts +3 -0
- package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
- package/packages/dashboard-core/src/host/adapter.ts +47 -0
- package/packages/dashboard-core/src/host/capabilities.ts +55 -0
- package/packages/dashboard-core/src/host/index.ts +3 -0
- package/packages/dashboard-core/src/models/analytics.ts +39 -0
- package/packages/dashboard-core/src/models/index.ts +4 -0
- package/packages/dashboard-core/src/models/overview.ts +98 -0
- package/packages/dashboard-core/src/models/runtime.ts +7 -0
- package/packages/dashboard-core/src/models/skills.ts +34 -0
- package/packages/dashboard-core/src/routes/index.ts +2 -0
- package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
- package/packages/dashboard-core/src/routes/manifest.ts +451 -0
- package/packages/dashboard-core/src/routes/types.ts +39 -0
- package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
- package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
- package/packages/dashboard-core/src/screens/index.ts +37 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
- package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
- package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
- package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
- package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
- package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
- package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
- package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
- package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
- package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
- package/packages/telemetry-contract/package.json +1 -1
- package/packages/telemetry-contract/src/index.ts +1 -0
- package/packages/telemetry-contract/src/schemas.ts +63 -5
- package/packages/telemetry-contract/src/types.ts +97 -7
- package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
- package/packages/ui/AGENTS.md +16 -0
- package/packages/ui/README.md +1 -1
- package/packages/ui/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
- package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
- package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
- package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
- package/packages/ui/src/components/InfoTip.tsx +1 -2
- package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
- package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
- package/packages/ui/src/components/OverviewPanels.tsx +693 -0
- package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
- package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
- package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
- package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
- package/packages/ui/src/components/index.ts +56 -1
- package/packages/ui/src/components/section-cards.tsx +18 -35
- package/packages/ui/src/components/skill-health-grid.tsx +47 -37
- package/packages/ui/src/lib/constants.tsx +0 -1
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/packages/ui/src/primitives/checkbox.tsx +1 -1
- package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
- package/packages/ui/src/primitives/select.tsx +2 -2
- package/packages/ui/src/primitives/tabs.tsx +7 -6
- package/packages/ui/src/types.ts +182 -4
- package/skill/SKILL.md +130 -318
- package/skill/agents/diagnosis-analyst.md +3 -3
- package/skill/agents/evolution-reviewer.md +3 -3
- package/skill/agents/integration-guide.md +3 -3
- package/skill/agents/pattern-analyst.md +2 -2
- package/skill/references/cli-quick-reference.md +89 -0
- package/skill/references/creator-playbook.md +131 -0
- package/skill/references/examples.md +48 -0
- package/skill/references/troubleshooting.md +47 -0
- package/skill/references/version-history.md +1 -1
- package/skill/selftune.contribute.json +11 -0
- package/skill/{Workflows → workflows}/Baseline.md +20 -1
- package/skill/{Workflows → workflows}/Contribute.md +23 -10
- package/skill/{Workflows → workflows}/Contributions.md +13 -5
- package/skill/workflows/CreateTestDeploy.md +170 -0
- package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
- package/skill/{Workflows → workflows}/Cron.md +1 -1
- package/skill/{Workflows → workflows}/Dashboard.md +20 -0
- package/skill/{Workflows → workflows}/Doctor.md +1 -1
- package/skill/{Workflows → workflows}/Evals.md +67 -2
- package/skill/{Workflows → workflows}/Evolve.md +119 -30
- package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
- package/skill/{Workflows → workflows}/Grade.md +1 -1
- package/skill/{Workflows → workflows}/Ingest.md +60 -2
- package/skill/{Workflows → workflows}/Initialize.md +16 -9
- package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
- package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
- package/skill/workflows/Registry.md +99 -0
- package/skill/{Workflows → workflows}/Schedule.md +3 -3
- package/skill/workflows/SignalsDashboard.md +87 -0
- package/skill/{Workflows → workflows}/Sync.md +3 -1
- package/skill/{Workflows → workflows}/UnitTest.md +19 -0
- package/skill/{Workflows → workflows}/Watch.md +42 -2
- package/skill/{Workflows → workflows}/Workflows.md +39 -2
- package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
- package/cli/selftune/utils/html.ts +0 -27
- package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
- /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
- /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
- /package/skill/{Workflows → workflows}/Badge.md +0 -0
- /package/skill/{Workflows → workflows}/Composability.md +0 -0
- /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
- /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
- /package/skill/{Workflows → workflows}/Hook.md +0 -0
- /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
- /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
- /package/skill/{Workflows → workflows}/Recover.md +0 -0
- /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
- /package/skill/{Workflows → workflows}/Replay.md +0 -0
- /package/skill/{Workflows → workflows}/Rollback.md +0 -0
- /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
- /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
|
@@ -129,6 +129,22 @@ CREATE TABLE IF NOT EXISTS evolution_audit (
|
|
|
129
129
|
validation_evidence_ref TEXT
|
|
130
130
|
)`;
|
|
131
131
|
|
|
132
|
+
// -- Replay entry results (per-entry validation outcomes) ---------------------
|
|
133
|
+
|
|
134
|
+
export const CREATE_REPLAY_ENTRY_RESULTS = `
|
|
135
|
+
CREATE TABLE IF NOT EXISTS replay_entry_results (
|
|
136
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
137
|
+
proposal_id TEXT NOT NULL,
|
|
138
|
+
skill_name TEXT NOT NULL,
|
|
139
|
+
validation_mode TEXT NOT NULL,
|
|
140
|
+
phase TEXT NOT NULL,
|
|
141
|
+
query TEXT NOT NULL,
|
|
142
|
+
should_trigger INTEGER NOT NULL,
|
|
143
|
+
triggered INTEGER NOT NULL,
|
|
144
|
+
passed INTEGER NOT NULL,
|
|
145
|
+
evidence TEXT
|
|
146
|
+
)`;
|
|
147
|
+
|
|
132
148
|
// -- Local telemetry tables (from JSONL logs) ---------------------------------
|
|
133
149
|
|
|
134
150
|
export const CREATE_SESSION_TELEMETRY = `
|
|
@@ -215,6 +231,20 @@ CREATE TABLE IF NOT EXISTS grading_results (
|
|
|
215
231
|
execution_metrics_json TEXT
|
|
216
232
|
)`;
|
|
217
233
|
|
|
234
|
+
// -- Grading baselines table (pre/post deploy grade snapshots) ---------------
|
|
235
|
+
|
|
236
|
+
export const CREATE_GRADING_BASELINES = `
|
|
237
|
+
CREATE TABLE IF NOT EXISTS grading_baselines (
|
|
238
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
239
|
+
skill_name TEXT NOT NULL,
|
|
240
|
+
proposal_id TEXT,
|
|
241
|
+
measured_at TEXT NOT NULL,
|
|
242
|
+
pass_rate REAL NOT NULL,
|
|
243
|
+
mean_score REAL,
|
|
244
|
+
sample_size INTEGER NOT NULL,
|
|
245
|
+
grading_results_json TEXT
|
|
246
|
+
)`;
|
|
247
|
+
|
|
218
248
|
// -- Improvement signal table (from signal_log.jsonl) ------------------------
|
|
219
249
|
|
|
220
250
|
export const CREATE_IMPROVEMENT_SIGNALS = `
|
|
@@ -294,6 +324,20 @@ CREATE TABLE IF NOT EXISTS commit_tracking (
|
|
|
294
324
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
295
325
|
)`;
|
|
296
326
|
|
|
327
|
+
// -- Cron run audit log -------------------------------------------------------
|
|
328
|
+
|
|
329
|
+
export const CREATE_CRON_RUNS = `
|
|
330
|
+
CREATE TABLE IF NOT EXISTS cron_runs (
|
|
331
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
332
|
+
job_name TEXT NOT NULL,
|
|
333
|
+
started_at TEXT NOT NULL,
|
|
334
|
+
elapsed_ms INTEGER NOT NULL,
|
|
335
|
+
status TEXT NOT NULL,
|
|
336
|
+
metrics_json TEXT,
|
|
337
|
+
error TEXT,
|
|
338
|
+
UNIQUE(job_name, started_at)
|
|
339
|
+
)`;
|
|
340
|
+
|
|
297
341
|
// -- Metadata table -----------------------------------------------------------
|
|
298
342
|
|
|
299
343
|
export const CREATE_META = `
|
|
@@ -339,6 +383,11 @@ export const CREATE_INDEXES = [
|
|
|
339
383
|
`CREATE INDEX IF NOT EXISTS idx_grading_skill ON grading_results(skill_name)`,
|
|
340
384
|
`CREATE INDEX IF NOT EXISTS idx_grading_ts ON grading_results(graded_at)`,
|
|
341
385
|
`CREATE UNIQUE INDEX IF NOT EXISTS idx_grading_dedup ON grading_results(session_id, skill_name, graded_at)`,
|
|
386
|
+
// -- Grading baseline indexes ------------------------------------------------
|
|
387
|
+
`CREATE INDEX IF NOT EXISTS idx_grading_bl_skill ON grading_baselines(skill_name)`,
|
|
388
|
+
`CREATE INDEX IF NOT EXISTS idx_grading_bl_proposal ON grading_baselines(proposal_id)`,
|
|
389
|
+
`CREATE INDEX IF NOT EXISTS idx_grading_bl_ts ON grading_baselines(measured_at)`,
|
|
390
|
+
`CREATE INDEX IF NOT EXISTS idx_grading_bl_skill_proposal ON grading_baselines(skill_name, proposal_id, measured_at)`,
|
|
342
391
|
// -- Improvement signal indexes ---------------------------------------------
|
|
343
392
|
`CREATE INDEX IF NOT EXISTS idx_signals_session ON improvement_signals(session_id)`,
|
|
344
393
|
`CREATE INDEX IF NOT EXISTS idx_signals_consumed ON improvement_signals(consumed)`,
|
|
@@ -355,11 +404,18 @@ export const CREATE_INDEXES = [
|
|
|
355
404
|
`CREATE INDEX IF NOT EXISTS idx_staging_kind ON canonical_upload_staging(record_kind)`,
|
|
356
405
|
`CREATE INDEX IF NOT EXISTS idx_staging_session ON canonical_upload_staging(session_id)`,
|
|
357
406
|
`CREATE UNIQUE INDEX IF NOT EXISTS idx_staging_dedup ON canonical_upload_staging(record_kind, record_id)`,
|
|
407
|
+
// -- Replay entry result indexes ---------------------------------------------
|
|
408
|
+
`CREATE INDEX IF NOT EXISTS idx_replay_entry_proposal ON replay_entry_results(proposal_id)`,
|
|
409
|
+
`CREATE INDEX IF NOT EXISTS idx_replay_entry_skill ON replay_entry_results(skill_name)`,
|
|
410
|
+
`CREATE INDEX IF NOT EXISTS idx_replay_entry_passed ON replay_entry_results(passed)`,
|
|
411
|
+
`CREATE INDEX IF NOT EXISTS idx_replay_entry_proposal_phase ON replay_entry_results(proposal_id, phase)`,
|
|
358
412
|
// -- Commit tracking indexes ------------------------------------------------
|
|
359
413
|
`CREATE INDEX IF NOT EXISTS idx_commit_sha ON commit_tracking(commit_sha)`,
|
|
360
414
|
`CREATE INDEX IF NOT EXISTS idx_commit_session ON commit_tracking(session_id)`,
|
|
361
415
|
`CREATE INDEX IF NOT EXISTS idx_commit_ts ON commit_tracking(timestamp)`,
|
|
362
416
|
`CREATE UNIQUE INDEX IF NOT EXISTS idx_commit_dedup ON commit_tracking(session_id, commit_sha)`,
|
|
417
|
+
// -- Cron run indexes -------------------------------------------------------
|
|
418
|
+
`CREATE INDEX IF NOT EXISTS idx_cron_runs_job_ts ON cron_runs(job_name, started_at)`,
|
|
363
419
|
];
|
|
364
420
|
|
|
365
421
|
/**
|
|
@@ -443,17 +499,20 @@ export const ALL_DDL = [
|
|
|
443
499
|
CREATE_EXECUTION_FACTS,
|
|
444
500
|
CREATE_EVOLUTION_EVIDENCE,
|
|
445
501
|
CREATE_EVOLUTION_AUDIT,
|
|
502
|
+
CREATE_REPLAY_ENTRY_RESULTS,
|
|
446
503
|
CREATE_SESSION_TELEMETRY,
|
|
447
504
|
CREATE_SKILL_USAGE,
|
|
448
505
|
CREATE_ORCHESTRATE_RUNS,
|
|
449
506
|
CREATE_QUERIES,
|
|
450
507
|
CREATE_GRADING_RESULTS,
|
|
508
|
+
CREATE_GRADING_BASELINES,
|
|
451
509
|
CREATE_IMPROVEMENT_SIGNALS,
|
|
452
510
|
CREATE_UPLOAD_QUEUE,
|
|
453
511
|
CREATE_CREATOR_CONTRIBUTION_STAGING,
|
|
454
512
|
CREATE_UPLOAD_WATERMARKS,
|
|
455
513
|
CREATE_CANONICAL_UPLOAD_STAGING,
|
|
456
514
|
CREATE_COMMIT_TRACKING,
|
|
515
|
+
CREATE_CRON_RUNS,
|
|
457
516
|
CREATE_META,
|
|
458
517
|
...CREATE_INDEXES,
|
|
459
518
|
];
|
|
@@ -8,12 +8,15 @@
|
|
|
8
8
|
|
|
9
9
|
import { parseArgs } from "node:util";
|
|
10
10
|
|
|
11
|
+
import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
|
|
11
12
|
import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
|
|
12
13
|
import { classifyInvocation } from "../eval/hooks-to-evals.js";
|
|
13
14
|
import { getLastDeployedProposal } from "../evolution/audit.js";
|
|
14
15
|
import { getDb } from "../localdb/db.js";
|
|
15
16
|
import {
|
|
17
|
+
queryGradingBaseline,
|
|
16
18
|
queryQueryLog,
|
|
19
|
+
queryRecentGradingResults,
|
|
17
20
|
querySessionTelemetry,
|
|
18
21
|
querySkillUsageRecords,
|
|
19
22
|
} from "../localdb/queries.js";
|
|
@@ -42,6 +45,10 @@ export interface WatchOptions {
|
|
|
42
45
|
windowSessions: number;
|
|
43
46
|
regressionThreshold: number;
|
|
44
47
|
autoRollback: boolean;
|
|
48
|
+
/** Grade regression threshold (default 0.15). */
|
|
49
|
+
gradeRegressionThreshold?: number;
|
|
50
|
+
/** Enable grade-based regression watch (default true). */
|
|
51
|
+
enableGradeWatch?: boolean;
|
|
45
52
|
/** Injected log paths for testing (override defaults). */
|
|
46
53
|
_telemetryLogPath?: string;
|
|
47
54
|
_skillLogPath?: string;
|
|
@@ -65,6 +72,8 @@ export interface WatchResult {
|
|
|
65
72
|
rolledBack: boolean;
|
|
66
73
|
recommendation: string;
|
|
67
74
|
sync_result?: SyncResult;
|
|
75
|
+
gradeAlert?: string | null;
|
|
76
|
+
gradeRegression?: { before: number; after: number; delta: number } | null;
|
|
68
77
|
}
|
|
69
78
|
|
|
70
79
|
// ---------------------------------------------------------------------------
|
|
@@ -73,6 +82,7 @@ export interface WatchResult {
|
|
|
73
82
|
|
|
74
83
|
const DEFAULT_BASELINE_PASS_RATE = 0.5;
|
|
75
84
|
const DEFAULT_REGRESSION_THRESHOLD = 0.1;
|
|
85
|
+
const DEFAULT_GRADE_REGRESSION_THRESHOLD = 0.15;
|
|
76
86
|
export const MIN_MONITORING_SKILL_CHECKS = 3;
|
|
77
87
|
|
|
78
88
|
// ---------------------------------------------------------------------------
|
|
@@ -190,6 +200,8 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
190
200
|
skillPath,
|
|
191
201
|
windowSessions = 20,
|
|
192
202
|
regressionThreshold = DEFAULT_REGRESSION_THRESHOLD,
|
|
203
|
+
gradeRegressionThreshold = DEFAULT_GRADE_REGRESSION_THRESHOLD,
|
|
204
|
+
enableGradeWatch = true,
|
|
193
205
|
autoRollback = false,
|
|
194
206
|
_telemetryLogPath = TELEMETRY_LOG,
|
|
195
207
|
_skillLogPath = SKILL_LOG,
|
|
@@ -235,26 +247,71 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
235
247
|
regressionThreshold,
|
|
236
248
|
);
|
|
237
249
|
|
|
238
|
-
// 4. Build alert
|
|
239
|
-
|
|
250
|
+
// 4. Build trigger alert. Grade alerts are added below before rollback
|
|
251
|
+
// decisions so either signal can drive automated rollback.
|
|
252
|
+
let triggerAlert: string | null = null;
|
|
240
253
|
let rolledBack = false;
|
|
241
|
-
let recommendation: string;
|
|
242
254
|
|
|
243
255
|
if (snapshot.regression_detected) {
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
+
triggerAlert = `regression detected for "${skillName}": pass_rate=${snapshot.pass_rate.toFixed(2)} below baseline=${baselinePassRate.toFixed(2)} minus threshold=${regressionThreshold.toFixed(2)}`;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// 5. Grade regression detection (fail-open)
|
|
260
|
+
let gradeAlert: string | null = null;
|
|
261
|
+
let gradeRegression: { before: number; after: number; delta: number } | null = null;
|
|
262
|
+
|
|
263
|
+
if (enableGradeWatch) {
|
|
264
|
+
try {
|
|
265
|
+
const baseline = queryGradingBaseline(db, skillName, lastDeployed?.proposal_id);
|
|
266
|
+
const recentResults = queryRecentGradingResults(db, skillName, 10);
|
|
267
|
+
|
|
268
|
+
if (baseline && recentResults.length > 0) {
|
|
269
|
+
// Compute the average pass rate from recent grading results
|
|
270
|
+
const validResults = recentResults.filter((r) => r.pass_rate != null);
|
|
271
|
+
if (validResults.length > 0) {
|
|
272
|
+
const recentAvgPassRate =
|
|
273
|
+
validResults.reduce((sum, r) => sum + (r.pass_rate ?? 0), 0) / validResults.length;
|
|
274
|
+
const baselinePassRateGrade = baseline.pass_rate;
|
|
275
|
+
const delta = baselinePassRateGrade - recentAvgPassRate;
|
|
276
|
+
|
|
277
|
+
if (delta > gradeRegressionThreshold) {
|
|
278
|
+
gradeAlert = `grade regression detected for "${skillName}": baseline_grade_pass_rate=${baselinePassRateGrade.toFixed(2)}, recent_avg=${recentAvgPassRate.toFixed(2)}, delta=${delta.toFixed(2)} exceeds threshold=${gradeRegressionThreshold.toFixed(2)}`;
|
|
279
|
+
gradeRegression = {
|
|
280
|
+
before: baselinePassRateGrade,
|
|
281
|
+
after: recentAvgPassRate,
|
|
282
|
+
delta,
|
|
283
|
+
};
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
} catch (err) {
|
|
288
|
+
// Fail-open: grade watch should never block trigger monitoring
|
|
289
|
+
console.error(
|
|
290
|
+
JSON.stringify({
|
|
291
|
+
level: "debug",
|
|
292
|
+
code: "grade_watch_failed",
|
|
293
|
+
message: `Grade watch failed for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
294
|
+
}),
|
|
295
|
+
);
|
|
256
296
|
}
|
|
297
|
+
}
|
|
257
298
|
|
|
299
|
+
const alerts = [triggerAlert, gradeAlert].filter((value): value is string => Boolean(value));
|
|
300
|
+
const alert = alerts.length > 0 ? alerts.join("\n") : null;
|
|
301
|
+
|
|
302
|
+
if (alert && autoRollback) {
|
|
303
|
+
const rollbackFn = _rollbackFn ?? (await loadRollbackFn());
|
|
304
|
+
const proposalId = lastDeployed?.proposal_id;
|
|
305
|
+
const rollbackResult = await rollbackFn({
|
|
306
|
+
skillName,
|
|
307
|
+
skillPath,
|
|
308
|
+
proposalId,
|
|
309
|
+
});
|
|
310
|
+
rolledBack = rollbackResult.rolledBack;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
let recommendation: string;
|
|
314
|
+
if (alert) {
|
|
258
315
|
recommendation = rolledBack
|
|
259
316
|
? `Rolled back "${skillName}" to previous version. Monitor to confirm recovery.`
|
|
260
317
|
: `Consider running: selftune rollback --skill "${skillName}" --skill-path "${skillPath}"`;
|
|
@@ -285,6 +342,8 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
|
|
|
285
342
|
alert,
|
|
286
343
|
rolledBack,
|
|
287
344
|
recommendation,
|
|
345
|
+
gradeAlert,
|
|
346
|
+
gradeRegression,
|
|
288
347
|
...(syncResult ? { sync_result: syncResult } : {}),
|
|
289
348
|
};
|
|
290
349
|
}
|
|
@@ -329,6 +388,8 @@ export async function cliMain(): Promise<void> {
|
|
|
329
388
|
window: { type: "string", default: "20" },
|
|
330
389
|
threshold: { type: "string", default: "0.1" },
|
|
331
390
|
"auto-rollback": { type: "boolean", default: false },
|
|
391
|
+
"grade-threshold": { type: "string", default: "0.15" },
|
|
392
|
+
"no-grade-watch": { type: "boolean", default: false },
|
|
332
393
|
"sync-first": { type: "boolean", default: false },
|
|
333
394
|
"sync-force": { type: "boolean", default: false },
|
|
334
395
|
help: { type: "boolean", default: false },
|
|
@@ -337,20 +398,7 @@ export async function cliMain(): Promise<void> {
|
|
|
337
398
|
});
|
|
338
399
|
|
|
339
400
|
if (values.help) {
|
|
340
|
-
console.log(
|
|
341
|
-
|
|
342
|
-
Usage:
|
|
343
|
-
selftune watch --skill <name> --skill-path <path> [options]
|
|
344
|
-
|
|
345
|
-
Options:
|
|
346
|
-
--skill Skill name (required)
|
|
347
|
-
--skill-path Path to SKILL.md (required)
|
|
348
|
-
--window Number of recent sessions to consider (default: 20)
|
|
349
|
-
--threshold Regression threshold below baseline (default: 0.1)
|
|
350
|
-
--auto-rollback Automatically rollback on regression detection
|
|
351
|
-
--sync-first Refresh source-truth telemetry before reading watch inputs
|
|
352
|
-
--sync-force Force a full rescan during --sync-first
|
|
353
|
-
--help Show this help message`);
|
|
401
|
+
console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.watch));
|
|
354
402
|
process.exit(0);
|
|
355
403
|
}
|
|
356
404
|
|
|
@@ -403,11 +451,30 @@ Options:
|
|
|
403
451
|
);
|
|
404
452
|
}
|
|
405
453
|
|
|
454
|
+
const rawGradeThreshold = values["grade-threshold"] ?? "0.15";
|
|
455
|
+
if (!/^\d+(\.\d+)?$/.test(rawGradeThreshold)) {
|
|
456
|
+
throw new CLIError(
|
|
457
|
+
"--grade-threshold must be a finite number between 0 and 1.",
|
|
458
|
+
"INVALID_FLAG",
|
|
459
|
+
"selftune watch --grade-threshold 0.15",
|
|
460
|
+
);
|
|
461
|
+
}
|
|
462
|
+
const gradeRegressionThreshold = Number.parseFloat(rawGradeThreshold);
|
|
463
|
+
if (gradeRegressionThreshold < 0 || gradeRegressionThreshold > 1) {
|
|
464
|
+
throw new CLIError(
|
|
465
|
+
"--grade-threshold must be a finite number between 0 and 1.",
|
|
466
|
+
"INVALID_FLAG",
|
|
467
|
+
"selftune watch --grade-threshold 0.15",
|
|
468
|
+
);
|
|
469
|
+
}
|
|
470
|
+
|
|
406
471
|
const result = await watch({
|
|
407
472
|
skillName: values.skill,
|
|
408
473
|
skillPath: values["skill-path"],
|
|
409
474
|
windowSessions,
|
|
410
475
|
regressionThreshold,
|
|
476
|
+
gradeRegressionThreshold,
|
|
477
|
+
enableGradeWatch: !(values["no-grade-watch"] ?? false),
|
|
411
478
|
autoRollback: values["auto-rollback"] ?? false,
|
|
412
479
|
syncFirst: values["sync-first"] ?? false,
|
|
413
480
|
syncForce: values["sync-force"] ?? false,
|
|
@@ -694,6 +694,7 @@ export function buildCanonicalSkillInvocation(
|
|
|
694
694
|
}
|
|
695
695
|
|
|
696
696
|
export interface BuildExecutionFactInput extends CanonicalBaseInput {
|
|
697
|
+
execution_fact_id?: string;
|
|
697
698
|
occurred_at: string;
|
|
698
699
|
prompt_id?: string;
|
|
699
700
|
tool_calls_json: Record<string, number>;
|
|
@@ -716,6 +717,8 @@ export function buildCanonicalExecutionFact(
|
|
|
716
717
|
const record: CanonicalExecutionFactRecord = {
|
|
717
718
|
...base,
|
|
718
719
|
record_kind: "execution_fact",
|
|
720
|
+
execution_fact_id:
|
|
721
|
+
input.execution_fact_id ?? `${input.session_id}:${input.occurred_at}:execution_fact`,
|
|
719
722
|
occurred_at: input.occurred_at,
|
|
720
723
|
tool_calls_json: input.tool_calls_json,
|
|
721
724
|
total_tool_calls: input.total_tool_calls,
|
|
@@ -14,6 +14,7 @@ import { join } from "node:path";
|
|
|
14
14
|
|
|
15
15
|
import { getAlphaGuidance } from "./agent-guidance.js";
|
|
16
16
|
import { getAlphaLinkState, readAlphaIdentity } from "./alpha-identity.js";
|
|
17
|
+
import { getSelftuneUpdateHint } from "./auto-update.js";
|
|
17
18
|
import { LOG_DIR, REQUIRED_FIELDS, SELFTUNE_CONFIG_PATH } from "./constants.js";
|
|
18
19
|
import { DB_PATH, getDb } from "./localdb/db.js";
|
|
19
20
|
import type {
|
|
@@ -26,7 +27,14 @@ import type {
|
|
|
26
27
|
} from "./types.js";
|
|
27
28
|
import { missingClaudeCodeHookKeys } from "./utils/hooks.js";
|
|
28
29
|
|
|
29
|
-
const VALID_AGENT_TYPES = new Set([
|
|
30
|
+
const VALID_AGENT_TYPES = new Set([
|
|
31
|
+
"claude_code",
|
|
32
|
+
"codex",
|
|
33
|
+
"opencode",
|
|
34
|
+
"openclaw",
|
|
35
|
+
"pi",
|
|
36
|
+
"unknown",
|
|
37
|
+
]);
|
|
30
38
|
const VALID_LLM_MODES = new Set(["agent"]);
|
|
31
39
|
|
|
32
40
|
const LOG_FILES: Record<string, string> = {
|
|
@@ -311,12 +319,13 @@ export async function checkVersionHealth(): Promise<HealthCheck[]> {
|
|
|
311
319
|
if (cmp >= 0) {
|
|
312
320
|
check.message = `v${currentVersion} (latest)`;
|
|
313
321
|
} else {
|
|
322
|
+
const updateCommand = getSelftuneUpdateHint("latest");
|
|
314
323
|
check.status = "warn";
|
|
315
|
-
check.message = `v${currentVersion} installed, v${latestVersion} available. Run:
|
|
324
|
+
check.message = `v${currentVersion} installed, v${latestVersion} available. Run: ${updateCommand}`;
|
|
316
325
|
check.guidance = {
|
|
317
326
|
code: "version_update_available",
|
|
318
327
|
message: "A newer selftune release is available.",
|
|
319
|
-
next_command:
|
|
328
|
+
next_command: updateCommand,
|
|
320
329
|
suggested_commands: ["selftune doctor"],
|
|
321
330
|
blocking: false,
|
|
322
331
|
};
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { parseArgs } from "node:util";
|
|
2
|
+
|
|
3
|
+
import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
|
|
4
|
+
import type { OrchestrateOptions, OrchestrateResult } from "../orchestrate.js";
|
|
5
|
+
import { CLIError } from "../utils/cli-error.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Result of parsing the `selftune orchestrate` command line, as produced by
 * `parseOrchestrateCliArgs`.
 */
export interface ParsedOrchestrateCliArgs {
  /** True when `--help` / `-h` was passed; the remaining fields then hold fixed defaults. */
  showHelp: boolean;
  /** Messages collected while parsing (e.g. the `--auto-approve` deprecation notice). */
  warnings: Array<string>;
  /** Value of the `--loop` flag. */
  loop: boolean;
  /** Numeric value of `--loop-interval`, in seconds. */
  loopIntervalSeconds: number;
  /** Options assembled from the remaining flags for a single orchestrate run. */
  runOptions: OrchestrateOptions;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Parses a flag value that must be a strictly positive base-10 integer.
 *
 * Only plain digit strings are accepted (no sign, decimal point, exponent or
 * whitespace). Digit strings larger than `Number.MAX_SAFE_INTEGER` are rejected
 * as well, because `Number()` would otherwise silently round them to a
 * different integer than the one the user typed.
 *
 * @param value - Raw flag value from the command line.
 * @param message - Error message used when the value is rejected.
 * @param command - Example command attached to the error.
 * @returns The parsed integer, guaranteed to be a safe integer >= 1.
 * @throws CLIError with code `INVALID_FLAG` when the value is rejected.
 */
function parsePositiveIntegerFlag(value: string, message: string, command: string): number {
  const parsed = /^\d+$/.test(value) ? Number(value) : Number.NaN;
  // isSafeInteger is false for NaN and for magnitudes that lose precision as a double.
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new CLIError(message, "INVALID_FLAG", command);
  }
  return parsed;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Parses a flag value that must be a non-negative base-10 integer (zero allowed).
 *
 * Only plain digit strings are accepted (no sign, decimal point, exponent or
 * whitespace). Digit strings larger than `Number.MAX_SAFE_INTEGER` are rejected
 * as well, because `Number()` would otherwise silently round them to a
 * different integer than the one the user typed.
 *
 * @param value - Raw flag value from the command line.
 * @param message - Error message used when the value is rejected.
 * @param command - Example command attached to the error.
 * @returns The parsed integer, guaranteed to be a safe integer >= 0.
 * @throws CLIError with code `INVALID_FLAG` when the value is rejected.
 */
function parseNonNegativeIntegerFlag(value: string, message: string, command: string): number {
  const parsed = /^\d+$/.test(value) ? Number(value) : Number.NaN;
  // isSafeInteger is false for NaN and for magnitudes that lose precision as a double.
  if (!Number.isSafeInteger(parsed)) {
    throw new CLIError(message, "INVALID_FLAG", command);
  }
  return parsed;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Returns the help text for `selftune orchestrate`, rendered from the
 * `orchestrate` entry of the shared public command surfaces.
 */
export function renderOrchestrateHelp(): string {
  const { orchestrate: orchestrateSurface } = PUBLIC_COMMAND_SURFACES;
  return renderCommandHelp(orchestrateSurface);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Parses `selftune orchestrate` command-line arguments.
 *
 * Arguments are parsed in strict mode against the flag table below. When
 * `--help` / `-h` is present the function returns immediately with
 * `showHelp: true` and fixed default settings, without validating any other
 * flag. Otherwise the numeric flags are validated in this order:
 * `--max-skills`, `--recent-window`, `--max-auto-grade`, `--loop-interval`.
 *
 * @param argv - Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns Parsed loop settings, per-run options and any warnings.
 * @throws CLIError with code `INVALID_FLAG` when a numeric flag is invalid.
 */
export function parseOrchestrateCliArgs(
  argv: string[] = process.argv.slice(2),
): ParsedOrchestrateCliArgs {
  const parsed = parseArgs({
    args: argv,
    strict: true,
    options: {
      "dry-run": { type: "boolean", default: false },
      "review-required": { type: "boolean", default: false },
      "auto-approve": { type: "boolean", default: false },
      skill: { type: "string" },
      "max-skills": { type: "string", default: "5" },
      "recent-window": { type: "string", default: "48" },
      "sync-force": { type: "boolean", default: false },
      "max-auto-grade": { type: "string", default: "5" },
      loop: { type: "boolean", default: false },
      "loop-interval": { type: "string", default: "3600" },
      help: { type: "boolean", short: "h", default: false },
    },
  });
  const flags = parsed.values;

  // Help wins over everything else: no other flag is validated in this case.
  if (flags.help) {
    return {
      showHelp: true,
      warnings: [],
      loop: false,
      loopIntervalSeconds: 3600,
      runOptions: {
        dryRun: false,
        approvalMode: "auto",
        maxSkills: 5,
        recentWindowHours: 48,
        syncForce: false,
        maxAutoGrade: 5,
      },
    };
  }

  const loopRequested = flags.loop ?? false;

  const maxSkills = parsePositiveIntegerFlag(
    flags["max-skills"] ?? "5",
    "--max-skills must be a positive integer",
    "selftune orchestrate --max-skills 5",
  );
  const recentWindowHours = parsePositiveIntegerFlag(
    flags["recent-window"] ?? "48",
    "--recent-window must be a positive integer",
    "selftune orchestrate --recent-window 48",
  );
  const maxAutoGrade = parseNonNegativeIntegerFlag(
    flags["max-auto-grade"] ?? "5",
    "--max-auto-grade must be a non-negative integer",
    "selftune orchestrate --max-auto-grade 5",
  );

  // The interval must always be a digit string; the 60-second floor is only
  // enforced when looping was actually requested.
  const intervalText = flags["loop-interval"] ?? "3600";
  const intervalIsDigits = /^\d+$/.test(intervalText);
  const intervalBelowFloor = loopRequested && Number(intervalText) < 60;
  if (!intervalIsDigits || intervalBelowFloor) {
    throw new CLIError(
      "--loop-interval must be an integer >= 60 (seconds)",
      "INVALID_FLAG",
      "selftune orchestrate --loop --loop-interval 3600",
    );
  }

  const warnings: string[] = flags["auto-approve"]
    ? ["[orchestrate] --auto-approve is deprecated; autonomous mode is now the default."]
    : [];

  return {
    showHelp: false,
    warnings,
    loop: loopRequested,
    loopIntervalSeconds: Number(intervalText),
    runOptions: {
      dryRun: flags["dry-run"] ?? false,
      approvalMode: flags["review-required"] ? "review" : "auto",
      skillFilter: flags.skill,
      maxSkills,
      recentWindowHours,
      syncForce: flags["sync-force"] ?? false,
      maxAutoGrade,
    },
  };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Builds the JSON-serialisable summary of an orchestrate run.
 *
 * The output starts with every field of `result.summary`, then adds `upload`
 * (only when `result.uploadSummary` is truthy), `workflow_proposals` (one
 * flattened entry per proposal) and `decisions` (one entry per candidate).
 * A decision carries the evolve fields only when the candidate has an
 * `evolveResult`, and the watch fields only when it has a `watchResult`.
 *
 * @param result - Result of the orchestrate run.
 * @returns A plain object describing the run.
 */
export function buildOrchestrateJsonOutput(result: OrchestrateResult) {
  const { uploadSummary } = result;

  const workflow_proposals = result.workflowProposals.map((entry) => {
    const { workflow, draft } = entry;
    return {
      proposal_id: entry.proposal_id,
      source_skill_name: entry.source_skill_name,
      workflow_id: workflow.workflow_id,
      generated_skill_name: draft.skill_name,
      output_path: draft.skill_path,
      confidence: entry.confidence,
      reason: entry.rationale,
    };
  });

  const decisions = result.candidates.map((item) => {
    const { evolveResult: evolved, watchResult: watched } = item;

    // Evolve outcome; `validation` is null when the evolve step reported none.
    const evolveFields = evolved
      ? {
          deployed: evolved.deployed,
          evolveReason: evolved.reason,
          validation: evolved.validation
            ? {
                before: evolved.validation.before_pass_rate,
                after: evolved.validation.after_pass_rate,
                improved: evolved.validation.improved,
              }
            : null,
        }
      : {};

    // Watch outcome; `passRate` falls back to null when there is no snapshot value.
    const watchFields = watched
      ? {
          alert: watched.alert,
          rolledBack: watched.rolledBack,
          passRate: watched.snapshot?.pass_rate ?? null,
          recommendation: watched.recommendation,
        }
      : {};

    return {
      skill: item.skill,
      action: item.action,
      reason: item.reason,
      ...evolveFields,
      ...watchFields,
    };
  });

  return {
    ...result.summary,
    ...(uploadSummary ? { upload: uploadSummary } : {}),
    workflow_proposals,
    decisions,
  };
}
|