selftune 0.2.23 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +93 -15
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/adapters/codex/install.ts +310 -78
- package/cli/selftune/adapters/opencode/install.ts +3 -4
- package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
- package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
- package/cli/selftune/auto-update.ts +200 -8
- package/cli/selftune/canonical-export.ts +55 -25
- package/cli/selftune/command-surface.ts +397 -0
- package/cli/selftune/contribute/contribute.ts +64 -13
- package/cli/selftune/contribution-config.ts +57 -3
- package/cli/selftune/contribution-preferences.ts +117 -0
- package/cli/selftune/contribution-signals.ts +8 -4
- package/cli/selftune/contribution-staging.ts +13 -2
- package/cli/selftune/contributions.ts +55 -121
- package/cli/selftune/creator-contributions.ts +29 -10
- package/cli/selftune/cron/setup.ts +7 -3
- package/cli/selftune/dashboard-contract.ts +73 -0
- package/cli/selftune/dashboard-server.ts +168 -17
- package/cli/selftune/dashboard.ts +350 -17
- package/cli/selftune/eval/baseline.ts +21 -5
- package/cli/selftune/eval/execution-eval.ts +170 -0
- package/cli/selftune/eval/family-overlap.ts +2 -2
- package/cli/selftune/eval/hooks-to-evals.ts +228 -82
- package/cli/selftune/eval/import-skillsbench.ts +2 -2
- package/cli/selftune/eval/invocation-classifier.ts +56 -0
- package/cli/selftune/eval/synthetic-evals.ts +5 -3
- package/cli/selftune/eval/unit-test-cli.ts +7 -4
- package/cli/selftune/evolution/apply-proposal.ts +295 -0
- package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
- package/cli/selftune/evolution/evolve-body.ts +100 -39
- package/cli/selftune/evolution/evolve.ts +244 -52
- package/cli/selftune/evolution/rollback.ts +0 -1
- package/cli/selftune/evolution/validate-body.ts +68 -42
- package/cli/selftune/evolution/validate-host-replay.ts +510 -60
- package/cli/selftune/evolution/validate-proposal.ts +11 -150
- package/cli/selftune/evolution/validate-routing.ts +43 -41
- package/cli/selftune/evolution/validation-contract.ts +91 -0
- package/cli/selftune/grading/auto-grade.ts +11 -7
- package/cli/selftune/grading/grade-session.ts +10 -16
- package/cli/selftune/index.ts +35 -10
- package/cli/selftune/ingestors/claude-replay.ts +15 -10
- package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/ingestors/pi-ingest.ts +3 -2
- package/cli/selftune/init.ts +27 -3
- package/cli/selftune/localdb/direct-write.ts +35 -1
- package/cli/selftune/localdb/queries/cron.ts +34 -0
- package/cli/selftune/localdb/queries/dashboard.ts +834 -0
- package/cli/selftune/localdb/queries/evolution.ts +158 -0
- package/cli/selftune/localdb/queries/execution.ts +133 -0
- package/cli/selftune/localdb/queries/json.ts +18 -0
- package/cli/selftune/localdb/queries/monitoring.ts +263 -0
- package/cli/selftune/localdb/queries/raw.ts +95 -0
- package/cli/selftune/localdb/queries/staging.ts +270 -0
- package/cli/selftune/localdb/queries/trust.ts +392 -0
- package/cli/selftune/localdb/queries.ts +60 -2288
- package/cli/selftune/localdb/schema.ts +21 -0
- package/cli/selftune/monitoring/watch.ts +96 -29
- package/cli/selftune/normalization.ts +3 -0
- package/cli/selftune/observability.ts +4 -2
- package/cli/selftune/orchestrate/cli.ts +161 -0
- package/cli/selftune/orchestrate/execute.ts +295 -0
- package/cli/selftune/orchestrate/finalize.ts +157 -0
- package/cli/selftune/orchestrate/locks.ts +40 -0
- package/cli/selftune/orchestrate/plan.ts +131 -0
- package/cli/selftune/orchestrate/post-run.ts +59 -0
- package/cli/selftune/orchestrate/prepare.ts +334 -0
- package/cli/selftune/orchestrate/report.ts +182 -0
- package/cli/selftune/orchestrate/runtime.ts +120 -0
- package/cli/selftune/orchestrate/signals.ts +48 -0
- package/cli/selftune/orchestrate.ts +150 -1173
- package/cli/selftune/repair/skill-usage.ts +5 -2
- package/cli/selftune/routes/overview.ts +5 -2
- package/cli/selftune/routes/skill-report.ts +15 -2
- package/cli/selftune/schedule.ts +5 -5
- package/cli/selftune/status.ts +39 -2
- package/cli/selftune/testing-readiness.ts +597 -0
- package/cli/selftune/types.ts +44 -4
- package/cli/selftune/uninstall.ts +2 -1
- package/cli/selftune/utils/canonical-log.ts +1 -9
- package/cli/selftune/utils/cli-error.ts +9 -0
- package/cli/selftune/utils/llm-call.ts +126 -6
- package/cli/selftune/utils/skill-discovery.ts +2 -0
- package/cli/selftune/workflows/proposals.ts +184 -0
- package/cli/selftune/workflows/skill-scaffold.ts +241 -0
- package/cli/selftune/workflows/workflows.ts +100 -26
- package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
- package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
- package/package.json +25 -9
- package/packages/dashboard-core/AGENTS.md +18 -0
- package/packages/dashboard-core/README.md +30 -0
- package/packages/dashboard-core/index.ts +3 -0
- package/packages/dashboard-core/package.json +39 -0
- package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
- package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
- package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
- package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
- package/packages/dashboard-core/src/chrome/index.ts +14 -0
- package/packages/dashboard-core/src/chrome/types.ts +81 -0
- package/packages/dashboard-core/src/chrome/utils.ts +23 -0
- package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
- package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
- package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
- package/packages/dashboard-core/src/gates/index.ts +3 -0
- package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
- package/packages/dashboard-core/src/host/adapter.ts +47 -0
- package/packages/dashboard-core/src/host/capabilities.ts +55 -0
- package/packages/dashboard-core/src/host/index.ts +3 -0
- package/packages/dashboard-core/src/models/analytics.ts +39 -0
- package/packages/dashboard-core/src/models/index.ts +4 -0
- package/packages/dashboard-core/src/models/overview.ts +98 -0
- package/packages/dashboard-core/src/models/runtime.ts +7 -0
- package/packages/dashboard-core/src/models/skills.ts +34 -0
- package/packages/dashboard-core/src/routes/index.ts +2 -0
- package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
- package/packages/dashboard-core/src/routes/manifest.ts +451 -0
- package/packages/dashboard-core/src/routes/types.ts +39 -0
- package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
- package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
- package/packages/dashboard-core/src/screens/index.ts +37 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
- package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
- package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
- package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
- package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
- package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
- package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
- package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
- package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/packages/telemetry-contract/src/schemas.ts +41 -1
- package/packages/telemetry-contract/src/types.ts +103 -2
- package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
- package/packages/ui/src/components/OverviewPanels.tsx +67 -26
- package/packages/ui/src/primitives/tabs.tsx +7 -6
- package/packages/ui/src/types.ts +10 -0
- package/skill/SKILL.md +130 -332
- package/skill/agents/diagnosis-analyst.md +3 -3
- package/skill/agents/evolution-reviewer.md +3 -3
- package/skill/agents/integration-guide.md +3 -3
- package/skill/agents/pattern-analyst.md +2 -2
- package/skill/references/cli-quick-reference.md +89 -0
- package/skill/references/creator-playbook.md +131 -0
- package/skill/references/examples.md +48 -0
- package/skill/references/troubleshooting.md +47 -0
- package/skill/references/version-history.md +1 -1
- package/skill/selftune.contribute.json +11 -0
- package/skill/{Workflows → workflows}/Baseline.md +20 -1
- package/skill/{Workflows → workflows}/Contribute.md +23 -10
- package/skill/{Workflows → workflows}/Contributions.md +13 -5
- package/skill/workflows/CreateTestDeploy.md +170 -0
- package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
- package/skill/{Workflows → workflows}/Cron.md +1 -1
- package/skill/{Workflows → workflows}/Dashboard.md +20 -0
- package/skill/{Workflows → workflows}/Doctor.md +1 -1
- package/skill/{Workflows → workflows}/Evals.md +67 -2
- package/skill/{Workflows → workflows}/Evolve.md +119 -30
- package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
- package/skill/{Workflows → workflows}/Grade.md +1 -1
- package/skill/{Workflows → workflows}/Initialize.md +8 -4
- package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
- package/skill/{Workflows → workflows}/Schedule.md +3 -3
- package/skill/workflows/SignalsDashboard.md +87 -0
- package/skill/{Workflows → workflows}/UnitTest.md +19 -0
- package/skill/{Workflows → workflows}/Watch.md +42 -2
- package/skill/{Workflows → workflows}/Workflows.md +39 -2
- package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
- package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
- package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
- /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
- /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
- /package/skill/{Workflows → workflows}/Badge.md +0 -0
- /package/skill/{Workflows → workflows}/Composability.md +0 -0
- /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
- /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
- /package/skill/{Workflows → workflows}/Hook.md +0 -0
- /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
- /package/skill/{Workflows → workflows}/Ingest.md +0 -0
- /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
- /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
- /package/skill/{Workflows → workflows}/Recover.md +0 -0
- /package/skill/{Workflows → workflows}/Registry.md +0 -0
- /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
- /package/skill/{Workflows → workflows}/Replay.md +0 -0
- /package/skill/{Workflows → workflows}/Rollback.md +0 -0
- /package/skill/{Workflows → workflows}/Sync.md +0 -0
- /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
- /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import type { Database } from "bun:sqlite";
|
|
2
|
+
|
|
3
|
+
import { safeParseJson } from "./json.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Count the pending creator-contribution rows for each skill.
 *
 * Only rows of `creator_contribution_staging` whose status is 'pending' are
 * counted. The result holds one entry per skill name, ordered by skill name.
 *
 * @param db Open database handle used to run the query.
 * @returns One `{ skill_name, pending_count }` object per skill with pending rows.
 */
export function getCreatorContributionStagingCounts(db: Database): Array<{
  skill_name: string;
  pending_count: number;
}> {
  const pendingPerSkill = db.query(
    `SELECT skill_name, COUNT(*) AS pending_count
       FROM creator_contribution_staging
       WHERE status = 'pending'
       GROUP BY skill_name
       ORDER BY skill_name`,
  );
  const counts = pendingPerSkill.all();
  return counts as Array<{ skill_name: string; pending_count: number }>;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Row counts of the `creator_contribution_staging` table broken down by
 * `status`, in the shape produced by `getCreatorContributionRelayStats`.
 */
export interface CreatorContributionRelayStats {
  /** Number of rows whose status is 'pending'. */
  pending: number;
  /** Number of rows whose status is 'sending'. */
  sending: number;
  /** Number of rows whose status is 'sent'. */
  sent: number;
  /** Number of rows whose status is 'failed'. */
  failed: number;
}
|
|
29
|
+
|
|
30
|
+
/**
 * One row of the `creator_contribution_staging` table, with exactly the
 * columns selected by `getPendingCreatorContributionRows`.
 */
export interface CreatorContributionStagingRow {
  /** Row id; pending rows are returned in ascending order of this value. */
  id: number;
  // NOTE(review): presumably a uniqueness key used to avoid staging duplicates — confirm against the schema.
  dedupe_key: string;
  /** Name of the skill the staged contribution belongs to. */
  skill_name: string;
  // NOTE(review): presumably identifies the skill creator receiving the contribution — confirm.
  creator_id: string;
  // NOTE(review): the name suggests a JSON-encoded payload string; this module never parses it — confirm with the writer.
  payload_json: string;
  /** Relay status; queries in this module match 'pending', 'sending', 'sent' and 'failed'. */
  status: string;
  // NOTE(review): timestamp format is not established in this module — confirm (likely ISO-8601).
  staged_at: string;
  updated_at: string;
  /** Nullable error text column; the code that writes it is not visible here. */
  last_error: string | null;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Tally `creator_contribution_staging` rows by relay status.
 *
 * A single aggregate query counts the rows in each of the four statuses
 * ('pending', 'sending', 'sent', 'failed'). If the query yields no row at
 * all, every counter is reported as zero.
 *
 * @param db Open database handle used to run the query.
 * @returns The per-status counters.
 */
export function getCreatorContributionRelayStats(db: Database): CreatorContributionRelayStats {
  const statusTotals = db.query(
    `SELECT
         COALESCE(SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END), 0) AS pending,
         COALESCE(SUM(CASE WHEN status = 'sending' THEN 1 ELSE 0 END), 0) AS sending,
         COALESCE(SUM(CASE WHEN status = 'sent' THEN 1 ELSE 0 END), 0) AS sent,
         COALESCE(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END), 0) AS failed
       FROM creator_contribution_staging`,
  );
  const totals = statusTotals.get() as CreatorContributionRelayStats | null | undefined;

  // Fall back to all-zero counters when the driver hands back no row.
  if (totals === null || totals === undefined) {
    return { pending: 0, sending: 0, sent: 0, failed: 0 };
  }
  return totals;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Fetch staged creator contributions that are still waiting to be relayed.
 *
 * Selects rows of `creator_contribution_staging` whose status is 'pending',
 * oldest first (ascending `id`), capped by `limit`.
 *
 * @param db Open database handle used to run the query.
 * @param limit Value bound to the SQL `LIMIT` placeholder; defaults to 50.
 * @returns The matching staging rows.
 */
export function getPendingCreatorContributionRows(
  db: Database,
  limit = 50,
): CreatorContributionStagingRow[] {
  const pendingStatement = db.query(
    `SELECT id, dedupe_key, skill_name, creator_id, payload_json, status, staged_at, updated_at, last_error
       FROM creator_contribution_staging
       WHERE status = 'pending'
       ORDER BY id ASC
       LIMIT ?`,
  );
  const pendingRows = pendingStatement.all(limit);
  return pendingRows as CreatorContributionStagingRow[];
}
|
|
70
|
+
|
|
71
|
+
/** Untyped row as returned by the canonical-table queries below. */
type StagingSourceRow = Record<string, unknown>;

/**
 * Build the leading "envelope" fields shared by prompt, skill-invocation and
 * execution-fact records.
 *
 * Each envelope value is taken from the row itself and, when the row's value
 * is null/undefined, from the session row with the same `session_id`. The
 * session's parsed `raw_source_ref` is only computed when the row's own value
 * parses to null/undefined. `source_session_kind` always comes from the session.
 *
 * Property insertion order matches the order the records have always been
 * emitted in, so serialized output is unchanged.
 */
function buildStagingEnvelope(
  recordKind: string,
  row: StagingSourceRow,
  session: StagingSourceRow | undefined,
): Record<string, unknown> {
  return {
    record_kind: recordKind,
    schema_version: row.schema_version ?? session?.schema_version ?? undefined,
    normalizer_version: row.normalizer_version ?? session?.normalizer_version ?? undefined,
    normalized_at: row.normalized_at ?? session?.normalized_at ?? undefined,
    platform: row.platform ?? session?.platform ?? undefined,
    capture_mode: row.capture_mode ?? session?.capture_mode ?? undefined,
    raw_source_ref:
      safeParseJson(row.raw_source_ref as string | null) ??
      safeParseJson(session?.raw_source_ref as string | null) ??
      undefined,
    source_session_kind: session?.source_session_kind ?? undefined,
    session_id: row.session_id,
  };
}

/**
 * Read every session, prompt, skill invocation and execution fact from the
 * local database and convert them into flat canonical records.
 *
 * Records are appended in four groups, in this order: sessions (ordered by
 * `normalized_at`), prompts, skill invocations and execution facts (each
 * ordered by `occurred_at`). Every record carries a `record_kind`
 * discriminator. SQL NULLs in optional columns are turned into `undefined`;
 * the 0/1 integer columns `is_actionable` and `triggered` become booleans.
 *
 * @param db Open database handle used to run the queries.
 * @returns All canonical records, session records first.
 */
export function queryCanonicalRecordsForStaging(db: Database): Record<string, unknown>[] {
  const records: Record<string, unknown>[] = [];
  const selectAll = (sql: string): StagingSourceRow[] => db.query(sql).all() as StagingSourceRow[];

  const sessions = selectAll(
    `SELECT session_id, started_at, ended_at, platform, model, completion_status,
              source_session_kind, agent_cli, workspace_path, repo_remote, branch,
              schema_version, normalized_at, normalizer_version, capture_mode, raw_source_ref
       FROM sessions ORDER BY normalized_at`,
  );
  // Lookup used by the child records below to inherit missing envelope values.
  const sessionById = new Map(sessions.map((session) => [session.session_id as string, session]));

  for (const session of sessions) {
    records.push({
      record_kind: "session",
      schema_version: session.schema_version ?? undefined,
      normalizer_version: session.normalizer_version ?? undefined,
      normalized_at: session.normalized_at ?? undefined,
      platform: session.platform ?? undefined,
      capture_mode: session.capture_mode ?? undefined,
      raw_source_ref: safeParseJson(session.raw_source_ref as string | null) ?? undefined,
      source_session_kind: session.source_session_kind ?? undefined,
      session_id: session.session_id,
      started_at: session.started_at ?? undefined,
      ended_at: session.ended_at ?? undefined,
      model: session.model ?? undefined,
      completion_status: session.completion_status ?? undefined,
      agent_cli: session.agent_cli ?? undefined,
      workspace_path: session.workspace_path ?? undefined,
      repo_remote: session.repo_remote ?? undefined,
      branch: session.branch ?? undefined,
    });
  }

  const prompts = selectAll(
    `SELECT prompt_id, session_id, occurred_at, prompt_kind, is_actionable, prompt_index, prompt_text,
              schema_version, platform, normalized_at, normalizer_version, capture_mode, raw_source_ref
       FROM prompts ORDER BY occurred_at`,
  );
  for (const prompt of prompts) {
    records.push({
      ...buildStagingEnvelope("prompt", prompt, sessionById.get(prompt.session_id as string)),
      prompt_id: prompt.prompt_id,
      occurred_at: prompt.occurred_at,
      prompt_text: prompt.prompt_text,
      prompt_kind: prompt.prompt_kind,
      is_actionable: (prompt.is_actionable as number) === 1,
      prompt_index: prompt.prompt_index ?? undefined,
    });
  }

  const invocations = selectAll(
    `SELECT skill_invocation_id, session_id, occurred_at, skill_name, skill_path, invocation_mode,
              triggered, confidence, tool_name, matched_prompt_id, agent_type,
              schema_version, platform, normalized_at, normalizer_version, capture_mode, raw_source_ref
       FROM skill_invocations ORDER BY occurred_at`,
  );
  for (const invocation of invocations) {
    records.push({
      ...buildStagingEnvelope(
        "skill_invocation",
        invocation,
        sessionById.get(invocation.session_id as string),
      ),
      skill_invocation_id: invocation.skill_invocation_id,
      occurred_at: invocation.occurred_at,
      skill_name: invocation.skill_name,
      skill_path: invocation.skill_path ?? undefined,
      invocation_mode: invocation.invocation_mode,
      triggered: (invocation.triggered as number) === 1,
      // Passed through as-is: a NULL confidence stays null rather than undefined.
      confidence: invocation.confidence,
      tool_name: invocation.tool_name ?? undefined,
      matched_prompt_id: invocation.matched_prompt_id ?? undefined,
      agent_type: invocation.agent_type ?? undefined,
    });
  }

  const facts = selectAll(
    `SELECT id AS execution_fact_id, session_id, occurred_at, prompt_id, tool_calls_json, total_tool_calls,
              assistant_turns, errors_encountered, input_tokens, output_tokens,
              duration_ms, completion_status,
              schema_version, platform, normalized_at, normalizer_version, capture_mode, raw_source_ref
       FROM execution_facts ORDER BY occurred_at`,
  );
  for (const fact of facts) {
    records.push({
      ...buildStagingEnvelope("execution_fact", fact, sessionById.get(fact.session_id as string)),
      // The row id is exported in string form.
      execution_fact_id: String(fact.execution_fact_id),
      occurred_at: fact.occurred_at,
      prompt_id: fact.prompt_id ?? undefined,
      // Missing or unparsable tool-call data is exported as an empty object.
      tool_calls_json: safeParseJson(fact.tool_calls_json as string | null) ?? {},
      total_tool_calls: fact.total_tool_calls,
      assistant_turns: fact.assistant_turns,
      errors_encountered: fact.errors_encountered,
      input_tokens: fact.input_tokens ?? undefined,
      output_tokens: fact.output_tokens ?? undefined,
      duration_ms: fact.duration_ms ?? undefined,
      completion_status: fact.completion_status ?? undefined,
    });
  }

  return records;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Look up the most recently updated failed entry of `upload_queue`.
 *
 * This is a best-effort read: any error raised while querying is swallowed
 * and reported as `null`, the same value returned when no failed entry exists.
 *
 * @param db Open database handle used to run the query.
 * @returns The `last_error` / `updated_at` pair of the newest failed entry, or `null`.
 */
export function getLastUploadError(
  db: Database,
): { last_error: string | null; updated_at: string } | null {
  type FailedUpload = { last_error: string | null; updated_at: string };

  let newestFailure: FailedUpload | null | undefined;
  try {
    const failedStatement = db.query(
      `SELECT last_error, updated_at
         FROM upload_queue
         WHERE status = 'failed'
         ORDER BY updated_at DESC
         LIMIT 1`,
    );
    newestFailure = failedStatement.get() as FailedUpload | null;
  } catch {
    return null;
  }
  return newestFailure ?? null;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Look up the most recently updated sent entry of `upload_queue`.
 *
 * This is a best-effort read: any error raised while querying is swallowed
 * and reported as `null`, the same value returned when no sent entry exists.
 *
 * @param db Open database handle used to run the query.
 * @returns The `updated_at` of the newest sent entry, or `null`.
 */
export function getLastUploadSuccess(db: Database): { updated_at: string } | null {
  type SentUpload = { updated_at: string };

  let newestSent: SentUpload | null | undefined;
  try {
    const sentStatement = db.query(
      `SELECT updated_at
         FROM upload_queue
         WHERE status = 'sent'
         ORDER BY updated_at DESC
         LIMIT 1`,
    );
    newestSent = sentStatement.get() as SentUpload | null;
  } catch {
    return null;
  }
  return newestSent ?? null;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Compute how long the oldest pending entry of `upload_queue` has been waiting.
 *
 * This is a best-effort read: any error raised while querying is swallowed
 * and reported as `null`.
 *
 * @param db Open database handle used to run the query.
 * @returns Whole seconds elapsed since the oldest pending entry's `created_at`,
 *   or `null` when there is no pending entry, the query fails, or the stored
 *   timestamp cannot be parsed as a date.
 */
export function getOldestPendingAge(db: Database): number | null {
  try {
    const row = db
      .query(
        `SELECT created_at
         FROM upload_queue
         WHERE status = 'pending'
         ORDER BY created_at ASC
         LIMIT 1`,
      )
      .get() as { created_at: string } | null;
    if (!row) return null;

    const createdAtMs = new Date(row.created_at).getTime();
    // An unparsable timestamp yields NaN; report "unknown" instead of leaking
    // NaN through a `number | null` return type.
    if (Number.isNaN(createdAtMs)) return null;

    return Math.floor((Date.now() - createdAtMs) / 1000);
  } catch {
    return null;
  }
}
|
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
import type { Database } from "bun:sqlite";
|
|
2
|
+
|
|
3
|
+
import type { AttentionItem, AutonomousDecision, DecisionKind } from "../../dashboard-contract.js";
|
|
4
|
+
import { safeParseJson } from "./json.js";
|
|
5
|
+
import { getPendingProposals } from "./evolution.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Aggregated trust metrics for a single skill.
 *
 * NOTE(review): the code that fills in this summary is not visible in this
 * part of the file, so the per-field notes below are inferred from the field
 * names only and must be confirmed against the producer.
 */
export interface SkillTrustSummary {
  /** Name of the skill the metrics belong to. */
  skill_name: string;
  // presumably the number of observations considered for this skill — TODO confirm
  total_checks: number;
  // presumably how many of those observations had the skill triggered — TODO confirm
  triggered_count: number;
  // presumably a fraction in [0, 1] — TODO confirm range and denominator
  miss_rate: number;
  // presumably observations whose prompt text looks system-generated — TODO confirm
  system_like_count: number;
  // presumably system_like_count relative to the total — TODO confirm
  system_like_rate: number;
  // presumably the share of observations linked to a prompt — TODO confirm
  prompt_link_rate: number;
  /** Nullable; meaning of "action" is not established here — TODO confirm. */
  latest_action: string | null;
  // presumably a fraction in [0, 1] — TODO confirm
  pass_rate: number;
  /** Nullable timestamp string; format not established here — TODO confirm. */
  last_seen: string | null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Element type of the array returned by `queryTrustedSkillObservationRows`.
 *
 * All fields except `query_text` share their names and types with
 * `skill_invocations` columns selected by that function.
 */
export interface TrustedSkillObservationRow {
  /** Name of the observed skill. */
  skill_name: string;
  /** Session the observation belongs to. */
  session_id: string;
  /** Nullable timestamp string of the observation. */
  occurred_at: string | null;
  /** Integer flag; `queryTrustedSkillObservationRows` compares it against 1 and 0. */
  triggered: number;
  /** Id of the matched prompt (the key used to join the `prompts` table), or null. */
  matched_prompt_id: string | null;
  /** Nullable confidence value; scale is not established here — TODO confirm. */
  confidence: number | null;
  /** Nullable invocation mode; the set of valid values is not visible here. */
  invocation_mode: string | null;
  // NOTE(review): presumably the invocation's `query`, falling back to the matched
  // prompt's text and then to "" — the final mapping is outside the visible code; confirm.
  query_text: string;
}
|
|
30
|
+
|
|
31
|
+
export function queryTrustedSkillObservationRows(db: Database): TrustedSkillObservationRow[] {
|
|
32
|
+
const SYSTEM_LIKE_PREFIXES = ["<system_instruction>", "<system-instruction>", "<command-name>"];
|
|
33
|
+
const INTERNAL_EVAL_MARKERS = [
|
|
34
|
+
"you are an evaluation assistant",
|
|
35
|
+
"you are a skill description optimizer",
|
|
36
|
+
"would each query trigger this skill",
|
|
37
|
+
"propose an improved description",
|
|
38
|
+
"failure patterns:",
|
|
39
|
+
"output only valid json",
|
|
40
|
+
];
|
|
41
|
+
const isSystemLike = (text: string | null | undefined): boolean => {
|
|
42
|
+
if (!text) return false;
|
|
43
|
+
const trimmed = text.trimStart();
|
|
44
|
+
return SYSTEM_LIKE_PREFIXES.some((prefix) => trimmed.startsWith(prefix));
|
|
45
|
+
};
|
|
46
|
+
const isInternalSelftunePrompt = (
|
|
47
|
+
text: string | null | undefined,
|
|
48
|
+
promptKind: string | null | undefined,
|
|
49
|
+
): boolean => {
|
|
50
|
+
if (!text) return false;
|
|
51
|
+
const lowered = text.toLowerCase();
|
|
52
|
+
return (
|
|
53
|
+
promptKind === "meta" && INTERNAL_EVAL_MARKERS.some((marker) => lowered.includes(marker))
|
|
54
|
+
);
|
|
55
|
+
};
|
|
56
|
+
const isPollutingPrompt = (
|
|
57
|
+
text: string | null | undefined,
|
|
58
|
+
promptKind: string | null | undefined,
|
|
59
|
+
): boolean => isSystemLike(text) || isInternalSelftunePrompt(text, promptKind);
|
|
60
|
+
const classifyObservationKind = (
|
|
61
|
+
skillInvocationId: string,
|
|
62
|
+
captureMode: string | null,
|
|
63
|
+
triggered: number,
|
|
64
|
+
rawSourceRefJson: string | null,
|
|
65
|
+
): "canonical" | "repaired_trigger" | "repaired_contextual_miss" | "legacy_materialized" => {
|
|
66
|
+
if (skillInvocationId.includes(":su:")) return "legacy_materialized";
|
|
67
|
+
if (captureMode === "repair") {
|
|
68
|
+
const rawSourceRef = safeParseJson(rawSourceRefJson) as {
|
|
69
|
+
metadata?: { miss_type?: string };
|
|
70
|
+
} | null;
|
|
71
|
+
if (triggered === 0 && rawSourceRef?.metadata?.miss_type === "contextual_read") {
|
|
72
|
+
return "repaired_contextual_miss";
|
|
73
|
+
}
|
|
74
|
+
return "repaired_trigger";
|
|
75
|
+
}
|
|
76
|
+
return "canonical";
|
|
77
|
+
};
|
|
78
|
+
const normalizeQueryForGrouping = (query: string) =>
|
|
79
|
+
query.replace(/\s+/g, " ").trim().toLowerCase();
|
|
80
|
+
|
|
81
|
+
const rows = db
|
|
82
|
+
.query(
|
|
83
|
+
`SELECT
|
|
84
|
+
si.skill_name,
|
|
85
|
+
si.session_id,
|
|
86
|
+
si.occurred_at,
|
|
87
|
+
si.triggered,
|
|
88
|
+
si.matched_prompt_id,
|
|
89
|
+
si.confidence,
|
|
90
|
+
si.invocation_mode,
|
|
91
|
+
si.skill_invocation_id,
|
|
92
|
+
si.capture_mode,
|
|
93
|
+
si.raw_source_ref,
|
|
94
|
+
si.query,
|
|
95
|
+
p.prompt_text,
|
|
96
|
+
p.prompt_kind
|
|
97
|
+
FROM skill_invocations si
|
|
98
|
+
LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id`,
|
|
99
|
+
)
|
|
100
|
+
.all() as Array<{
|
|
101
|
+
skill_name: string;
|
|
102
|
+
session_id: string;
|
|
103
|
+
occurred_at: string | null;
|
|
104
|
+
triggered: number;
|
|
105
|
+
matched_prompt_id: string | null;
|
|
106
|
+
confidence: number | null;
|
|
107
|
+
invocation_mode: string | null;
|
|
108
|
+
skill_invocation_id: string;
|
|
109
|
+
capture_mode: string | null;
|
|
110
|
+
raw_source_ref: string | null;
|
|
111
|
+
query: string | null;
|
|
112
|
+
prompt_text: string | null;
|
|
113
|
+
prompt_kind: string | null;
|
|
114
|
+
}>;
|
|
115
|
+
|
|
116
|
+
const bySkill = new Map<
|
|
117
|
+
string,
|
|
118
|
+
Array<{
|
|
119
|
+
skill_name: string;
|
|
120
|
+
session_id: string;
|
|
121
|
+
occurred_at: string | null;
|
|
122
|
+
triggered: number;
|
|
123
|
+
matched_prompt_id: string | null;
|
|
124
|
+
confidence: number | null;
|
|
125
|
+
invocation_mode: string | null;
|
|
126
|
+
queryText: string;
|
|
127
|
+
observation_kind:
|
|
128
|
+
| "canonical"
|
|
129
|
+
| "repaired_trigger"
|
|
130
|
+
| "repaired_contextual_miss"
|
|
131
|
+
| "legacy_materialized";
|
|
132
|
+
groupKey: string;
|
|
133
|
+
}>
|
|
134
|
+
>();
|
|
135
|
+
const trustedRows: TrustedSkillObservationRow[] = [];
|
|
136
|
+
|
|
137
|
+
for (const row of rows) {
|
|
138
|
+
const queryText = row.query || row.prompt_text || "";
|
|
139
|
+
const pollutionText = row.prompt_text || row.query || "";
|
|
140
|
+
const observationKind = classifyObservationKind(
|
|
141
|
+
row.skill_invocation_id,
|
|
142
|
+
row.capture_mode,
|
|
143
|
+
row.triggered,
|
|
144
|
+
row.raw_source_ref,
|
|
145
|
+
);
|
|
146
|
+
if (isPollutingPrompt(pollutionText, row.prompt_kind)) continue;
|
|
147
|
+
if (observationKind === "legacy_materialized") continue;
|
|
148
|
+
|
|
149
|
+
const normalizedQuery = normalizeQueryForGrouping(queryText);
|
|
150
|
+
const groupKey =
|
|
151
|
+
normalizedQuery.length > 0
|
|
152
|
+
? `${row.session_id}::${normalizedQuery}`
|
|
153
|
+
: `${row.skill_invocation_id}`;
|
|
154
|
+
const observation = {
|
|
155
|
+
skill_name: row.skill_name,
|
|
156
|
+
session_id: row.session_id,
|
|
157
|
+
occurred_at: row.occurred_at,
|
|
158
|
+
triggered: row.triggered,
|
|
159
|
+
matched_prompt_id: row.matched_prompt_id,
|
|
160
|
+
confidence: row.confidence,
|
|
161
|
+
invocation_mode: row.invocation_mode,
|
|
162
|
+
queryText,
|
|
163
|
+
observation_kind: observationKind,
|
|
164
|
+
groupKey,
|
|
165
|
+
};
|
|
166
|
+
const existing = bySkill.get(row.skill_name);
|
|
167
|
+
if (existing) existing.push(observation);
|
|
168
|
+
else bySkill.set(row.skill_name, [observation]);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
for (const skillRows of bySkill.values()) {
|
|
172
|
+
const grouped = new Map<string, typeof skillRows>();
|
|
173
|
+
for (const row of skillRows) {
|
|
174
|
+
const existing = grouped.get(row.groupKey);
|
|
175
|
+
if (existing) existing.push(row);
|
|
176
|
+
else grouped.set(row.groupKey, [row]);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
const deduped = [...grouped.values()].map((group) => {
|
|
180
|
+
const sorted = [...group].sort((a, b) => {
|
|
181
|
+
const aScore =
|
|
182
|
+
(a.triggered === 1 ? 100 : 0) +
|
|
183
|
+
(a.observation_kind === "canonical" ? 20 : 0) +
|
|
184
|
+
(a.observation_kind === "repaired_trigger" ? 15 : 0);
|
|
185
|
+
const bScore =
|
|
186
|
+
(b.triggered === 1 ? 100 : 0) +
|
|
187
|
+
(b.observation_kind === "canonical" ? 20 : 0) +
|
|
188
|
+
(b.observation_kind === "repaired_trigger" ? 15 : 0);
|
|
189
|
+
if (aScore !== bScore) return bScore - aScore;
|
|
190
|
+
return (b.occurred_at ?? "").localeCompare(a.occurred_at ?? "");
|
|
191
|
+
});
|
|
192
|
+
return sorted[0]!;
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
trustedRows.push(
|
|
196
|
+
...deduped.map((row) => ({
|
|
197
|
+
skill_name: row.skill_name,
|
|
198
|
+
session_id: row.session_id,
|
|
199
|
+
occurred_at: row.occurred_at,
|
|
200
|
+
triggered: row.triggered,
|
|
201
|
+
matched_prompt_id: row.matched_prompt_id,
|
|
202
|
+
confidence: row.confidence,
|
|
203
|
+
invocation_mode: row.invocation_mode,
|
|
204
|
+
query_text: row.queryText,
|
|
205
|
+
})),
|
|
206
|
+
);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
return trustedRows;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/**
 * Builds one trust summary per skill from the trusted observation rows.
 *
 * Observations come from `queryTrustedSkillObservationRows`; the newest
 * `evolution_audit` action recorded for each skill is attached as
 * `latest_action`. Only skills with at least one observation row produce a
 * summary, emitted in the order each skill first appears in those rows.
 *
 * @param db - Database handle the observation and audit rows are read from.
 * @returns Per-skill counts and rates. `system_like_count` and
 *   `system_like_rate` are always reported as 0 by this function.
 */
export function getSkillTrustSummaries(db: Database): SkillTrustSummary[] {
  const observations = queryTrustedSkillObservationRows(db);

  const auditTrail = db
    .query(
      `SELECT skill_name, action, timestamp
       FROM evolution_audit
       WHERE skill_name IS NOT NULL
       ORDER BY timestamp DESC`,
    )
    .all() as Array<{
    skill_name: string | null;
    action: string;
    timestamp: string;
  }>;

  // The audit trail is sorted newest-first, so the first action seen for a
  // skill is its latest one; later (older) entries are ignored.
  const newestActionBySkill = new Map<string, string>();
  for (const { skill_name: auditedSkill, action } of auditTrail) {
    if (!auditedSkill || newestActionBySkill.has(auditedSkill)) continue;
    newestActionBySkill.set(auditedSkill, action);
  }

  // Bucket observations by skill; Map iteration keeps first-seen order.
  const observationsBySkill = new Map<string, typeof observations>();
  for (const observation of observations) {
    let bucket = observationsBySkill.get(observation.skill_name);
    if (!bucket) {
      bucket = [];
      observationsBySkill.set(observation.skill_name, bucket);
    }
    bucket.push(observation);
  }

  return Array.from(observationsBySkill, ([skillName, bucket]): SkillTrustSummary => {
    const total = bucket.length;
    let triggeredCount = 0;
    let promptLinkedCount = 0;
    let lastSeen: string | null = null;

    // Single pass: tally counters and track the greatest non-null timestamp.
    for (const observation of bucket) {
      if (observation.triggered === 1) triggeredCount += 1;
      if (observation.matched_prompt_id != null) promptLinkedCount += 1;

      const seenAt = observation.occurred_at;
      if (seenAt != null && (lastSeen === null || seenAt.localeCompare(lastSeen) > 0)) {
        lastSeen = seenAt;
      }
    }

    // Guarded division so an empty bucket would yield 0 rather than NaN.
    const shareOfTotal = (count: number): number => (total > 0 ? count / total : 0);

    return {
      skill_name: skillName,
      total_checks: total,
      triggered_count: triggeredCount,
      miss_rate: shareOfTotal(total - triggeredCount),
      system_like_count: 0,
      system_like_rate: 0,
      prompt_link_rate: shareOfTotal(promptLinkedCount),
      latest_action: newestActionBySkill.get(skillName) ?? null,
      pass_rate: shareOfTotal(triggeredCount),
      last_seen: lastSeen,
    };
  });
}
|
|
268
|
+
|
|
269
|
+
/**
 * Derives the list of skills that currently need a human's attention.
 *
 * Each skill summary yields at most one item, chosen by the first rule that
 * matches:
 *   1. latest audit action is "rolled_back"      -> needs_review / critical
 *   2. the skill has a pending proposal           -> needs_review / info
 *   3. fewer than 5 total checks                  -> no item
 *   4. miss rate above 10%                        -> regression / warning
 *   5. system-like rate above 10%                 -> polluted / warning
 *
 * Every item is stamped with the summary's `last_seen` value, or an empty
 * string when that is null.
 *
 * NOTE(review): `getSkillTrustSummaries` in this file reports
 * `system_like_rate` as 0, so rule 5 does not fire with the current summaries.
 *
 * @param db - Database handle passed through to the summary and proposal queries.
 * @returns Attention items in the same order as the skill summaries.
 */
export function getAttentionQueue(db: Database): AttentionItem[] {
  const trustSummaries = getSkillTrustSummaries(db);
  const skillsAwaitingReview = new Set(
    getPendingProposals(db)
      .map((proposal) => proposal.skill_name)
      .filter(Boolean),
  );

  const asPercent = (rate: number): number => Math.round(rate * 100);

  const queue: AttentionItem[] = [];

  for (const summary of trustSummaries) {
    const skill_name = summary.skill_name;
    const timestamp = summary.last_seen ?? "";

    if (summary.latest_action === "rolled_back") {
      queue.push({
        skill_name,
        category: "needs_review",
        severity: "critical",
        reason: "Rolled back after deployment",
        recommended_action: "Review rollback evidence and decide whether to re-evolve",
        timestamp,
      });
    } else if (skillsAwaitingReview.has(skill_name)) {
      queue.push({
        skill_name,
        category: "needs_review",
        severity: "info",
        reason: "Proposal awaiting review",
        recommended_action: "Review and approve or reject the pending proposal",
        timestamp,
      });
    } else if (summary.total_checks >= 5) {
      // Rate-based rules are only evaluated once a skill has at least 5 checks.
      if (summary.miss_rate > 0.1) {
        queue.push({
          skill_name,
          category: "regression",
          severity: "warning",
          reason: `High miss rate (${asPercent(summary.miss_rate)}%)`,
          recommended_action:
            "Review missed invocations and consider evolving the skill description",
          timestamp,
        });
      } else if (summary.system_like_rate > 0.1) {
        queue.push({
          skill_name,
          category: "polluted",
          severity: "warning",
          reason: `Possible telemetry pollution (${asPercent(summary.system_like_rate)}% system-like)`,
          recommended_action: "Inspect prompts for system-injected noise",
          timestamp,
        });
      }
    }
  }

  return queue;
}
|
|
329
|
+
|
|
330
|
+
/**
 * Lists recent autonomous evolution decisions recorded in `evolution_audit`.
 *
 * Reads audit rows whose `timestamp` is at or after SQLite's
 * `datetime('now', '-7 days')`, newest first, and maps each recognised action
 * to a decision kind:
 *   - "proposed" / "created" -> "proposal_created"
 *   - "rejected"             -> "proposal_rejected"
 *   - "validated"            -> "validation_failed" when the eval snapshot
 *                               lists at least one regression, otherwise
 *                               "proposal_created"
 *   - "deployed"             -> "proposal_deployed"
 *   - "rolled_back"          -> "rollback_triggered"
 *
 * Rows without a skill name or with any other action are excluded inside the
 * query, so `limit` caps the number of decisions returned rather than the
 * number of raw audit rows scanned before filtering.
 *
 * NOTE(review): the window comparison is a string comparison against
 * `datetime()` output ("YYYY-MM-DD HH:MM:SS"); confirm stored timestamps use a
 * format that orders correctly against it.
 *
 * @param db - Database handle to read the audit table from.
 * @param limit - Maximum number of decisions to return (default 20).
 * @returns Decisions ordered newest first.
 */
export function getRecentDecisions(db: Database, limit = 20): AutonomousDecision[] {
  // Actions that map to a decision kind. Filtering on these in SQL ensures the
  // LIMIT is applied after filtering, not before.
  const decisionActions = [
    "proposed",
    "created",
    "rejected",
    "validated",
    "deployed",
    "rolled_back",
  ];
  const actionPlaceholders = decisionActions.map(() => "?").join(", ");

  const rows = db
    .query(
      `SELECT timestamp, proposal_id, skill_name, action, details, eval_snapshot_json
       FROM evolution_audit
       WHERE timestamp >= datetime('now', '-7 days')
         AND skill_name IS NOT NULL
         AND action IN (${actionPlaceholders})
       ORDER BY timestamp DESC
       LIMIT ?`,
    )
    .all(...decisionActions, limit) as Array<{
    timestamp: string;
    proposal_id: string;
    skill_name: string | null;
    action: string;
    details: string | null;
    eval_snapshot_json: string | null;
  }>;

  // True when the stored eval snapshot parses to an object carrying a
  // non-empty `regressions` array.
  const hasRegressions = (snapshotJson: string | null): boolean => {
    const snapshot: unknown = safeParseJson(snapshotJson);
    if (typeof snapshot !== "object" || snapshot === null) return false;
    const regressions = (snapshot as { regressions?: unknown }).regressions;
    return Array.isArray(regressions) && regressions.length > 0;
  };

  const decisions: AutonomousDecision[] = [];

  for (const row of rows) {
    // Already excluded by the query; kept as a guard so the type narrows
    // without a non-null assertion.
    if (row.skill_name == null) continue;

    let kind: DecisionKind | null;
    switch (row.action) {
      case "proposed":
      case "created":
        kind = "proposal_created";
        break;
      case "rejected":
        kind = "proposal_rejected";
        break;
      case "validated":
        kind = hasRegressions(row.eval_snapshot_json) ? "validation_failed" : "proposal_created";
        break;
      case "deployed":
        kind = "proposal_deployed";
        break;
      case "rolled_back":
        kind = "rollback_triggered";
        break;
      default:
        // Defensive: the SQL filter should make this unreachable.
        kind = null;
    }

    if (kind === null) continue;

    decisions.push({
      timestamp: row.timestamp,
      kind,
      skill_name: row.skill_name,
      proposal_id: row.proposal_id,
      summary: row.details ?? "",
    });
  }

  return decisions;
}
|