selftune 0.2.18 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +9 -4
  2. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
  8. package/cli/selftune/constants.ts +10 -0
  9. package/cli/selftune/contribute/contribute.ts +30 -2
  10. package/cli/selftune/contribution-config.ts +249 -0
  11. package/cli/selftune/contribution-relay.ts +177 -0
  12. package/cli/selftune/contribution-signals.ts +219 -0
  13. package/cli/selftune/contribution-staging.ts +147 -0
  14. package/cli/selftune/contributions.ts +532 -0
  15. package/cli/selftune/creator-contributions.ts +333 -0
  16. package/cli/selftune/dashboard-contract.ts +209 -1
  17. package/cli/selftune/dashboard-server.ts +45 -11
  18. package/cli/selftune/eval/family-overlap.ts +714 -0
  19. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  20. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  21. package/cli/selftune/evolution/evidence.ts +5 -0
  22. package/cli/selftune/evolution/evolve-body.ts +62 -2
  23. package/cli/selftune/evolution/evolve.ts +58 -1
  24. package/cli/selftune/evolution/validate-body.ts +10 -0
  25. package/cli/selftune/evolution/validate-host-replay.ts +236 -0
  26. package/cli/selftune/evolution/validate-proposal.ts +10 -0
  27. package/cli/selftune/evolution/validate-routing.ts +112 -5
  28. package/cli/selftune/export.ts +2 -2
  29. package/cli/selftune/index.ts +41 -5
  30. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  31. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  32. package/cli/selftune/localdb/db.ts +2 -2
  33. package/cli/selftune/localdb/direct-write.ts +8 -3
  34. package/cli/selftune/localdb/materialize.ts +7 -2
  35. package/cli/selftune/localdb/queries.ts +712 -31
  36. package/cli/selftune/localdb/schema.ts +30 -1
  37. package/cli/selftune/recover.ts +153 -0
  38. package/cli/selftune/repair/skill-usage.ts +363 -4
  39. package/cli/selftune/routes/actions.ts +35 -1
  40. package/cli/selftune/routes/analytics.ts +14 -0
  41. package/cli/selftune/routes/index.ts +1 -0
  42. package/cli/selftune/routes/overview.ts +112 -4
  43. package/cli/selftune/routes/skill-report.ts +575 -11
  44. package/cli/selftune/status.ts +81 -2
  45. package/cli/selftune/sync.ts +56 -2
  46. package/cli/selftune/trust-model.ts +66 -0
  47. package/cli/selftune/types.ts +103 -0
  48. package/cli/selftune/utils/skill-detection.ts +43 -0
  49. package/cli/selftune/utils/text-similarity.ts +73 -0
  50. package/cli/selftune/watchlist.ts +65 -0
  51. package/package.json +1 -1
  52. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  53. package/packages/ui/src/components/EvidenceViewer.tsx +419 -145
  54. package/packages/ui/src/components/EvolutionTimeline.tsx +81 -29
  55. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  56. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  57. package/packages/ui/src/components/section-cards.tsx +12 -9
  58. package/packages/ui/src/primitives/card.tsx +1 -1
  59. package/packages/ui/src/types.ts +4 -0
  60. package/skill/SKILL.md +11 -1
  61. package/skill/Workflows/AlphaUpload.md +4 -0
  62. package/skill/Workflows/Composability.md +78 -0
  63. package/skill/Workflows/Contribute.md +6 -3
  64. package/skill/Workflows/Contributions.md +97 -0
  65. package/skill/Workflows/CreatorContributions.md +74 -0
  66. package/skill/Workflows/Dashboard.md +31 -0
  67. package/skill/Workflows/Evals.md +57 -8
  68. package/skill/Workflows/Evolve.md +23 -0
  69. package/skill/Workflows/Ingest.md +7 -0
  70. package/skill/Workflows/Initialize.md +20 -1
  71. package/skill/Workflows/Recover.md +84 -0
  72. package/skill/Workflows/RepairSkillUsage.md +12 -4
  73. package/skill/Workflows/Sync.md +18 -12
  74. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  75. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  76. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  77. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -8,8 +8,12 @@
8
8
  import type { Database } from "bun:sqlite";
9
9
 
10
10
  import type {
11
+ AnalyticsResponse,
12
+ AttentionItem,
13
+ AutonomousDecision,
11
14
  CommitRecord,
12
15
  CommitSummary,
16
+ DecisionKind,
13
17
  ExecutionMetrics,
14
18
  OrchestrateRunReport,
15
19
  OverviewPaginatedPayload,
@@ -636,7 +640,43 @@ function paginateSkillReportInvocations(
636
640
  * Get a summary list of all skills with aggregated stats.
637
641
  */
638
642
  export function getSkillsList(db: Database): SkillSummary[] {
639
- const rows = db
643
+ const trustedRows = queryTrustedSkillObservationRows(db);
644
+ const bySkill = new Map<
645
+ string,
646
+ Array<{
647
+ skill_name: string;
648
+ session_id: string;
649
+ occurred_at: string | null;
650
+ triggered: number;
651
+ matched_prompt_id: string | null;
652
+ confidence: number | null;
653
+ }>
654
+ >();
655
+
656
+ for (const row of trustedRows) {
657
+ const arr = bySkill.get(row.skill_name);
658
+ const base = {
659
+ skill_name: row.skill_name,
660
+ session_id: row.session_id,
661
+ occurred_at: row.occurred_at,
662
+ triggered: row.triggered,
663
+ matched_prompt_id: row.matched_prompt_id,
664
+ confidence: row.confidence,
665
+ };
666
+ if (arr) arr.push(base);
667
+ else bySkill.set(row.skill_name, [base]);
668
+ }
669
+
670
+ // Get set of skill names with evidence
671
+ const evidenceSkills = new Set(
672
+ (
673
+ db.query(`SELECT DISTINCT skill_name FROM evolution_evidence`).all() as Array<{
674
+ skill_name: string;
675
+ }>
676
+ ).map((r) => r.skill_name),
677
+ );
678
+
679
+ const skillScopeRows = db
640
680
  .query(
641
681
  `SELECT
642
682
  si.skill_name,
@@ -647,43 +687,203 @@ export function getSkillsList(db: Database): SkillSummary[] {
647
687
  (SELECT su.skill_scope FROM skill_usage su
648
688
  WHERE su.skill_name = si.skill_name AND su.skill_scope IS NOT NULL
649
689
  ORDER BY su.timestamp DESC LIMIT 1)
650
- ) as skill_scope,
651
- COUNT(*) as total_checks,
652
- SUM(CASE WHEN si.triggered = 1 THEN 1 ELSE 0 END) as triggered_count,
653
- COUNT(DISTINCT si.session_id) as unique_sessions,
654
- MAX(si.occurred_at) as last_seen
690
+ ) as skill_scope
655
691
  FROM skill_invocations si
656
- GROUP BY si.skill_name
657
- ORDER BY total_checks DESC`,
692
+ GROUP BY si.skill_name`,
658
693
  )
659
- .all() as Array<{
660
- skill_name: string;
661
- skill_scope: string | null;
662
- total_checks: number;
663
- triggered_count: number;
664
- unique_sessions: number;
665
- last_seen: string | null;
666
- }>;
694
+ .all() as Array<{ skill_name: string; skill_scope: string | null }>;
695
+ const scopeBySkill = new Map(skillScopeRows.map((row) => [row.skill_name, row.skill_scope]));
696
+
697
+ return [...bySkill.entries()]
698
+ .map(([skillName, rows]) => {
699
+ const totalChecks = rows.length;
700
+ const triggeredCount = rows.filter((row) => row.triggered === 1).length;
701
+ const uniqueSessions = new Set(rows.map((row) => row.session_id)).size;
702
+ const lastSeen =
703
+ rows
704
+ .map((row) => row.occurred_at)
705
+ .filter((value): value is string => value != null)
706
+ .sort((a, b) => b.localeCompare(a))[0] ?? null;
707
+ const withConfidence = rows.filter((row) => row.confidence != null);
708
+ const routingConfidence =
709
+ withConfidence.length > 0
710
+ ? withConfidence.reduce((sum, row) => sum + (row.confidence ?? 0), 0) /
711
+ withConfidence.length
712
+ : null;
713
+
714
+ return {
715
+ skill_name: skillName,
716
+ skill_scope: scopeBySkill.get(skillName) ?? null,
717
+ total_checks: totalChecks,
718
+ triggered_count: triggeredCount,
719
+ pass_rate: totalChecks > 0 ? triggeredCount / totalChecks : 0,
720
+ unique_sessions: uniqueSessions,
721
+ last_seen: lastSeen,
722
+ has_evidence: evidenceSkills.has(skillName),
723
+ routing_confidence: routingConfidence,
724
+ confidence_coverage: totalChecks > 0 ? withConfidence.length / totalChecks : 0,
725
+ };
726
+ })
727
+ .sort((a, b) => b.total_checks - a.total_checks);
728
+ }
667
729
 
668
- // Get set of skill names with evidence
669
- const evidenceSkills = new Set(
670
- (
671
- db.query(`SELECT DISTINCT skill_name FROM evolution_evidence`).all() as Array<{
672
- skill_name: string;
673
- }>
674
- ).map((r) => r.skill_name),
675
- );
/**
 * Build the performance analytics payload from SQLite.
 * Powers the GET /api/v2/analytics endpoint.
 *
 * All per-observation figures are derived from the trusted, de-duplicated rows
 * returned by `queryTrustedSkillObservationRows`; deployment data comes from
 * `evolution_audit` rows whose action is `'deployed'`.
 *
 * @param db - Open SQLite handle.
 * @returns Pass-rate trend (last 90 days, per UTC day), skill rankings, daily
 *   activity (last 84 days), per-deployment before/after pass rates and summary
 *   aggregates (30-day window).
 */
export function getAnalyticsPayload(db: Database): AnalyticsResponse {
  const trustedRows = queryTrustedSkillObservationRows(db);
  const today = new Date();

  // Collapse a timestamp to its UTC calendar day (YYYY-MM-DD); null when missing or unparseable.
  const dateKey = (value: string | null): string | null => {
    if (!value) return null;
    const parsed = new Date(value);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString().slice(0, 10);
  };
  // UTC calendar day `days` days before `today`.
  const cutoffDate = (days: number): string => {
    const cutoff = new Date(today);
    cutoff.setUTCDate(cutoff.getUTCDate() - days);
    return cutoff.toISOString().slice(0, 10);
  };

  // The window boundaries do not depend on the row, so compute them once.
  const trendCutoff = cutoffDate(90);
  const activityCutoff = cutoffDate(84); // 12 weeks, for the heatmap
  const recentCutoff = cutoffDate(30);

  const passRateTrendByDate = new Map<string, { triggered: number; total: number }>();
  const skillRankingMap = new Map<string, { triggered_count: number; total_checks: number }>();
  const dailyActivityByDate = new Map<string, number>();
  const rowsBySkill = new Map<string, typeof trustedRows>();
  const activeSkills30d = new Set<string>();
  let totalChecks30d = 0;

  // Single pass over the trusted rows feeds every aggregate below.
  for (const row of trustedRows) {
    const wasTriggered = row.triggered === 1;

    // Skill rankings count every row, dated or not.
    const ranking = skillRankingMap.get(row.skill_name) ?? { triggered_count: 0, total_checks: 0 };
    ranking.total_checks += 1;
    if (wasTriggered) ranking.triggered_count += 1;
    skillRankingMap.set(row.skill_name, ranking);

    // Index by skill so each deployment only scans its own skill's rows.
    const sameSkill = rowsBySkill.get(row.skill_name);
    if (sameSkill) sameSkill.push(row);
    else rowsBySkill.set(row.skill_name, [row]);

    // The remaining aggregates are date-windowed and ignore undated rows.
    const occurredDate = dateKey(row.occurred_at);
    if (!occurredDate) continue;

    if (occurredDate >= trendCutoff) {
      const counts = passRateTrendByDate.get(occurredDate) ?? { triggered: 0, total: 0 };
      counts.total += 1;
      if (wasTriggered) counts.triggered += 1;
      passRateTrendByDate.set(occurredDate, counts);
    }
    if (occurredDate >= activityCutoff) {
      dailyActivityByDate.set(occurredDate, (dailyActivityByDate.get(occurredDate) ?? 0) + 1);
    }
    if (occurredDate >= recentCutoff) {
      totalChecks30d += 1;
      activeSkills30d.add(row.skill_name);
    }
  }

  // 1. Pass rate trend — last 90 days, bucketed by day, oldest first
  const pass_rate_trend = [...passRateTrendByDate.entries()]
    .map(([date, counts]) => ({
      date,
      pass_rate: counts.total > 0 ? counts.triggered / counts.total : 0,
      total_checks: counts.total,
    }))
    .sort((a, b) => a.date.localeCompare(b.date));

  // 2. Skill rankings — best pass rate first, then most checks, then name
  const skill_rankings = [...skillRankingMap.entries()]
    .map(([skill_name, counts]) => ({
      skill_name,
      pass_rate: counts.total_checks > 0 ? counts.triggered_count / counts.total_checks : 0,
      total_checks: counts.total_checks,
      triggered_count: counts.triggered_count,
    }))
    .sort(
      (a, b) =>
        b.pass_rate - a.pass_rate ||
        b.total_checks - a.total_checks ||
        a.skill_name.localeCompare(b.skill_name),
    );

  // 3. Daily activity — last 84 days, oldest first
  const daily_activity = [...dailyActivityByDate.entries()]
    .map(([date, checks]) => ({ date, checks }))
    .sort((a, b) => a.date.localeCompare(b.date));

  // 4. Evolution impact — before/after pass rates for deployed evolutions
  const deployedRows = db
    .query(
      `SELECT ea.skill_name, ea.proposal_id, ea.timestamp as deployed_at
       FROM evolution_audit ea
       WHERE ea.action = 'deployed' AND ea.skill_name IS NOT NULL
       ORDER BY ea.timestamp DESC`,
    )
    .all() as Array<{ skill_name: string; proposal_id: string; deployed_at: string }>;

  const evolution_impact: AnalyticsResponse["evolution_impact"] = [];
  for (const deploy of deployedRows) {
    let beforeTotal = 0;
    let beforeTriggered = 0;
    let afterTotal = 0;
    let afterTriggered = 0;

    for (const row of rowsBySkill.get(deploy.skill_name) ?? []) {
      // Plain string comparison of timestamps; a missing occurred_at compares as ""
      // and therefore lands in the "before" bucket.
      const occurredAt = row.occurred_at ?? "";
      const hit = row.triggered === 1 ? 1 : 0;
      if (occurredAt < deploy.deployed_at) {
        beforeTotal += 1;
        beforeTriggered += hit;
      } else if (occurredAt >= deploy.deployed_at) {
        afterTotal += 1;
        afterTriggered += hit;
      }
    }

    evolution_impact.push({
      skill_name: deploy.skill_name,
      proposal_id: deploy.proposal_id,
      deployed_at: deploy.deployed_at,
      pass_rate_before: beforeTotal > 0 ? beforeTriggered / beforeTotal : 0,
      pass_rate_after: afterTotal > 0 ? afterTriggered / afterTotal : 0,
    });
  }

  // 5. Summary aggregates
  const totalEvolutionsRow = db
    .query(`SELECT COUNT(*) as c FROM evolution_audit WHERE action = 'deployed'`)
    .get() as { c: number } | null;

  // Average improvement across all deployed evolutions
  let avgImprovement = 0;
  if (evolution_impact.length > 0) {
    const totalImprovement = evolution_impact.reduce(
      (sum, e) => sum + (e.pass_rate_after - e.pass_rate_before),
      0,
    );
    avgImprovement = totalImprovement / evolution_impact.length;
  }

  const summary: AnalyticsResponse["summary"] = {
    total_evolutions: totalEvolutionsRow?.c ?? 0,
    avg_improvement: avgImprovement,
    total_checks_30d: totalChecks30d,
    active_skills: activeSkills30d.size,
  };

  return {
    pass_rate_trend,
    skill_rankings,
    daily_activity,
    evolution_impact,
    summary,
  };
}
688
888
 
689
889
  /**
@@ -930,6 +1130,10 @@ export function queryEvolutionAudit(
930
1130
  action: string;
931
1131
  details: string;
932
1132
  eval_snapshot?: Record<string, unknown>;
1133
+ validation_mode?: string;
1134
+ validation_agent?: string;
1135
+ validation_fixture_id?: string;
1136
+ validation_evidence_ref?: string;
933
1137
  }> {
934
1138
  const sql = skillName
935
1139
  ? `SELECT * FROM evolution_audit
@@ -943,12 +1147,18 @@ export function queryEvolutionAudit(
943
1147
  return rows.map((r) => ({
944
1148
  timestamp: r.timestamp as string,
945
1149
  proposal_id: r.proposal_id as string,
946
- skill_name: r.skill_name as string | undefined,
1150
+ skill_name: typeof r.skill_name === "string" ? r.skill_name : undefined,
947
1151
  action: r.action as string,
948
1152
  details: r.details as string,
949
1153
  eval_snapshot: r.eval_snapshot_json
950
1154
  ? (safeParseJson(r.eval_snapshot_json as string) as Record<string, unknown>)
951
1155
  : undefined,
1156
+ validation_mode: typeof r.validation_mode === "string" ? r.validation_mode : undefined,
1157
+ validation_agent: typeof r.validation_agent === "string" ? r.validation_agent : undefined,
1158
+ validation_fixture_id:
1159
+ typeof r.validation_fixture_id === "string" ? r.validation_fixture_id : undefined,
1160
+ validation_evidence_ref:
1161
+ typeof r.validation_evidence_ref === "string" ? r.validation_evidence_ref : undefined,
952
1162
  }));
953
1163
  }
954
1164
 
@@ -1087,6 +1297,72 @@ export function queryGradingResults(db: Database): Array<{
1087
1297
  }>;
1088
1298
  }
1089
1299
 
/**
 * Count the pending rows of `creator_contribution_staging`, one entry per skill.
 *
 * @param db - Open SQLite handle.
 * @returns `{ skill_name, pending_count }` pairs ordered by skill name; only rows
 *   whose `status` is `'pending'` are counted.
 */
export function getCreatorContributionStagingCounts(db: Database): Array<{
  skill_name: string;
  pending_count: number;
}> {
  const pendingPerSkill = db.query(
    `SELECT skill_name, COUNT(*) AS pending_count
     FROM creator_contribution_staging
     WHERE status = 'pending'
     GROUP BY skill_name
     ORDER BY skill_name`,
  );
  const counts = pendingPerSkill.all() as Array<{
    skill_name: string;
    pending_count: number;
  }>;
  return counts;
}
1317
+
/**
 * Row counts of `creator_contribution_staging` broken down by `status`,
 * as returned by `getCreatorContributionRelayStats`.
 */
export interface CreatorContributionRelayStats {
  /** Number of rows whose status is `'pending'`. */
  pending: number;
  /** Number of rows whose status is `'sending'`. */
  sending: number;
  /** Number of rows whose status is `'sent'`. */
  sent: number;
  /** Number of rows whose status is `'failed'`. */
  failed: number;
}
1324
+
/**
 * One row of the `creator_contribution_staging` table, with the columns
 * selected by `getPendingCreatorContributionRows`.
 */
export interface CreatorContributionStagingRow {
  /** Row id; pending rows are read in ascending `id` order. */
  id: number;
  /** Deduplication key of the staged contribution. */
  dedupe_key: string;
  /** Skill the contribution belongs to. */
  skill_name: string;
  /** Identifier of the creator. */
  creator_id: string;
  /** Staged payload as text (presumably serialized JSON — confirm against the writer). */
  payload_json: string;
  /** Relay status; queries in this file reference 'pending', 'sending', 'sent' and 'failed'. */
  status: string;
  /** When the row was staged. */
  staged_at: string;
  /** When the row was last updated. */
  updated_at: string;
  /** Last recorded error, or null when there is none. */
  last_error: string | null;
}
1336
+
/**
 * Tally the rows of `creator_contribution_staging` by relay status.
 *
 * @param db - Open SQLite handle.
 * @returns Counts for the 'pending', 'sending', 'sent' and 'failed' statuses;
 *   all zeros when the query yields no row.
 */
export function getCreatorContributionRelayStats(db: Database): CreatorContributionRelayStats {
  const tallyStatement = db.query(
    `SELECT
       COALESCE(SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END), 0) AS pending,
       COALESCE(SUM(CASE WHEN status = 'sending' THEN 1 ELSE 0 END), 0) AS sending,
       COALESCE(SUM(CASE WHEN status = 'sent' THEN 1 ELSE 0 END), 0) AS sent,
       COALESCE(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END), 0) AS failed
     FROM creator_contribution_staging`,
  );
  const tally = tallyStatement.get() as CreatorContributionRelayStats | null;
  if (tally == null) {
    return { pending: 0, sending: 0, sent: 0, failed: 0 };
  }
  return tally;
}
1350
+
/**
 * Fetch staged creator contributions that are still pending, lowest `id` first.
 *
 * @param db - Open SQLite handle.
 * @param limit - Value bound to the query's `LIMIT` clause (default 50).
 * @returns The matching `creator_contribution_staging` rows.
 */
export function getPendingCreatorContributionRows(
  db: Database,
  limit = 50,
): CreatorContributionStagingRow[] {
  const pendingSql = `SELECT id, dedupe_key, skill_name, creator_id, payload_json, status, staged_at, updated_at, last_error
     FROM creator_contribution_staging
     WHERE status = 'pending'
     ORDER BY id ASC
     LIMIT ?`;
  const fetched = db.query(pendingSql).all(limit);
  return fetched as CreatorContributionStagingRow[];
}
1365
+
1090
1366
  // -- Canonical record staging query -------------------------------------------
1091
1367
 
1092
1368
  /**
@@ -1461,6 +1737,411 @@ export function getSkillCommitSummary(db: Database, skillName: string): CommitSu
1461
1737
 
1462
1738
  // -- Helpers ------------------------------------------------------------------
1463
1739
 
1740
+ // -- Autonomy-first dashboard queries -----------------------------------------
1741
+
/**
 * Per-skill trust figures as assembled by `getSkillTrustSummaries` from the
 * trusted observation rows and the `evolution_audit` table.
 */
export interface SkillTrustSummary {
  /** Skill the figures describe. */
  skill_name: string;
  /** Number of trusted observation rows for the skill. */
  total_checks: number;
  /** Rows whose `triggered` flag equals 1. */
  triggered_count: number;
  /** (total_checks - triggered_count) / total_checks, or 0 without checks. */
  miss_rate: number;
  /** Count of system-like prompts; `getSkillTrustSummaries` currently always sets 0. */
  system_like_count: number;
  /** Share of system-like prompts; `getSkillTrustSummaries` currently always sets 0. */
  system_like_rate: number;
  /** Fraction of rows that carry a `matched_prompt_id`. */
  prompt_link_rate: number;
  /** Action of the most recent `evolution_audit` entry for the skill, if any. */
  latest_action: string | null;
  /** triggered_count / total_checks, or 0 without checks. */
  pass_rate: number;
  /** Greatest `occurred_at` among the skill's rows, or null when none is set. */
  last_seen: string | null;
}
1754
+
/**
 * One de-duplicated skill observation, as returned by
 * `queryTrustedSkillObservationRows`.
 */
export interface TrustedSkillObservationRow {
  /** Skill that was observed. */
  skill_name: string;
  /** Session the observation belongs to. */
  session_id: string;
  /** Timestamp of the observation; may be null. */
  occurred_at: string | null;
  /** Trigger flag; consumers in this file treat `1` as "triggered". */
  triggered: number;
  /** Id of the prompt the invocation was matched to, if any. */
  matched_prompt_id: string | null;
  /** Routing confidence, when recorded. */
  confidence: number | null;
  /** Invocation mode, when recorded. */
  invocation_mode: string | null;
  /** The invocation's query, else the matched prompt's text, else "". */
  query_text: string;
}
1765
+
/**
 * Load skill invocations and reduce them to a trusted, de-duplicated set.
 *
 * Steps, per `skill_invocations` row (left-joined to `prompts` on the matched prompt id):
 *  1. Drop rows whose text starts with a system-like tag, or whose prompt kind is
 *     "meta" and whose text contains one of the internal evaluation markers.
 *  2. Drop rows whose `skill_invocation_id` contains ":su:" (legacy materialized rows).
 *  3. Group the rest per skill by `session_id` + whitespace-normalized, lower-cased
 *     query text (or by `skill_invocation_id` when there is no query text).
 *  4. Keep one row per group: triggered rows win, then canonical rows, then repaired
 *     triggers, then the greater `occurred_at`; on a full tie the earliest-read row wins.
 *
 * @param db - Open SQLite handle.
 * @returns The surviving rows, ordered by first appearance of each skill and,
 *   within a skill, by first appearance of each group.
 */
export function queryTrustedSkillObservationRows(db: Database): TrustedSkillObservationRow[] {
  type ObservationKind =
    | "canonical"
    | "repaired_trigger"
    | "repaired_contextual_miss"
    | "legacy_materialized";
  type Candidate = { observation: TrustedSkillObservationRow; kind: ObservationKind };

  const SYSTEM_LIKE_PREFIXES = ["<system_instruction>", "<system-instruction>", "<command-name>"];
  const INTERNAL_EVAL_MARKERS = [
    "you are an evaluation assistant",
    "you are a skill description optimizer",
    "would each query trigger this skill",
    "propose an improved description",
    "failure patterns:",
    "output only valid json",
  ];

  // True when the text (ignoring leading whitespace) opens with a system-like tag.
  const isSystemLike = (text: string | null | undefined): boolean => {
    if (!text) return false;
    const trimmed = text.trimStart();
    return SYSTEM_LIKE_PREFIXES.some((prefix) => trimmed.startsWith(prefix));
  };
  // True for "meta" prompts that contain one of the internal evaluation markers.
  const isInternalSelftunePrompt = (
    text: string | null | undefined,
    promptKind: string | null | undefined,
  ): boolean => {
    if (!text) return false;
    const lowered = text.toLowerCase();
    return (
      promptKind === "meta" && INTERNAL_EVAL_MARKERS.some((marker) => lowered.includes(marker))
    );
  };
  const isPollutingPrompt = (
    text: string | null | undefined,
    promptKind: string | null | undefined,
  ): boolean => isSystemLike(text) || isInternalSelftunePrompt(text, promptKind);

  const classifyObservationKind = (
    skillInvocationId: string,
    captureMode: string | null,
    triggered: number,
    rawSourceRefJson: string | null,
  ): ObservationKind => {
    if (skillInvocationId.includes(":su:")) return "legacy_materialized";
    if (captureMode === "repair") {
      const rawSourceRef = safeParseJson(rawSourceRefJson) as {
        metadata?: { miss_type?: string };
      } | null;
      if (triggered === 0 && rawSourceRef?.metadata?.miss_type === "contextual_read") {
        return "repaired_contextual_miss";
      }
      return "repaired_trigger";
    }
    return "canonical";
  };

  const normalizeQueryForGrouping = (query: string): string =>
    query.replace(/\s+/g, " ").trim().toLowerCase();

  // Preference score inside a dedupe group: triggered beats everything else,
  // then canonical capture, then repaired triggers.
  const scoreOf = (candidate: Candidate): number =>
    (candidate.observation.triggered === 1 ? 100 : 0) +
    (candidate.kind === "canonical" ? 20 : 0) +
    (candidate.kind === "repaired_trigger" ? 15 : 0);

  // Strictly-better test, so the first row read keeps its place on a tie.
  const outranks = (challenger: Candidate, incumbent: Candidate): boolean => {
    const scoreGap = scoreOf(challenger) - scoreOf(incumbent);
    if (scoreGap !== 0) return scoreGap > 0;
    return (
      (challenger.observation.occurred_at ?? "").localeCompare(
        incumbent.observation.occurred_at ?? "",
      ) > 0
    );
  };

  const rows = db
    .query(
      `SELECT
         si.skill_name,
         si.session_id,
         si.occurred_at,
         si.triggered,
         si.matched_prompt_id,
         si.confidence,
         si.invocation_mode,
         si.skill_invocation_id,
         si.capture_mode,
         si.raw_source_ref,
         si.query,
         p.prompt_text,
         p.prompt_kind
       FROM skill_invocations si
       LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id`,
    )
    .all() as Array<{
    skill_name: string;
    session_id: string;
    occurred_at: string | null;
    triggered: number;
    matched_prompt_id: string | null;
    confidence: number | null;
    invocation_mode: string | null;
    skill_invocation_id: string;
    capture_mode: string | null;
    raw_source_ref: string | null;
    query: string | null;
    prompt_text: string | null;
    prompt_kind: string | null;
  }>;

  // skill -> group key -> best candidate so far. Map keeps first-insertion order,
  // and overwriting an existing key does not move it, so output order is stable.
  const bestBySkill = new Map<string, Map<string, Candidate>>();

  for (const row of rows) {
    // Display text prefers the invocation's own query; the pollution check prefers the prompt text.
    const queryText = row.query || row.prompt_text || "";
    const pollutionText = row.prompt_text || row.query || "";
    const kind = classifyObservationKind(
      row.skill_invocation_id,
      row.capture_mode,
      row.triggered,
      row.raw_source_ref,
    );
    if (isPollutingPrompt(pollutionText, row.prompt_kind)) continue;
    if (kind === "legacy_materialized") continue;

    const normalizedQuery = normalizeQueryForGrouping(queryText);
    // Without query text there is nothing to dedupe on, so the row forms its own group.
    const groupKey =
      normalizedQuery.length > 0
        ? `${row.session_id}::${normalizedQuery}`
        : `${row.skill_invocation_id}`;

    const candidate: Candidate = {
      observation: {
        skill_name: row.skill_name,
        session_id: row.session_id,
        occurred_at: row.occurred_at,
        triggered: row.triggered,
        matched_prompt_id: row.matched_prompt_id,
        confidence: row.confidence,
        invocation_mode: row.invocation_mode,
        query_text: queryText,
      },
      kind,
    };

    let groups = bestBySkill.get(row.skill_name);
    if (!groups) {
      groups = new Map<string, Candidate>();
      bestBySkill.set(row.skill_name, groups);
    }
    const incumbent = groups.get(groupKey);
    if (!incumbent || outranks(candidate, incumbent)) {
      groups.set(groupKey, candidate);
    }
  }

  const trustedRows: TrustedSkillObservationRow[] = [];
  for (const groups of bestBySkill.values()) {
    for (const { observation } of groups.values()) {
      trustedRows.push(observation);
    }
  }
  return trustedRows;
}
1957
+
/**
 * Summarize the trusted observation rows per skill and attach the most recent
 * `evolution_audit` action recorded for that skill.
 *
 * `system_like_count` and `system_like_rate` are always reported as 0 here.
 *
 * @param db - Open SQLite handle.
 * @returns One summary per skill present in the trusted rows, in order of first appearance.
 */
export function getSkillTrustSummaries(db: Database): SkillTrustSummary[] {
  const observations = queryTrustedSkillObservationRows(db);

  // Audit entries arrive newest first, so the first action seen per skill is its latest.
  const auditTrail = db
    .query(
      `SELECT skill_name, action, timestamp
       FROM evolution_audit
       WHERE skill_name IS NOT NULL
       ORDER BY timestamp DESC`,
    )
    .all() as Array<{
    skill_name: string | null;
    action: string;
    timestamp: string;
  }>;

  const newestActionBySkill = new Map<string, string>();
  for (const entry of auditTrail) {
    if (!entry.skill_name) continue;
    if (newestActionBySkill.has(entry.skill_name)) continue;
    newestActionBySkill.set(entry.skill_name, entry.action);
  }

  type Tally = {
    total: number;
    triggered: number;
    promptLinked: number;
    lastSeen: string | null;
  };
  const tallies = new Map<string, Tally>();

  for (const observation of observations) {
    let tally = tallies.get(observation.skill_name);
    if (!tally) {
      tally = { total: 0, triggered: 0, promptLinked: 0, lastSeen: null };
      tallies.set(observation.skill_name, tally);
    }

    tally.total += 1;
    if (observation.triggered === 1) tally.triggered += 1;
    if (observation.matched_prompt_id != null) tally.promptLinked += 1;

    // Track the greatest occurred_at; only a strictly greater value replaces the current one.
    const seenAt = observation.occurred_at;
    if (seenAt != null && (tally.lastSeen === null || seenAt.localeCompare(tally.lastSeen) > 0)) {
      tally.lastSeen = seenAt;
    }
  }

  return [...tallies.entries()].map(([skillName, tally]) => {
    const hasChecks = tally.total > 0;
    return {
      skill_name: skillName,
      total_checks: tally.total,
      triggered_count: tally.triggered,
      miss_rate: hasChecks ? (tally.total - tally.triggered) / tally.total : 0,
      system_like_count: 0,
      system_like_rate: 0,
      prompt_link_rate: hasChecks ? tally.promptLinked / tally.total : 0,
      latest_action: newestActionBySkill.get(skillName) ?? null,
      pass_rate: hasChecks ? tally.triggered / tally.total : 0,
      last_seen: tally.lastSeen,
    };
  });
}
2016
+
/**
 * Build the list of skills that need attention.
 *
 * Each skill yields at most one item; the first matching rule wins:
 *  1. latest audit action is "rolled_back"            -> needs_review / critical
 *  2. the skill has a pending proposal                -> needs_review / info
 *  3. fewer than 5 checks                             -> nothing is reported
 *  4. miss rate above 10%                             -> regression / warning
 *  5. system-like rate above 10%                      -> polluted / warning
 *
 * @param db - Open SQLite handle.
 * @returns Attention items in the order the skill summaries were produced.
 */
export function getAttentionQueue(db: Database): AttentionItem[] {
  const trustSummaries = getSkillTrustSummaries(db);
  const skillsWithPendingProposal = new Set(
    getPendingProposals(db)
      .map((p) => p.skill_name)
      .filter(Boolean),
  );

  const queue: AttentionItem[] = [];
  // Every item shares the skill name and uses the skill's last-seen time ("" when unknown).
  const enqueue = (
    summary: SkillTrustSummary,
    category: AttentionItem["category"],
    severity: AttentionItem["severity"],
    reason: string,
    recommended_action: string,
  ): void => {
    queue.push({
      skill_name: summary.skill_name,
      category,
      severity,
      reason,
      recommended_action,
      timestamp: summary.last_seen ?? "",
    });
  };

  for (const summary of trustSummaries) {
    if (summary.latest_action === "rolled_back") {
      enqueue(
        summary,
        "needs_review",
        "critical",
        "Rolled back after deployment",
        "Review rollback evidence and decide whether to re-evolve",
      );
    } else if (skillsWithPendingProposal.has(summary.skill_name)) {
      enqueue(
        summary,
        "needs_review",
        "info",
        "Proposal awaiting review",
        "Review and approve or reject the pending proposal",
      );
    } else if (summary.total_checks < 5) {
      // Too few observations to judge rates.
      continue;
    } else if (summary.miss_rate > 0.1) {
      enqueue(
        summary,
        "regression",
        "warning",
        `High miss rate (${Math.round(summary.miss_rate * 100)}%)`,
        "Review missed invocations and consider evolving the skill description",
      );
    } else if (summary.system_like_rate > 0.1) {
      enqueue(
        summary,
        "polluted",
        "warning",
        `Possible telemetry pollution (${Math.round(summary.system_like_rate * 100)}% system-like)`,
        "Inspect prompts for system-injected noise",
      );
    }
  }

  return queue;
}
2078
+
/**
 * List the most recent autonomous decisions recorded in `evolution_audit`
 * during the last seven days, newest first.
 *
 * Rows without a skill name and rows whose action has no decision kind are
 * excluded inside the query, so `limit` applies to the decisions actually
 * returned rather than to rows that would be discarded afterwards.
 *
 * Both sides of the time filter go through SQLite's `datetime()`, so a stored
 * `2024-05-01T10:00:00.000Z` is compared with the cutoff as a point in time,
 * not as raw text (where `'T'` sorts after `' '`).
 * NOTE(review): assumes `timestamp` holds a time string SQLite can parse — confirm
 * against the writer; unparseable values are excluded.
 *
 * @param db - Open SQLite handle.
 * @param limit - Maximum number of decisions to return (default 20).
 * @returns Decisions ordered by `timestamp` descending.
 */
export function getRecentDecisions(db: Database, limit = 20): AutonomousDecision[] {
  const rows = db
    .query(
      `SELECT timestamp, proposal_id, skill_name, action, details, eval_snapshot_json
       FROM evolution_audit
       WHERE skill_name IS NOT NULL
         AND action IN ('proposed', 'created', 'rejected', 'validated', 'deployed', 'rolled_back')
         AND datetime(timestamp) >= datetime('now', '-7 days')
       ORDER BY timestamp DESC
       LIMIT ?`,
    )
    .all(limit) as Array<{
    timestamp: string;
    proposal_id: string;
    skill_name: string | null;
    action: string;
    details: string;
    eval_snapshot_json: string | null;
  }>;

  // Map an audit action to its decision kind; null for actions that are not decisions.
  const kindFor = (action: string, evalSnapshotJson: string | null): DecisionKind | null => {
    switch (action) {
      case "proposed":
      case "created":
        return "proposal_created";
      case "rejected":
        return "proposal_rejected";
      case "validated": {
        // The snapshot is only needed here, so it is parsed lazily.
        const evalSnapshot = safeParseJson(evalSnapshotJson) as {
          regressions?: unknown[];
        } | null;
        return evalSnapshot?.regressions && evalSnapshot.regressions.length > 0
          ? "validation_failed"
          : "proposal_created"; // validated without regressions is still a creation step
      }
      case "deployed":
        return "proposal_deployed";
      case "rolled_back":
        return "rollback_triggered";
      default:
        return null;
    }
  };

  const decisions: AutonomousDecision[] = [];
  for (const row of rows) {
    // Already guaranteed by the query; kept as a guard so the type narrows without `!`.
    if (row.skill_name == null) continue;

    const kind = kindFor(row.action, row.eval_snapshot_json);
    if (!kind) continue;

    decisions.push({
      timestamp: row.timestamp,
      kind,
      skill_name: row.skill_name,
      proposal_id: row.proposal_id,
      summary: row.details ?? "",
    });
  }
  return decisions;
}
2142
+
2143
+ // -- Helpers ------------------------------------------------------------------
2144
+
1464
2145
  export function safeParseJsonArray<T = string>(json: string | null): T[] {
1465
2146
  if (!json) return [];
1466
2147
  try {