selftune 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +9 -4
  2. package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
  8. package/cli/selftune/constants.ts +10 -0
  9. package/cli/selftune/contribute/contribute.ts +30 -2
  10. package/cli/selftune/contribution-config.ts +249 -0
  11. package/cli/selftune/contribution-relay.ts +177 -0
  12. package/cli/selftune/contribution-signals.ts +219 -0
  13. package/cli/selftune/contribution-staging.ts +147 -0
  14. package/cli/selftune/contributions.ts +532 -0
  15. package/cli/selftune/creator-contributions.ts +333 -0
  16. package/cli/selftune/dashboard-contract.ts +205 -1
  17. package/cli/selftune/dashboard-server.ts +45 -11
  18. package/cli/selftune/eval/family-overlap.ts +395 -0
  19. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  20. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  21. package/cli/selftune/export.ts +2 -2
  22. package/cli/selftune/index.ts +41 -5
  23. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  24. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  25. package/cli/selftune/localdb/db.ts +2 -2
  26. package/cli/selftune/localdb/queries.ts +701 -30
  27. package/cli/selftune/localdb/schema.ts +20 -0
  28. package/cli/selftune/recover.ts +153 -0
  29. package/cli/selftune/repair/skill-usage.ts +363 -4
  30. package/cli/selftune/routes/actions.ts +35 -1
  31. package/cli/selftune/routes/analytics.ts +14 -0
  32. package/cli/selftune/routes/index.ts +1 -0
  33. package/cli/selftune/routes/overview.ts +112 -4
  34. package/cli/selftune/routes/skill-report.ts +569 -10
  35. package/cli/selftune/status.ts +81 -2
  36. package/cli/selftune/sync.ts +56 -2
  37. package/cli/selftune/trust-model.ts +66 -0
  38. package/cli/selftune/types.ts +49 -0
  39. package/cli/selftune/utils/skill-detection.ts +43 -0
  40. package/cli/selftune/watchlist.ts +65 -0
  41. package/package.json +1 -1
  42. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  43. package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
  44. package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
  45. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  46. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  47. package/packages/ui/src/components/section-cards.tsx +12 -9
  48. package/packages/ui/src/primitives/card.tsx +1 -1
  49. package/skill/SKILL.md +11 -1
  50. package/skill/Workflows/AlphaUpload.md +4 -0
  51. package/skill/Workflows/Composability.md +64 -0
  52. package/skill/Workflows/Contribute.md +6 -3
  53. package/skill/Workflows/Contributions.md +97 -0
  54. package/skill/Workflows/CreatorContributions.md +74 -0
  55. package/skill/Workflows/Dashboard.md +31 -0
  56. package/skill/Workflows/Evals.md +57 -8
  57. package/skill/Workflows/Ingest.md +7 -0
  58. package/skill/Workflows/Initialize.md +20 -1
  59. package/skill/Workflows/Recover.md +84 -0
  60. package/skill/Workflows/RepairSkillUsage.md +12 -4
  61. package/skill/Workflows/Sync.md +18 -12
  62. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  63. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  64. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  65. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -8,8 +8,12 @@
8
8
  import type { Database } from "bun:sqlite";
9
9
 
10
10
  import type {
11
+ AnalyticsResponse,
12
+ AttentionItem,
13
+ AutonomousDecision,
11
14
  CommitRecord,
12
15
  CommitSummary,
16
+ DecisionKind,
13
17
  ExecutionMetrics,
14
18
  OrchestrateRunReport,
15
19
  OverviewPaginatedPayload,
@@ -636,7 +640,43 @@ function paginateSkillReportInvocations(
636
640
  * Get a summary list of all skills with aggregated stats.
637
641
  */
638
642
  export function getSkillsList(db: Database): SkillSummary[] {
639
- const rows = db
643
+ const trustedRows = queryTrustedSkillObservationRows(db);
644
+ const bySkill = new Map<
645
+ string,
646
+ Array<{
647
+ skill_name: string;
648
+ session_id: string;
649
+ occurred_at: string | null;
650
+ triggered: number;
651
+ matched_prompt_id: string | null;
652
+ confidence: number | null;
653
+ }>
654
+ >();
655
+
656
+ for (const row of trustedRows) {
657
+ const arr = bySkill.get(row.skill_name);
658
+ const base = {
659
+ skill_name: row.skill_name,
660
+ session_id: row.session_id,
661
+ occurred_at: row.occurred_at,
662
+ triggered: row.triggered,
663
+ matched_prompt_id: row.matched_prompt_id,
664
+ confidence: row.confidence,
665
+ };
666
+ if (arr) arr.push(base);
667
+ else bySkill.set(row.skill_name, [base]);
668
+ }
669
+
670
+ // Get set of skill names with evidence
671
+ const evidenceSkills = new Set(
672
+ (
673
+ db.query(`SELECT DISTINCT skill_name FROM evolution_evidence`).all() as Array<{
674
+ skill_name: string;
675
+ }>
676
+ ).map((r) => r.skill_name),
677
+ );
678
+
679
+ const skillScopeRows = db
640
680
  .query(
641
681
  `SELECT
642
682
  si.skill_name,
@@ -647,43 +687,203 @@ export function getSkillsList(db: Database): SkillSummary[] {
647
687
  (SELECT su.skill_scope FROM skill_usage su
648
688
  WHERE su.skill_name = si.skill_name AND su.skill_scope IS NOT NULL
649
689
  ORDER BY su.timestamp DESC LIMIT 1)
650
- ) as skill_scope,
651
- COUNT(*) as total_checks,
652
- SUM(CASE WHEN si.triggered = 1 THEN 1 ELSE 0 END) as triggered_count,
653
- COUNT(DISTINCT si.session_id) as unique_sessions,
654
- MAX(si.occurred_at) as last_seen
690
+ ) as skill_scope
655
691
  FROM skill_invocations si
656
- GROUP BY si.skill_name
657
- ORDER BY total_checks DESC`,
692
+ GROUP BY si.skill_name`,
658
693
  )
659
- .all() as Array<{
660
- skill_name: string;
661
- skill_scope: string | null;
662
- total_checks: number;
663
- triggered_count: number;
664
- unique_sessions: number;
665
- last_seen: string | null;
666
- }>;
694
+ .all() as Array<{ skill_name: string; skill_scope: string | null }>;
695
+ const scopeBySkill = new Map(skillScopeRows.map((row) => [row.skill_name, row.skill_scope]));
696
+
697
+ return [...bySkill.entries()]
698
+ .map(([skillName, rows]) => {
699
+ const totalChecks = rows.length;
700
+ const triggeredCount = rows.filter((row) => row.triggered === 1).length;
701
+ const uniqueSessions = new Set(rows.map((row) => row.session_id)).size;
702
+ const lastSeen =
703
+ rows
704
+ .map((row) => row.occurred_at)
705
+ .filter((value): value is string => value != null)
706
+ .sort((a, b) => b.localeCompare(a))[0] ?? null;
707
+ const withConfidence = rows.filter((row) => row.confidence != null);
708
+ const routingConfidence =
709
+ withConfidence.length > 0
710
+ ? withConfidence.reduce((sum, row) => sum + (row.confidence ?? 0), 0) /
711
+ withConfidence.length
712
+ : null;
713
+
714
+ return {
715
+ skill_name: skillName,
716
+ skill_scope: scopeBySkill.get(skillName) ?? null,
717
+ total_checks: totalChecks,
718
+ triggered_count: triggeredCount,
719
+ pass_rate: totalChecks > 0 ? triggeredCount / totalChecks : 0,
720
+ unique_sessions: uniqueSessions,
721
+ last_seen: lastSeen,
722
+ has_evidence: evidenceSkills.has(skillName),
723
+ routing_confidence: routingConfidence,
724
+ confidence_coverage: totalChecks > 0 ? withConfidence.length / totalChecks : 0,
725
+ };
726
+ })
727
+ .sort((a, b) => b.total_checks - a.total_checks);
728
+ }
667
729
 
668
- // Get set of skill names with evidence
669
- const evidenceSkills = new Set(
670
- (
671
- db.query(`SELECT DISTINCT skill_name FROM evolution_evidence`).all() as Array<{
672
- skill_name: string;
673
- }>
674
- ).map((r) => r.skill_name),
675
- );
730
+ /**
731
+ * Build the performance analytics payload from SQLite.
732
+ * Powers the GET /api/v2/analytics endpoint.
733
+ */
734
+ export function getAnalyticsPayload(db: Database): AnalyticsResponse {
735
+ const trustedRows = queryTrustedSkillObservationRows(db);
736
+ const today = new Date();
737
+ const dateKey = (value: string | null): string | null => {
738
+ if (!value) return null;
739
+ const parsed = new Date(value);
740
+ return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString().slice(0, 10);
741
+ };
742
+ const cutoffDate = (days: number): string => {
743
+ const cutoff = new Date(today);
744
+ cutoff.setUTCDate(cutoff.getUTCDate() - days);
745
+ return cutoff.toISOString().slice(0, 10);
746
+ };
676
747
 
677
- return rows.map((row) => ({
748
+ // 1. Pass rate trend — last 90 days, bucketed by day
749
+ const passRateTrendByDate = new Map<string, { triggered: number; total: number }>();
750
+ for (const row of trustedRows) {
751
+ const occurredDate = dateKey(row.occurred_at);
752
+ if (!occurredDate || occurredDate < cutoffDate(90)) continue;
753
+ const counts = passRateTrendByDate.get(occurredDate) ?? { triggered: 0, total: 0 };
754
+ counts.total += 1;
755
+ if (row.triggered === 1) counts.triggered += 1;
756
+ passRateTrendByDate.set(occurredDate, counts);
757
+ }
758
+ const passRateTrendRows = [...passRateTrendByDate.entries()]
759
+ .map(([date, counts]) => ({
760
+ date,
761
+ pass_rate: counts.total > 0 ? counts.triggered / counts.total : 0,
762
+ total_checks: counts.total,
763
+ }))
764
+ .sort((a, b) => a.date.localeCompare(b.date));
765
+
766
+ const pass_rate_trend = passRateTrendRows.map((row) => ({
767
+ date: row.date,
768
+ pass_rate: row.pass_rate,
769
+ total_checks: row.total_checks,
770
+ }));
771
+
772
+ // 2. Skill rankings — all skills with at least 1 check, ordered by pass rate
773
+ const skillRankingMap = new Map<string, { triggered_count: number; total_checks: number }>();
774
+ for (const row of trustedRows) {
775
+ const counts = skillRankingMap.get(row.skill_name) ?? { triggered_count: 0, total_checks: 0 };
776
+ counts.total_checks += 1;
777
+ if (row.triggered === 1) counts.triggered_count += 1;
778
+ skillRankingMap.set(row.skill_name, counts);
779
+ }
780
+ const skillRankingRows = [...skillRankingMap.entries()]
781
+ .map(([skill_name, counts]) => ({
782
+ skill_name,
783
+ pass_rate: counts.total_checks > 0 ? counts.triggered_count / counts.total_checks : 0,
784
+ total_checks: counts.total_checks,
785
+ triggered_count: counts.triggered_count,
786
+ }))
787
+ .sort(
788
+ (a, b) =>
789
+ b.pass_rate - a.pass_rate ||
790
+ b.total_checks - a.total_checks ||
791
+ a.skill_name.localeCompare(b.skill_name),
792
+ );
793
+
794
+ const skill_rankings = skillRankingRows.map((row) => ({
678
795
  skill_name: row.skill_name,
679
- skill_scope: row.skill_scope,
796
+ pass_rate: row.pass_rate,
680
797
  total_checks: row.total_checks,
681
798
  triggered_count: row.triggered_count,
682
- pass_rate: row.total_checks > 0 ? row.triggered_count / row.total_checks : 0,
683
- unique_sessions: row.unique_sessions,
684
- last_seen: row.last_seen,
685
- has_evidence: evidenceSkills.has(row.skill_name),
686
799
  }));
800
+
801
+ // 3. Daily activity — last 84 days (12 weeks) for heatmap
802
+ const dailyActivityByDate = new Map<string, number>();
803
+ for (const row of trustedRows) {
804
+ const occurredDate = dateKey(row.occurred_at);
805
+ if (!occurredDate || occurredDate < cutoffDate(84)) continue;
806
+ dailyActivityByDate.set(occurredDate, (dailyActivityByDate.get(occurredDate) ?? 0) + 1);
807
+ }
808
+ const dailyActivityRows = [...dailyActivityByDate.entries()]
809
+ .map(([date, checks]) => ({ date, checks }))
810
+ .sort((a, b) => a.date.localeCompare(b.date));
811
+
812
+ const daily_activity = dailyActivityRows.map((row) => ({
813
+ date: row.date,
814
+ checks: row.checks,
815
+ }));
816
+
817
+ // 4. Evolution impact — before/after pass rates for deployed evolutions
818
+ const deployedRows = db
819
+ .query(
820
+ `SELECT ea.skill_name, ea.proposal_id, ea.timestamp as deployed_at
821
+ FROM evolution_audit ea
822
+ WHERE ea.action = 'deployed' AND ea.skill_name IS NOT NULL
823
+ ORDER BY ea.timestamp DESC`,
824
+ )
825
+ .all() as Array<{ skill_name: string; proposal_id: string; deployed_at: string }>;
826
+
827
+ const evolution_impact: AnalyticsResponse["evolution_impact"] = [];
828
+ for (const deploy of deployedRows) {
829
+ const beforeRows = trustedRows.filter(
830
+ (row) => row.skill_name === deploy.skill_name && (row.occurred_at ?? "") < deploy.deployed_at,
831
+ );
832
+ const afterRows = trustedRows.filter(
833
+ (row) =>
834
+ row.skill_name === deploy.skill_name && (row.occurred_at ?? "") >= deploy.deployed_at,
835
+ );
836
+
837
+ evolution_impact.push({
838
+ skill_name: deploy.skill_name,
839
+ proposal_id: deploy.proposal_id,
840
+ deployed_at: deploy.deployed_at,
841
+ pass_rate_before:
842
+ beforeRows.length > 0
843
+ ? beforeRows.filter((row) => row.triggered === 1).length / beforeRows.length
844
+ : 0,
845
+ pass_rate_after:
846
+ afterRows.length > 0
847
+ ? afterRows.filter((row) => row.triggered === 1).length / afterRows.length
848
+ : 0,
849
+ });
850
+ }
851
+
852
+ // 5. Summary aggregates
853
+ const totalEvolutionsRow = db
854
+ .query(`SELECT COUNT(*) as c FROM evolution_audit WHERE action = 'deployed'`)
855
+ .get() as { c: number } | null;
856
+
857
+ const checks30dRows = trustedRows.filter((row) => {
858
+ const occurredDate = dateKey(row.occurred_at);
859
+ return occurredDate != null && occurredDate >= cutoffDate(30);
860
+ });
861
+ const activeSkills30d = new Set(checks30dRows.map((row) => row.skill_name));
862
+
863
+ // Average improvement across all deployed evolutions
864
+ let avgImprovement = 0;
865
+ if (evolution_impact.length > 0) {
866
+ const totalImprovement = evolution_impact.reduce(
867
+ (sum, e) => sum + (e.pass_rate_after - e.pass_rate_before),
868
+ 0,
869
+ );
870
+ avgImprovement = totalImprovement / evolution_impact.length;
871
+ }
872
+
873
+ const summary: AnalyticsResponse["summary"] = {
874
+ total_evolutions: totalEvolutionsRow?.c ?? 0,
875
+ avg_improvement: avgImprovement,
876
+ total_checks_30d: checks30dRows.length,
877
+ active_skills: activeSkills30d.size,
878
+ };
879
+
880
+ return {
881
+ pass_rate_trend,
882
+ skill_rankings,
883
+ daily_activity,
884
+ evolution_impact,
885
+ summary,
886
+ };
687
887
  }
688
888
 
689
889
  /**
@@ -1087,6 +1287,72 @@ export function queryGradingResults(db: Database): Array<{
1087
1287
  }>;
1088
1288
  }
1089
1289
 
1290
+ export function getCreatorContributionStagingCounts(db: Database): Array<{
1291
+ skill_name: string;
1292
+ pending_count: number;
1293
+ }> {
1294
+ return db
1295
+ .query(
1296
+ `SELECT skill_name, COUNT(*) AS pending_count
1297
+ FROM creator_contribution_staging
1298
+ WHERE status = 'pending'
1299
+ GROUP BY skill_name
1300
+ ORDER BY skill_name`,
1301
+ )
1302
+ .all() as Array<{
1303
+ skill_name: string;
1304
+ pending_count: number;
1305
+ }>;
1306
+ }
1307
+
1308
+ export interface CreatorContributionRelayStats {
1309
+ pending: number;
1310
+ sending: number;
1311
+ sent: number;
1312
+ failed: number;
1313
+ }
1314
+
1315
+ export interface CreatorContributionStagingRow {
1316
+ id: number;
1317
+ dedupe_key: string;
1318
+ skill_name: string;
1319
+ creator_id: string;
1320
+ payload_json: string;
1321
+ status: string;
1322
+ staged_at: string;
1323
+ updated_at: string;
1324
+ last_error: string | null;
1325
+ }
1326
+
1327
+ export function getCreatorContributionRelayStats(db: Database): CreatorContributionRelayStats {
1328
+ const row = db
1329
+ .query(
1330
+ `SELECT
1331
+ COALESCE(SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END), 0) AS pending,
1332
+ COALESCE(SUM(CASE WHEN status = 'sending' THEN 1 ELSE 0 END), 0) AS sending,
1333
+ COALESCE(SUM(CASE WHEN status = 'sent' THEN 1 ELSE 0 END), 0) AS sent,
1334
+ COALESCE(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END), 0) AS failed
1335
+ FROM creator_contribution_staging`,
1336
+ )
1337
+ .get() as CreatorContributionRelayStats | null;
1338
+ return row ?? { pending: 0, sending: 0, sent: 0, failed: 0 };
1339
+ }
1340
+
1341
+ export function getPendingCreatorContributionRows(
1342
+ db: Database,
1343
+ limit = 50,
1344
+ ): CreatorContributionStagingRow[] {
1345
+ return db
1346
+ .query(
1347
+ `SELECT id, dedupe_key, skill_name, creator_id, payload_json, status, staged_at, updated_at, last_error
1348
+ FROM creator_contribution_staging
1349
+ WHERE status = 'pending'
1350
+ ORDER BY id ASC
1351
+ LIMIT ?`,
1352
+ )
1353
+ .all(limit) as CreatorContributionStagingRow[];
1354
+ }
1355
+
1090
1356
  // -- Canonical record staging query -------------------------------------------
1091
1357
 
1092
1358
  /**
@@ -1461,6 +1727,411 @@ export function getSkillCommitSummary(db: Database, skillName: string): CommitSu
1461
1727
 
1462
1728
  // -- Helpers ------------------------------------------------------------------
1463
1729
 
1730
+ // -- Autonomy-first dashboard queries -----------------------------------------
1731
+
1732
+ export interface SkillTrustSummary {
1733
+ skill_name: string;
1734
+ total_checks: number;
1735
+ triggered_count: number;
1736
+ miss_rate: number;
1737
+ system_like_count: number;
1738
+ system_like_rate: number;
1739
+ prompt_link_rate: number;
1740
+ latest_action: string | null;
1741
+ pass_rate: number;
1742
+ last_seen: string | null;
1743
+ }
1744
+
1745
+ export interface TrustedSkillObservationRow {
1746
+ skill_name: string;
1747
+ session_id: string;
1748
+ occurred_at: string | null;
1749
+ triggered: number;
1750
+ matched_prompt_id: string | null;
1751
+ confidence: number | null;
1752
+ invocation_mode: string | null;
1753
+ query_text: string;
1754
+ }
1755
+
1756
+ export function queryTrustedSkillObservationRows(db: Database): TrustedSkillObservationRow[] {
1757
+ const SYSTEM_LIKE_PREFIXES = ["<system_instruction>", "<system-instruction>", "<command-name>"];
1758
+ const INTERNAL_EVAL_MARKERS = [
1759
+ "you are an evaluation assistant",
1760
+ "you are a skill description optimizer",
1761
+ "would each query trigger this skill",
1762
+ "propose an improved description",
1763
+ "failure patterns:",
1764
+ "output only valid json",
1765
+ ];
1766
+ const isSystemLike = (text: string | null | undefined): boolean => {
1767
+ if (!text) return false;
1768
+ const trimmed = text.trimStart();
1769
+ return SYSTEM_LIKE_PREFIXES.some((p) => trimmed.startsWith(p));
1770
+ };
1771
+ const isInternalSelftunePrompt = (
1772
+ text: string | null | undefined,
1773
+ promptKind: string | null | undefined,
1774
+ ): boolean => {
1775
+ if (!text) return false;
1776
+ const lowered = text.toLowerCase();
1777
+ return (
1778
+ promptKind === "meta" && INTERNAL_EVAL_MARKERS.some((marker) => lowered.includes(marker))
1779
+ );
1780
+ };
1781
+ const isPollutingPrompt = (
1782
+ text: string | null | undefined,
1783
+ promptKind: string | null | undefined,
1784
+ ): boolean => isSystemLike(text) || isInternalSelftunePrompt(text, promptKind);
1785
+ const classifyObservationKind = (
1786
+ skillInvocationId: string,
1787
+ captureMode: string | null,
1788
+ triggered: number,
1789
+ rawSourceRefJson: string | null,
1790
+ ): "canonical" | "repaired_trigger" | "repaired_contextual_miss" | "legacy_materialized" => {
1791
+ if (skillInvocationId.includes(":su:")) return "legacy_materialized";
1792
+ if (captureMode === "repair") {
1793
+ const rawSourceRef = safeParseJson(rawSourceRefJson) as {
1794
+ metadata?: { miss_type?: string };
1795
+ } | null;
1796
+ if (triggered === 0 && rawSourceRef?.metadata?.miss_type === "contextual_read") {
1797
+ return "repaired_contextual_miss";
1798
+ }
1799
+ return "repaired_trigger";
1800
+ }
1801
+ return "canonical";
1802
+ };
1803
+ const normalizeQueryForGrouping = (query: string) =>
1804
+ query.replace(/\s+/g, " ").trim().toLowerCase();
1805
+
1806
+ const rows = db
1807
+ .query(
1808
+ `SELECT
1809
+ si.skill_name,
1810
+ si.session_id,
1811
+ si.occurred_at,
1812
+ si.triggered,
1813
+ si.matched_prompt_id,
1814
+ si.confidence,
1815
+ si.invocation_mode,
1816
+ si.skill_invocation_id,
1817
+ si.capture_mode,
1818
+ si.raw_source_ref,
1819
+ si.query,
1820
+ p.prompt_text,
1821
+ p.prompt_kind
1822
+ FROM skill_invocations si
1823
+ LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id`,
1824
+ )
1825
+ .all() as Array<{
1826
+ skill_name: string;
1827
+ session_id: string;
1828
+ occurred_at: string | null;
1829
+ triggered: number;
1830
+ matched_prompt_id: string | null;
1831
+ confidence: number | null;
1832
+ invocation_mode: string | null;
1833
+ skill_invocation_id: string;
1834
+ capture_mode: string | null;
1835
+ raw_source_ref: string | null;
1836
+ query: string | null;
1837
+ prompt_text: string | null;
1838
+ prompt_kind: string | null;
1839
+ }>;
1840
+
1841
+ const bySkill = new Map<
1842
+ string,
1843
+ Array<{
1844
+ skill_name: string;
1845
+ session_id: string;
1846
+ occurred_at: string | null;
1847
+ triggered: number;
1848
+ matched_prompt_id: string | null;
1849
+ confidence: number | null;
1850
+ invocation_mode: string | null;
1851
+ queryText: string;
1852
+ isPolluting: boolean;
1853
+ observation_kind:
1854
+ | "canonical"
1855
+ | "repaired_trigger"
1856
+ | "repaired_contextual_miss"
1857
+ | "legacy_materialized";
1858
+ groupKey: string;
1859
+ }>
1860
+ >();
1861
+ const trustedRows: Array<{
1862
+ skill_name: string;
1863
+ session_id: string;
1864
+ occurred_at: string | null;
1865
+ triggered: number;
1866
+ matched_prompt_id: string | null;
1867
+ confidence: number | null;
1868
+ invocation_mode: string | null;
1869
+ query_text: string;
1870
+ }> = [];
1871
+
1872
+ for (const row of rows) {
1873
+ const queryText = row.query || row.prompt_text || "";
1874
+ const pollutionText = row.prompt_text || row.query || "";
1875
+ const observation_kind = classifyObservationKind(
1876
+ row.skill_invocation_id,
1877
+ row.capture_mode,
1878
+ row.triggered,
1879
+ row.raw_source_ref,
1880
+ );
1881
+ if (isPollutingPrompt(pollutionText, row.prompt_kind)) continue;
1882
+ if (observation_kind === "legacy_materialized") continue;
1883
+
1884
+ const normalizedQuery = normalizeQueryForGrouping(queryText);
1885
+ const groupKey =
1886
+ normalizedQuery.length > 0
1887
+ ? `${row.session_id}::${normalizedQuery}`
1888
+ : `${row.skill_invocation_id}`;
1889
+ const arr = bySkill.get(row.skill_name);
1890
+ const enriched = {
1891
+ skill_name: row.skill_name,
1892
+ session_id: row.session_id,
1893
+ occurred_at: row.occurred_at,
1894
+ triggered: row.triggered,
1895
+ matched_prompt_id: row.matched_prompt_id,
1896
+ confidence: row.confidence,
1897
+ invocation_mode: row.invocation_mode,
1898
+ queryText,
1899
+ isPolluting: false,
1900
+ observation_kind,
1901
+ groupKey,
1902
+ };
1903
+ if (arr) arr.push(enriched);
1904
+ else bySkill.set(row.skill_name, [enriched]);
1905
+ }
1906
+
1907
+ for (const [, skillRows] of bySkill.entries()) {
1908
+ const grouped = new Map<string, typeof skillRows>();
1909
+ for (const row of skillRows) {
1910
+ const arr = grouped.get(row.groupKey);
1911
+ if (arr) arr.push(row);
1912
+ else grouped.set(row.groupKey, [row]);
1913
+ }
1914
+
1915
+ const deduped = [...grouped.values()].map((group) => {
1916
+ const sorted = [...group].sort((a, b) => {
1917
+ const aScore =
1918
+ (a.triggered === 1 ? 100 : 0) +
1919
+ (a.observation_kind === "canonical" ? 20 : 0) +
1920
+ (a.observation_kind === "repaired_trigger" ? 15 : 0);
1921
+ const bScore =
1922
+ (b.triggered === 1 ? 100 : 0) +
1923
+ (b.observation_kind === "canonical" ? 20 : 0) +
1924
+ (b.observation_kind === "repaired_trigger" ? 15 : 0);
1925
+ if (aScore !== bScore) return bScore - aScore;
1926
+ return (b.occurred_at ?? "").localeCompare(a.occurred_at ?? "");
1927
+ });
1928
+ return sorted[0]!;
1929
+ });
1930
+
1931
+ trustedRows.push(
1932
+ ...deduped.map((row) => ({
1933
+ skill_name: row.skill_name,
1934
+ session_id: row.session_id,
1935
+ occurred_at: row.occurred_at,
1936
+ triggered: row.triggered,
1937
+ matched_prompt_id: row.matched_prompt_id,
1938
+ confidence: row.confidence,
1939
+ invocation_mode: row.invocation_mode,
1940
+ query_text: row.queryText,
1941
+ })),
1942
+ );
1943
+ }
1944
+
1945
+ return trustedRows;
1946
+ }
1947
+
1948
+ export function getSkillTrustSummaries(db: Database): SkillTrustSummary[] {
1949
+ const rows = queryTrustedSkillObservationRows(db);
1950
+
1951
+ // Build latest_action map from evolution_audit
1952
+ const auditRows = db
1953
+ .query(
1954
+ `SELECT skill_name, action, timestamp
1955
+ FROM evolution_audit
1956
+ WHERE skill_name IS NOT NULL
1957
+ ORDER BY timestamp DESC`,
1958
+ )
1959
+ .all() as Array<{
1960
+ skill_name: string | null;
1961
+ action: string;
1962
+ timestamp: string;
1963
+ }>;
1964
+
1965
+ const latestActions = new Map<string, string>();
1966
+ for (const row of auditRows) {
1967
+ if (row.skill_name && !latestActions.has(row.skill_name)) {
1968
+ latestActions.set(row.skill_name, row.action);
1969
+ }
1970
+ }
1971
+
1972
+ const rowsBySkill = new Map<string, typeof rows>();
1973
+ for (const row of rows) {
1974
+ const arr = rowsBySkill.get(row.skill_name);
1975
+ if (arr) arr.push(row);
1976
+ else rowsBySkill.set(row.skill_name, [row]);
1977
+ }
1978
+
1979
+ const summaries: SkillTrustSummary[] = [];
1980
+ for (const [skillName, skillRows] of rowsBySkill.entries()) {
1981
+ const total = skillRows.length;
1982
+ const triggered = skillRows.filter((row) => row.triggered === 1).length;
1983
+ const promptLinked = skillRows.filter((row) => row.matched_prompt_id != null).length;
1984
+ const lastSeen =
1985
+ skillRows
1986
+ .map((row) => row.occurred_at)
1987
+ .filter((value): value is string => value != null)
1988
+ .sort((a, b) => b.localeCompare(a))[0] ?? null;
1989
+
1990
+ summaries.push({
1991
+ skill_name: skillName,
1992
+ total_checks: total,
1993
+ triggered_count: triggered,
1994
+ miss_rate: total > 0 ? (total - triggered) / total : 0,
1995
+ system_like_count: 0,
1996
+ system_like_rate: 0,
1997
+ prompt_link_rate: total > 0 ? promptLinked / total : 0,
1998
+ latest_action: latestActions.get(skillName) ?? null,
1999
+ pass_rate: total > 0 ? triggered / total : 0,
2000
+ last_seen: lastSeen,
2001
+ });
2002
+ }
2003
+
2004
+ return summaries;
2005
+ }
2006
+
2007
+ export function getAttentionQueue(db: Database): AttentionItem[] {
2008
+ const summaries = getSkillTrustSummaries(db);
2009
+ const pending = getPendingProposals(db);
2010
+ const pendingSkills = new Set(pending.map((p) => p.skill_name).filter(Boolean));
2011
+
2012
+ const items: AttentionItem[] = [];
2013
+
2014
+ for (const s of summaries) {
2015
+ if (s.latest_action === "rolled_back") {
2016
+ items.push({
2017
+ skill_name: s.skill_name,
2018
+ category: "needs_review",
2019
+ severity: "critical",
2020
+ reason: "Rolled back after deployment",
2021
+ recommended_action: "Review rollback evidence and decide whether to re-evolve",
2022
+ timestamp: s.last_seen ?? "",
2023
+ });
2024
+ continue;
2025
+ }
2026
+
2027
+ if (pendingSkills.has(s.skill_name)) {
2028
+ items.push({
2029
+ skill_name: s.skill_name,
2030
+ category: "needs_review",
2031
+ severity: "info",
2032
+ reason: "Proposal awaiting review",
2033
+ recommended_action: "Review and approve or reject the pending proposal",
2034
+ timestamp: s.last_seen ?? "",
2035
+ });
2036
+ continue;
2037
+ }
2038
+
2039
+ if (s.total_checks < 5) continue;
2040
+
2041
+ if (s.miss_rate > 0.1) {
2042
+ items.push({
2043
+ skill_name: s.skill_name,
2044
+ category: "regression",
2045
+ severity: "warning",
2046
+ reason: `High miss rate (${Math.round(s.miss_rate * 100)}%)`,
2047
+ recommended_action: "Review missed invocations and consider evolving the skill description",
2048
+ timestamp: s.last_seen ?? "",
2049
+ });
2050
+ continue;
2051
+ }
2052
+
2053
+ if (s.system_like_rate > 0.1) {
2054
+ items.push({
2055
+ skill_name: s.skill_name,
2056
+ category: "polluted",
2057
+ severity: "warning",
2058
+ reason: `Possible telemetry pollution (${Math.round(s.system_like_rate * 100)}% system-like)`,
2059
+ recommended_action: "Inspect prompts for system-injected noise",
2060
+ timestamp: s.last_seen ?? "",
2061
+ });
2062
+ continue;
2063
+ }
2064
+ }
2065
+
2066
+ return items;
2067
+ }
2068
+
2069
+ export function getRecentDecisions(db: Database, limit = 20): AutonomousDecision[] {
2070
+ const rows = db
2071
+ .query(
2072
+ `SELECT timestamp, proposal_id, skill_name, action, details, eval_snapshot_json
2073
+ FROM evolution_audit
2074
+ WHERE timestamp >= datetime('now', '-7 days')
2075
+ ORDER BY timestamp DESC
2076
+ LIMIT ?`,
2077
+ )
2078
+ .all(limit) as Array<{
2079
+ timestamp: string;
2080
+ proposal_id: string;
2081
+ skill_name: string | null;
2082
+ action: string;
2083
+ details: string;
2084
+ eval_snapshot_json: string | null;
2085
+ }>;
2086
+
2087
+ return rows
2088
+ .filter((row) => row.skill_name != null)
2089
+ .flatMap((row) => {
2090
+ const evalSnapshot = safeParseJson(row.eval_snapshot_json) as {
2091
+ regressions?: unknown[];
2092
+ } | null;
2093
+
2094
+ let kind: DecisionKind | null;
2095
+ switch (row.action) {
2096
+ case "proposed":
2097
+ case "created":
2098
+ kind = "proposal_created";
2099
+ break;
2100
+ case "rejected":
2101
+ kind = "proposal_rejected";
2102
+ break;
2103
+ case "validated":
2104
+ kind =
2105
+ evalSnapshot?.regressions && evalSnapshot.regressions.length > 0
2106
+ ? "validation_failed"
2107
+ : "proposal_created"; // validated without regressions is still a creation step
2108
+ break;
2109
+ case "deployed":
2110
+ kind = "proposal_deployed";
2111
+ break;
2112
+ case "rolled_back":
2113
+ kind = "rollback_triggered";
2114
+ break;
2115
+ default:
2116
+ kind = null;
2117
+ }
2118
+
2119
+ if (!kind) return [];
2120
+
2121
+ return [
2122
+ {
2123
+ timestamp: row.timestamp,
2124
+ kind,
2125
+ skill_name: row.skill_name!,
2126
+ proposal_id: row.proposal_id,
2127
+ summary: row.details ?? "",
2128
+ },
2129
+ ];
2130
+ });
2131
+ }
2132
+
2133
+ // -- Helpers ------------------------------------------------------------------
2134
+
1464
2135
  export function safeParseJsonArray<T = string>(json: string | null): T[] {
1465
2136
  if (!json) return [];
1466
2137
  try {