selftune 0.2.31 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -56
- package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
- package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/command-surface.ts +613 -2
- package/cli/selftune/create/baseline.ts +429 -0
- package/cli/selftune/create/check.ts +35 -0
- package/cli/selftune/create/init.ts +115 -0
- package/cli/selftune/create/package-candidate-state.ts +771 -0
- package/cli/selftune/create/package-evaluator.ts +710 -0
- package/cli/selftune/create/package-fingerprint.ts +142 -0
- package/cli/selftune/create/package-search.ts +377 -0
- package/cli/selftune/create/publish.ts +431 -0
- package/cli/selftune/create/readiness.ts +495 -0
- package/cli/selftune/create/replay.ts +330 -0
- package/cli/selftune/create/report.ts +74 -0
- package/cli/selftune/create/scaffold.ts +121 -0
- package/cli/selftune/create/skills-ref-adapter.ts +177 -0
- package/cli/selftune/create/status.ts +33 -0
- package/cli/selftune/create/templates.ts +249 -0
- package/cli/selftune/cron/setup.ts +1 -1
- package/cli/selftune/dashboard-action-events.ts +4 -1
- package/cli/selftune/dashboard-action-result.ts +789 -24
- package/cli/selftune/dashboard-action-stream.ts +80 -0
- package/cli/selftune/dashboard-contract.ts +146 -3
- package/cli/selftune/dashboard-server.ts +5 -4
- package/cli/selftune/eval/hooks-to-evals.ts +58 -35
- package/cli/selftune/eval/synthetic-evals.ts +145 -17
- package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
- package/cli/selftune/evolution/evolve-body.ts +9 -36
- package/cli/selftune/evolution/evolve.ts +8 -72
- package/cli/selftune/evolution/stopping-criteria.ts +5 -13
- package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
- package/cli/selftune/evolution/validate-host-replay.ts +115 -15
- package/cli/selftune/improve.ts +206 -0
- package/cli/selftune/index.ts +123 -6
- package/cli/selftune/init.ts +1 -1
- package/cli/selftune/localdb/queries/dashboard.ts +30 -0
- package/cli/selftune/localdb/schema.ts +52 -0
- package/cli/selftune/monitoring/watch.ts +257 -23
- package/cli/selftune/orchestrate/execute.ts +300 -1
- package/cli/selftune/orchestrate/finalize.ts +14 -0
- package/cli/selftune/orchestrate/plan.ts +22 -5
- package/cli/selftune/orchestrate/prepare.ts +59 -4
- package/cli/selftune/orchestrate/report.ts +1 -1
- package/cli/selftune/orchestrate.ts +34 -1
- package/cli/selftune/publish.ts +35 -0
- package/cli/selftune/routes/actions.ts +81 -15
- package/cli/selftune/routes/overview.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +147 -2
- package/cli/selftune/run.ts +18 -0
- package/cli/selftune/schedule.ts +3 -3
- package/cli/selftune/search-run.ts +703 -0
- package/cli/selftune/status.ts +35 -11
- package/cli/selftune/testing-readiness.ts +431 -40
- package/cli/selftune/types.ts +316 -0
- package/cli/selftune/utils/eval-readiness.ts +1 -0
- package/cli/selftune/utils/json-output.ts +11 -0
- package/cli/selftune/utils/lifecycle-surface.ts +48 -0
- package/cli/selftune/utils/query-filter.ts +82 -1
- package/cli/selftune/utils/tui.ts +85 -2
- package/cli/selftune/verify.ts +205 -0
- package/cli/selftune/workflows/proposals.ts +1 -1
- package/cli/selftune/workflows/skill-scaffold.ts +141 -63
- package/cli/selftune/workflows/workflows.ts +4 -4
- package/package.json +1 -1
- package/skill/SKILL.md +148 -85
- package/skill/references/cli-quick-reference.md +16 -1
- package/skill/references/creator-playbook.md +31 -10
- package/skill/workflows/Baseline.md +8 -9
- package/skill/workflows/Contributions.md +4 -4
- package/skill/workflows/Create.md +173 -0
- package/skill/workflows/CreateTestDeploy.md +34 -30
- package/skill/workflows/Cron.md +2 -2
- package/skill/workflows/Dashboard.md +3 -3
- package/skill/workflows/Evals.md +13 -7
- package/skill/workflows/Evolve.md +75 -32
- package/skill/workflows/EvolveBody.md +22 -15
- package/skill/workflows/Hook.md +1 -1
- package/skill/workflows/Improve.md +168 -0
- package/skill/workflows/Initialize.md +3 -3
- package/skill/workflows/Orchestrate.md +49 -12
- package/skill/workflows/Publish.md +100 -0
- package/skill/workflows/Run.md +72 -0
- package/skill/workflows/Schedule.md +2 -2
- package/skill/workflows/SearchRun.md +89 -0
- package/skill/workflows/SignalsDashboard.md +2 -2
- package/skill/workflows/UnitTest.md +13 -4
- package/skill/workflows/Verify.md +136 -0
- package/skill/workflows/Watch.md +114 -47
- package/skill/workflows/Workflows.md +13 -8
- package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +0 -15
- package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
package/cli/selftune/types.ts
CHANGED
|
@@ -745,6 +745,7 @@ export interface BodyEvolutionProposal {
|
|
|
745
745
|
export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
|
|
746
746
|
|
|
747
747
|
export type ValidationMode = "structural_guard" | "host_replay" | "llm_judge";
|
|
748
|
+
export type ReplayStagingMode = "routing" | "package";
|
|
748
749
|
|
|
749
750
|
export interface RoutingReplayFixture {
|
|
750
751
|
fixture_id: string;
|
|
@@ -753,6 +754,7 @@ export interface RoutingReplayFixture {
|
|
|
753
754
|
target_skill_path: string;
|
|
754
755
|
competing_skill_paths: string[];
|
|
755
756
|
workspace_root?: string;
|
|
757
|
+
skill_staging_mode?: ReplayStagingMode;
|
|
756
758
|
}
|
|
757
759
|
|
|
758
760
|
export interface RoutingReplayEntryResult {
|
|
@@ -761,6 +763,30 @@ export interface RoutingReplayEntryResult {
|
|
|
761
763
|
triggered: boolean;
|
|
762
764
|
passed: boolean;
|
|
763
765
|
evidence?: string;
|
|
766
|
+
runtime_metrics?: RuntimeReplayEntryMetrics;
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
export interface RuntimeReplayEntryMetrics {
|
|
770
|
+
input_tokens: number | null;
|
|
771
|
+
output_tokens: number | null;
|
|
772
|
+
cache_creation_input_tokens: number | null;
|
|
773
|
+
cache_read_input_tokens: number | null;
|
|
774
|
+
total_cost_usd: number | null;
|
|
775
|
+
duration_ms: number | null;
|
|
776
|
+
num_turns: number | null;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
export interface RuntimeReplayAggregateMetrics {
|
|
780
|
+
eval_runs: number;
|
|
781
|
+
usage_observations: number;
|
|
782
|
+
total_duration_ms: number;
|
|
783
|
+
avg_duration_ms: number;
|
|
784
|
+
total_input_tokens: number | null;
|
|
785
|
+
total_output_tokens: number | null;
|
|
786
|
+
total_cache_creation_input_tokens: number | null;
|
|
787
|
+
total_cache_read_input_tokens: number | null;
|
|
788
|
+
total_cost_usd: number | null;
|
|
789
|
+
total_turns: number | null;
|
|
764
790
|
}
|
|
765
791
|
|
|
766
792
|
/** Result of validating a body evolution proposal. */
|
|
@@ -808,11 +834,193 @@ export interface BaselineResult {
|
|
|
808
834
|
with_skill: boolean;
|
|
809
835
|
triggered: boolean;
|
|
810
836
|
pass: boolean;
|
|
837
|
+
evidence?: string;
|
|
811
838
|
latency_ms?: number;
|
|
812
839
|
tokens?: TokenUsageMetrics;
|
|
813
840
|
measured_at: string;
|
|
814
841
|
}
|
|
815
842
|
|
|
843
|
+
export type CreatePackageEvaluationStatus = "passed" | "replay_failed" | "baseline_failed";
|
|
844
|
+
|
|
845
|
+
export interface CreatePackageReplaySummary {
|
|
846
|
+
mode: ReplayStagingMode;
|
|
847
|
+
validation_mode: "host_replay";
|
|
848
|
+
agent: string;
|
|
849
|
+
proposal_id: string;
|
|
850
|
+
fixture_id: string;
|
|
851
|
+
total: number;
|
|
852
|
+
passed: number;
|
|
853
|
+
failed: number;
|
|
854
|
+
pass_rate: number;
|
|
855
|
+
runtime_metrics?: RuntimeReplayAggregateMetrics;
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
export interface CreatePackageBaselineSummary {
|
|
859
|
+
mode: ReplayStagingMode;
|
|
860
|
+
baseline_pass_rate: number;
|
|
861
|
+
with_skill_pass_rate: number;
|
|
862
|
+
lift: number;
|
|
863
|
+
adds_value: boolean;
|
|
864
|
+
measured_at: string;
|
|
865
|
+
sample_size?: number;
|
|
866
|
+
runtime_metrics?: {
|
|
867
|
+
with_skill: RuntimeReplayAggregateMetrics;
|
|
868
|
+
without_skill: RuntimeReplayAggregateMetrics;
|
|
869
|
+
};
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
export interface CreatePackageEvaluationEvidenceSample {
|
|
873
|
+
query: string;
|
|
874
|
+
evidence: string | null;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
export interface CreatePackageEvaluationEvidenceSummary {
|
|
878
|
+
replay_failures: number;
|
|
879
|
+
baseline_wins: number;
|
|
880
|
+
baseline_regressions: number;
|
|
881
|
+
replay_failure_samples: CreatePackageEvaluationEvidenceSample[];
|
|
882
|
+
baseline_win_samples: CreatePackageEvaluationEvidenceSample[];
|
|
883
|
+
baseline_regression_samples: CreatePackageEvaluationEvidenceSample[];
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
export interface CreatePackageEvaluationEfficiencySummary {
|
|
887
|
+
with_skill: RuntimeReplayAggregateMetrics;
|
|
888
|
+
without_skill: RuntimeReplayAggregateMetrics;
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
export interface CreatePackageEvaluationWatchEfficiencyRegressionSummary {
|
|
892
|
+
sample_size: number;
|
|
893
|
+
baseline_avg_duration_ms: number | null;
|
|
894
|
+
observed_avg_duration_ms: number | null;
|
|
895
|
+
duration_delta_ratio: number | null;
|
|
896
|
+
baseline_avg_input_tokens: number | null;
|
|
897
|
+
observed_avg_input_tokens: number | null;
|
|
898
|
+
input_tokens_delta_ratio: number | null;
|
|
899
|
+
baseline_avg_output_tokens: number | null;
|
|
900
|
+
observed_avg_output_tokens: number | null;
|
|
901
|
+
output_tokens_delta_ratio: number | null;
|
|
902
|
+
baseline_avg_turns: number | null;
|
|
903
|
+
observed_avg_turns: number | null;
|
|
904
|
+
turns_delta_ratio: number | null;
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
export interface CreatePackageEvaluationWatchSummary {
|
|
908
|
+
snapshot: MonitoringSnapshot;
|
|
909
|
+
alert: string | null;
|
|
910
|
+
rolled_back: boolean;
|
|
911
|
+
recommendation: string;
|
|
912
|
+
recommended_command: string | null;
|
|
913
|
+
grade_alert: string | null;
|
|
914
|
+
grade_regression: { before: number; after: number; delta: number } | null;
|
|
915
|
+
efficiency_alert?: string | null;
|
|
916
|
+
efficiency_regression?: CreatePackageEvaluationWatchEfficiencyRegressionSummary | null;
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
export interface CreatePackageEvaluationGradingBaselineSummary {
|
|
920
|
+
proposal_id: string | null;
|
|
921
|
+
measured_at: string;
|
|
922
|
+
pass_rate: number;
|
|
923
|
+
mean_score: number | null;
|
|
924
|
+
sample_size: number;
|
|
925
|
+
}
|
|
926
|
+
|
|
927
|
+
export interface CreatePackageEvaluationGradingRecentSummary {
|
|
928
|
+
sample_size: number;
|
|
929
|
+
average_pass_rate: number | null;
|
|
930
|
+
average_mean_score: number | null;
|
|
931
|
+
newest_graded_at: string | null;
|
|
932
|
+
oldest_graded_at: string | null;
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
export interface CreatePackageEvaluationGradingSummary {
|
|
936
|
+
baseline: CreatePackageEvaluationGradingBaselineSummary | null;
|
|
937
|
+
recent: CreatePackageEvaluationGradingRecentSummary | null;
|
|
938
|
+
pass_rate_delta: number | null;
|
|
939
|
+
mean_score_delta: number | null;
|
|
940
|
+
regressed: boolean | null;
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
export interface CreatePackageEvaluationUnitTestFailureSummary {
|
|
944
|
+
test_id: string;
|
|
945
|
+
error: string | null;
|
|
946
|
+
failed_assertions: string[];
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
export interface CreatePackageEvaluationUnitTestSummary {
|
|
950
|
+
total: number;
|
|
951
|
+
passed: number;
|
|
952
|
+
failed: number;
|
|
953
|
+
pass_rate: number;
|
|
954
|
+
run_at: string;
|
|
955
|
+
failing_tests: CreatePackageEvaluationUnitTestFailureSummary[];
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
export interface CreatePackageBodySummary {
|
|
959
|
+
structural_valid: boolean;
|
|
960
|
+
structural_reason: string;
|
|
961
|
+
quality_score: number | null;
|
|
962
|
+
quality_reason: string | null;
|
|
963
|
+
quality_threshold: number;
|
|
964
|
+
quality_passed: boolean | null;
|
|
965
|
+
valid: boolean;
|
|
966
|
+
}
|
|
967
|
+
|
|
968
|
+
export type CreatePackageEvaluationSource = "fresh" | "artifact_cache" | "candidate_cache";
|
|
969
|
+
export type CreatePackageCandidateAcceptanceDecision = "root" | "accepted" | "rejected";
|
|
970
|
+
|
|
971
|
+
export interface CreatePackageCandidateAcceptanceSummary {
|
|
972
|
+
decision: CreatePackageCandidateAcceptanceDecision;
|
|
973
|
+
compared_to_candidate_id: string | null;
|
|
974
|
+
decided_at: string;
|
|
975
|
+
rationale: string;
|
|
976
|
+
replay_pass_rate_delta: number | null;
|
|
977
|
+
routing_pass_rate_delta: number | null;
|
|
978
|
+
baseline_lift_delta: number | null;
|
|
979
|
+
body_quality_delta: number | null;
|
|
980
|
+
unit_test_pass_rate_delta: number | null;
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
export interface CreatePackageEvaluationSummary {
|
|
984
|
+
skill_name: string;
|
|
985
|
+
skill_path: string;
|
|
986
|
+
mode: ReplayStagingMode;
|
|
987
|
+
package_fingerprint?: string;
|
|
988
|
+
candidate_id?: string;
|
|
989
|
+
parent_candidate_id?: string | null;
|
|
990
|
+
candidate_generation?: number | null;
|
|
991
|
+
evaluation_source?: CreatePackageEvaluationSource;
|
|
992
|
+
status: CreatePackageEvaluationStatus;
|
|
993
|
+
evaluation_passed: boolean;
|
|
994
|
+
next_command: string | null;
|
|
995
|
+
replay: CreatePackageReplaySummary;
|
|
996
|
+
routing?: CreatePackageReplaySummary;
|
|
997
|
+
baseline: CreatePackageBaselineSummary;
|
|
998
|
+
evidence?: CreatePackageEvaluationEvidenceSummary;
|
|
999
|
+
efficiency?: CreatePackageEvaluationEfficiencySummary;
|
|
1000
|
+
grading?: CreatePackageEvaluationGradingSummary;
|
|
1001
|
+
body?: CreatePackageBodySummary;
|
|
1002
|
+
unit_tests?: CreatePackageEvaluationUnitTestSummary;
|
|
1003
|
+
watch?: CreatePackageEvaluationWatchSummary;
|
|
1004
|
+
candidate_acceptance?: CreatePackageCandidateAcceptanceSummary;
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
export interface CreatePackageCandidateRecord {
|
|
1008
|
+
candidate_id: string;
|
|
1009
|
+
skill_name: string;
|
|
1010
|
+
skill_path: string;
|
|
1011
|
+
package_fingerprint: string;
|
|
1012
|
+
parent_candidate_id: string | null;
|
|
1013
|
+
candidate_generation: number;
|
|
1014
|
+
evaluation_count: number;
|
|
1015
|
+
first_evaluated_at: string;
|
|
1016
|
+
last_evaluated_at: string;
|
|
1017
|
+
latest_status: CreatePackageEvaluationStatus;
|
|
1018
|
+
latest_evaluation_source: CreatePackageEvaluationSource | null;
|
|
1019
|
+
latest_acceptance_decision: CreatePackageCandidateAcceptanceDecision | null;
|
|
1020
|
+
artifact_path: string | null;
|
|
1021
|
+
summary: CreatePackageEvaluationSummary;
|
|
1022
|
+
}
|
|
1023
|
+
|
|
816
1024
|
// ---------------------------------------------------------------------------
|
|
817
1025
|
// Skill unit test types
|
|
818
1026
|
// ---------------------------------------------------------------------------
|
|
@@ -863,6 +1071,78 @@ export interface UnitTestSuiteResult {
|
|
|
863
1071
|
run_at: string;
|
|
864
1072
|
}
|
|
865
1073
|
|
|
1074
|
+
export interface AgentSkillValidationIssue {
|
|
1075
|
+
level: "error" | "warning";
|
|
1076
|
+
code: string;
|
|
1077
|
+
message: string;
|
|
1078
|
+
path?: string;
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
export interface AgentSkillValidationResult {
|
|
1082
|
+
ok: boolean;
|
|
1083
|
+
issues: AgentSkillValidationIssue[];
|
|
1084
|
+
raw_stdout: string;
|
|
1085
|
+
raw_stderr: string;
|
|
1086
|
+
exit_code: number | null;
|
|
1087
|
+
validator: "skills-ref";
|
|
1088
|
+
command: string | null;
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
export type CreateCheckState =
|
|
1092
|
+
| "blocked_spec_validation"
|
|
1093
|
+
| "needs_spec_validation"
|
|
1094
|
+
| "needs_package_resources"
|
|
1095
|
+
| "needs_evals"
|
|
1096
|
+
| "needs_unit_tests"
|
|
1097
|
+
| "needs_routing_replay"
|
|
1098
|
+
| "needs_baseline"
|
|
1099
|
+
| "ready_to_publish";
|
|
1100
|
+
|
|
1101
|
+
export interface CreateCheckChecks {
|
|
1102
|
+
skill_md: boolean;
|
|
1103
|
+
frontmatter_present: boolean;
|
|
1104
|
+
skill_name_matches_dir: boolean;
|
|
1105
|
+
description_present: boolean;
|
|
1106
|
+
description_within_budget: boolean;
|
|
1107
|
+
skill_md_within_line_budget: boolean;
|
|
1108
|
+
manifest_present: boolean;
|
|
1109
|
+
workflow_entry: boolean;
|
|
1110
|
+
references_present: boolean;
|
|
1111
|
+
scripts_present: boolean;
|
|
1112
|
+
assets_present: boolean;
|
|
1113
|
+
evals_present: boolean;
|
|
1114
|
+
unit_tests_present: boolean;
|
|
1115
|
+
routing_replay_ready: boolean;
|
|
1116
|
+
routing_replay_recorded: boolean;
|
|
1117
|
+
package_replay_ready: boolean;
|
|
1118
|
+
baseline_present: boolean;
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
export interface CreateCheckReadiness {
|
|
1122
|
+
ok: boolean;
|
|
1123
|
+
state: CreateCheckState;
|
|
1124
|
+
summary: string;
|
|
1125
|
+
next_command: string | null;
|
|
1126
|
+
checks: CreateCheckChecks;
|
|
1127
|
+
skill_name: string;
|
|
1128
|
+
skill_dir: string;
|
|
1129
|
+
skill_path: string;
|
|
1130
|
+
entry_workflow: string;
|
|
1131
|
+
manifest_present: boolean;
|
|
1132
|
+
description_quality: DescriptionQualityScore;
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
export interface CreateCheckResult {
|
|
1136
|
+
skill: string;
|
|
1137
|
+
skill_dir: string;
|
|
1138
|
+
skill_path: string;
|
|
1139
|
+
ok: boolean;
|
|
1140
|
+
state: CreateCheckState;
|
|
1141
|
+
next_command: string | null;
|
|
1142
|
+
spec_validation: AgentSkillValidationResult;
|
|
1143
|
+
readiness: CreateCheckReadiness;
|
|
1144
|
+
}
|
|
1145
|
+
|
|
866
1146
|
// ---------------------------------------------------------------------------
|
|
867
1147
|
// Composability types
|
|
868
1148
|
// ---------------------------------------------------------------------------
|
|
@@ -1034,3 +1314,39 @@ export interface WorkflowDiscoveryReport {
|
|
|
1034
1314
|
total_sessions_analyzed: number;
|
|
1035
1315
|
generated_at: string;
|
|
1036
1316
|
}
|
|
1317
|
+
|
|
1318
|
+
// ---------------------------------------------------------------------------
|
|
1319
|
+
// Package search types (bounded package evolution)
|
|
1320
|
+
// ---------------------------------------------------------------------------
|
|
1321
|
+
|
|
1322
|
+
/** Provenance trail for a package search run. */
|
|
1323
|
+
export interface PackageSearchProvenance {
|
|
1324
|
+
frontier_size: number;
|
|
1325
|
+
parent_selection_method: string;
|
|
1326
|
+
candidate_fingerprints: string[];
|
|
1327
|
+
surface_plan?: {
|
|
1328
|
+
routing_count: number;
|
|
1329
|
+
body_count: number;
|
|
1330
|
+
weakness_source: string;
|
|
1331
|
+
routing_weakness: number | null;
|
|
1332
|
+
body_weakness: number | null;
|
|
1333
|
+
};
|
|
1334
|
+
evaluation_summaries: Array<{
|
|
1335
|
+
candidate_id: string;
|
|
1336
|
+
decision: string;
|
|
1337
|
+
rationale: string;
|
|
1338
|
+
}>;
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
/** Result of a bounded package search run. */
|
|
1342
|
+
export interface PackageSearchRunResult {
|
|
1343
|
+
search_id: string;
|
|
1344
|
+
skill_name: string;
|
|
1345
|
+
parent_candidate_id: string | null;
|
|
1346
|
+
candidates_evaluated: number;
|
|
1347
|
+
winner_candidate_id: string | null;
|
|
1348
|
+
winner_rationale: string | null;
|
|
1349
|
+
started_at: string;
|
|
1350
|
+
completed_at: string;
|
|
1351
|
+
provenance: PackageSearchProvenance;
|
|
1352
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// NOTE(review): presumably the minimum number of positive log examples required before a
// skill's logs count as "ready" for eval generation — confirm against the callers.
export const MIN_LOG_READY_POSITIVES = 3;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Parse `text` as a single, standalone JSON object.
 *
 * The input is trimmed first and must start with `{` and end with `}`;
 * any other shape, and any text that fails to parse, yields `null`.
 *
 * @param text - Raw text expected to contain exactly one JSON object.
 * @returns The parsed object, or `null` when the text is not a JSON object.
 */
export function extractJsonObject(text: string): Record<string, unknown> | null {
  const body = text.trim();
  const looksLikeObject = body.startsWith("{") && body.endsWith("}");
  if (!looksLikeObject) {
    return null;
  }

  let value: unknown;
  try {
    value = JSON.parse(body);
  } catch {
    // Malformed JSON is reported as "no object" rather than thrown.
    return null;
  }

  if (value && typeof value === "object") {
    return value as Record<string, unknown>;
  }
  return null;
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Rewrite a legacy selftune command line onto the current lifecycle surface.
 *
 * The rewrites run in order: `create replay|baseline|check` become `verify`,
 * `create publish` becomes `publish`, `evolve-body` / `evolve body` become
 * `improve --scope body`, `evolve` becomes `improve`, `search-run` becomes
 * `improve --scope package`, and `orchestrate` becomes `run`. A standalone
 * `--watch` flag is then dropped and runs of whitespace are collapsed.
 *
 * @param command - Command string to normalize; may be `null` or `undefined`.
 * @returns The normalized, trimmed command, or `null` when `command` is empty,
 *   `null`, or `undefined`.
 */
export function normalizeLifecycleCommand(command: string | null | undefined): string | null {
  if (!command) {
    return null;
  }

  // Order matters: the body-scoped evolve rewrite must run before the plain
  // `evolve` rewrite, and whitespace cleanup must run last.
  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    [/\bselftune create replay\b/g, "selftune verify"],
    [/\bselftune create baseline\b/g, "selftune verify"],
    [/\bselftune create check\b/g, "selftune verify"],
    [/\bselftune create publish\b/g, "selftune publish"],
    [/\bselftune evolve[- ]body\b/g, "selftune improve --scope body"],
    [/\bselftune evolve\b/g, "selftune improve"],
    [/\bselftune search-run\b/g, "selftune improve --scope package"],
    [/\bselftune orchestrate\b/g, "selftune run"],
    [/\s+--watch(?=\s|$)/g, ""],
    [/\s{2,}/g, " "],
  ];

  let rewritten = command;
  for (const [pattern, replacement] of rewrites) {
    rewritten = rewritten.replace(pattern, replacement);
  }
  return rewritten.trim();
}
|
|
18
|
+
|
|
19
|
+
/**
 * Rewrite legacy lifecycle wording inside free-form text to the current
 * command names.
 *
 * Mappings, applied in order and case-sensitively:
 * - `create replay` / `create baseline` / `create check` become `verify`
 * - `create publish` becomes `publish`
 * - `evolve body` / `evolve-body` become `improve --scope body`
 * - `evolve` becomes `improve`
 * - `search-run` becomes `improve --scope package`
 * - `orchestrate` becomes `run`
 *
 * Each mapping also has a capitalized form (`Create replay` becomes `Verify`, etc.).
 *
 * @param text - Text to normalize; may be `null` or `undefined`.
 * @returns The rewritten text, or `""` when `text` is empty, `null`, or `undefined`.
 */
export function normalizeLifecycleText(text: string | null | undefined): string {
  if (!text) return "";

  return (
    text
      .replace(/\bRun create replay\b/g, "Run verify")
      .replace(/\brun create replay\b/g, "run verify")
      .replace(/\bcreate replay\b/g, "verify")
      .replace(/\bCreate replay\b/g, "Verify")
      .replace(/\bRun create baseline\b/g, "Run verify")
      .replace(/\brun create baseline\b/g, "run verify")
      .replace(/\bcreate baseline\b/g, "verify")
      .replace(/\bCreate baseline\b/g, "Verify")
      .replace(/\bRun create check\b/g, "Run verify")
      .replace(/\brun create check\b/g, "run verify")
      .replace(/\bcreate check\b/g, "verify")
      .replace(/\bCreate check\b/g, "Verify")
      .replace(/\bRun create publish\b/g, "Run publish")
      .replace(/\brun create publish\b/g, "run publish")
      .replace(/\bcreate publish\b/g, "publish")
      .replace(/\bCreate publish\b/g, "Publish")
      // Accept the hyphenated spelling too, matching normalizeLifecycleCommand.
      // Without it the generic `evolve` rule below turns "evolve-body" into
      // "improve-body", which is not a command.
      .replace(/\bevolve[- ]body\b/g, "improve --scope body")
      .replace(/\bEvolve[- ]body\b/g, "Improve --scope body")
      .replace(/\bevolve\b/g, "improve")
      .replace(/\bEvolve\b/g, "Improve")
      .replace(/\bsearch-run\b/g, "improve --scope package")
      .replace(/\bSearch-run\b/g, "Improve --scope package")
      .replace(/\bselftune orchestrate\b/g, "selftune run")
      .replace(/\bOrchestrate\b/g, "Run")
      .replace(/\borchestrate\b/g, "run")
  );
}
|
|
@@ -53,6 +53,42 @@ const LEADING_WRAPPED_QUERY_TAGS = [
|
|
|
53
53
|
"local-command-stdout",
|
|
54
54
|
"local-command-stderr",
|
|
55
55
|
"command-name",
|
|
56
|
+
"command-message",
|
|
57
|
+
"command-args",
|
|
58
|
+
] as const;
|
|
59
|
+
|
|
60
|
+
const SKILL_MAINTENANCE_VERBS = [
|
|
61
|
+
"grade",
|
|
62
|
+
"review",
|
|
63
|
+
"audit",
|
|
64
|
+
"inspect",
|
|
65
|
+
"analyze",
|
|
66
|
+
"analyse",
|
|
67
|
+
"understand",
|
|
68
|
+
"explain",
|
|
69
|
+
"find",
|
|
70
|
+
"locate",
|
|
71
|
+
"update",
|
|
72
|
+
"fix",
|
|
73
|
+
"repair",
|
|
74
|
+
"improve",
|
|
75
|
+
"debug",
|
|
76
|
+
"document",
|
|
77
|
+
"publish api",
|
|
78
|
+
] as const;
|
|
79
|
+
|
|
80
|
+
const SKILL_MAINTENANCE_NOUNS = [
|
|
81
|
+
"skill",
|
|
82
|
+
"skills",
|
|
83
|
+
"readme",
|
|
84
|
+
"docs",
|
|
85
|
+
"documentation",
|
|
86
|
+
"workflow",
|
|
87
|
+
"workflows",
|
|
88
|
+
"reference",
|
|
89
|
+
"references",
|
|
90
|
+
"files",
|
|
91
|
+
"format",
|
|
56
92
|
] as const;
|
|
57
93
|
|
|
58
94
|
function stripLeadingWrappedQueryText(query: string): string {
|
|
@@ -81,7 +117,7 @@ export function extractActionableQueryText(query: string): string | null {
|
|
|
81
117
|
const trimmed = query.trim();
|
|
82
118
|
if (!trimmed || trimmed === "-" || trimmed === "(query not found)") return null;
|
|
83
119
|
|
|
84
|
-
const candidate = stripLeadingWrappedQueryText(trimmed)
|
|
120
|
+
const candidate = stripLeadingWrappedQueryText(trimmed);
|
|
85
121
|
if (!candidate || candidate === "-" || candidate === "(query not found)") return null;
|
|
86
122
|
|
|
87
123
|
const isBlocked =
|
|
@@ -92,6 +128,51 @@ export function extractActionableQueryText(query: string): string | null {
|
|
|
92
128
|
return isBlocked ? null : candidate;
|
|
93
129
|
}
|
|
94
130
|
|
|
131
|
+
/**
 * Build the distinct lowercase spellings of a skill name.
 *
 * Variants, in insertion order: the lowercased name; the name with runs of
 * `-`/`_` turned into spaces; the name with `-`, `_` and whitespace removed;
 * and the name with camelCase boundaries and `-`/`_` runs turned into spaces.
 * Duplicates and empty strings are removed.
 *
 * @param skillName - Skill name as written by the user or on disk.
 * @returns The unique non-empty variants, or `[]` for a blank name.
 */
function normalizeSkillNameVariants(skillName: string): string[] {
  const name = skillName.trim();
  if (name.length === 0) {
    return [];
  }

  const lowered = name.toLowerCase();
  const spaced = lowered.replace(/[-_]+/g, " ");
  const squashed = lowered.replace(/[-_\s]+/g, "");
  // camelCase boundaries exist only in the original casing, so split before lowercasing.
  const camelSplit = name
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
    .replace(/[-_]+/g, " ")
    .toLowerCase();

  const unique = new Set<string>([lowered, spaced, squashed, camelSplit]);
  return Array.from(unique).filter((variant) => variant.length > 0);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Report whether any of `terms` occurs in `text` starting at a word boundary.
 *
 * Matching is anchored only at the start of the term, so inflected forms
 * ("fixing", "reviewed", "formats") still match while embedded occurrences
 * ("prefix", "preview", "information") do not.
 */
function matchesMaintenanceTerm(text: string, terms: readonly string[]): boolean {
  return terms.some((term) => {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`\\b${escaped}`).test(text);
  });
}

/**
 * Heuristically decide whether a query is about maintaining a skill rather
 * than using it.
 *
 * The query is first reduced via `extractActionableQueryText`; a query with no
 * actionable text is never maintenance. The remaining text is lowercased and
 * whitespace-collapsed, then classified as maintenance when any of these hold:
 * - it asks "how ... works" and mentions the skill name,
 * - it contains a maintenance verb and a maintenance noun,
 * - it contains a maintenance verb and mentions the skill name.
 *
 * Verbs and nouns are matched at word starts rather than as raw substrings,
 * so words that merely contain a term (e.g. "prefix" for "fix", "information"
 * for "format", "profiles" for "files") no longer trigger a false positive.
 *
 * @param query - Raw user query text.
 * @param skillName - Optional skill name; its normalized variants are matched
 *   as substrings of the query.
 * @returns `true` when the query looks like skill maintenance.
 */
export function isLikelySkillMaintenanceQuery(query: string, skillName?: string): boolean {
  const candidate = extractActionableQueryText(query);
  if (!candidate) return false;

  const lowered = candidate.toLowerCase().replace(/\s+/g, " ").trim();
  const mentionsMaintenanceVerb = matchesMaintenanceTerm(lowered, SKILL_MAINTENANCE_VERBS);
  const mentionsMaintenanceNoun = matchesMaintenanceTerm(lowered, SKILL_MAINTENANCE_NOUNS);
  const mentionsHowItWorks = /\bhow\b[\s\S]{0,80}\bworks?\b/.test(lowered);
  const mentionsSkillName = skillName
    ? normalizeSkillNameVariants(skillName).some(
        (variant) => variant.length > 0 && lowered.includes(variant),
      )
    : false;

  if (mentionsHowItWorks && mentionsSkillName) return true;
  if (mentionsMaintenanceVerb && mentionsMaintenanceNoun) return true;
  if (mentionsMaintenanceVerb && mentionsSkillName) return true;
  return false;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Return the actionable text of `query` when it can serve as a positive eval
 * example, i.e. it is actionable and is not classified as a skill-maintenance
 * query by `isLikelySkillMaintenanceQuery`.
 *
 * @param query - Raw user query text.
 * @param skillName - Optional skill name forwarded to the maintenance check.
 * @returns The actionable query text, or `null` when the query is not
 *   actionable or looks like skill maintenance.
 */
export function extractPositiveEvalQueryText(query: string, skillName?: string): string | null {
  const actionable = extractActionableQueryText(query);
  if (!actionable || isLikelySkillMaintenanceQuery(actionable, skillName)) {
    return null;
  }
  return actionable;
}
|
|
175
|
+
|
|
95
176
|
/**
 * Report whether `query` contains actionable text.
 *
 * @param query - Raw user query text.
 * @returns `true` when `extractActionableQueryText` yields a non-null result.
 */
export function isActionableQueryText(query: string): boolean {
  const actionable = extractActionableQueryText(query);
  return actionable !== null;
}
|
|
@@ -26,13 +26,96 @@ function createNoopTUI(): EvolveTUI {
|
|
|
26
26
|
return { step() {}, done() {}, fail() {}, finish() {}, destroy() {} };
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
/**
 * Build an `EvolveTUI` that reports progress as plain, append-only lines on
 * stderr (no cursor movement or in-place redraws).
 *
 * A header line is written immediately. `step` prints a "started" line and
 * remembers the label; the next `step`, `done`, `fail`, or `finish` prints a
 * completion line with the elapsed time since the step started. `done`/`fail`
 * called with no step in flight print a completion line with `0.0s`.
 * After `finish` or `destroy`, every method is a no-op.
 *
 * @param opts - Skill name and model shown in the header line.
 * @returns The plain-text TUI implementation.
 */
function createPlainTextTUI(opts: { skillName: string; model: string }): EvolveTUI {
  const emit = (chunk: string): void => {
    process.stderr.write(chunk);
  };

  let activeSince = Date.now();
  let activeLabel = "";
  let stepOpen = false;
  let closed = false;

  // Plain ASCII markers when NO_COLOR is set, Unicode check/cross otherwise.
  const okGlyph = process.env.NO_COLOR ? "+" : "\u2713";
  const failGlyph = process.env.NO_COLOR ? "x" : "\u2717";

  emit(`\n selftune evolve \u2500\u2500 ${opts.skillName} \u2500\u2500 ${opts.model}\n\n`);

  // Print one completion line; the label is padded so times line up at column 48.
  const printResult = (glyph: string, label: string, elapsedMs: number): void => {
    const seconds = `${(elapsedMs / 1000).toFixed(1)}s`;
    const gap = " ".repeat(Math.max(1, 48 - label.length));
    emit(` ${glyph} ${label}${gap}${seconds}\n`);
  };

  // Close the step in flight, timing it from its start.
  const closeOpenStep = (glyph: string, label: string): void => {
    const elapsedMs = Date.now() - activeSince;
    stepOpen = false;
    printResult(glyph, label, elapsedMs);
  };

  // Shared body of done()/fail(): they differ only in the marker.
  const settle = (glyph: string, label: string): void => {
    if (closed) return;
    if (stepOpen) {
      closeOpenStep(glyph, label);
    } else {
      printResult(glyph, label, 0);
    }
    activeLabel = "";
  };

  return {
    step(label: string): void {
      if (closed) return;
      // Starting a new step implicitly completes the previous one as successful.
      if (stepOpen) {
        closeOpenStep(okGlyph, activeLabel);
      }
      activeLabel = label;
      activeSince = Date.now();
      stepOpen = true;
      emit(` -> ${label}\n`);
    },

    done(label: string): void {
      settle(okGlyph, label);
    },

    fail(label: string): void {
      settle(failGlyph, label);
    },

    finish(summary: string): void {
      if (closed) return;
      if (stepOpen) {
        closeOpenStep(okGlyph, activeLabel);
      }
      emit(`\n ${summary}\n`);
      closed = true;
    },

    destroy(): void {
      closed = true;
      stepOpen = false;
      activeLabel = "";
    },
  };
}
|
|
109
|
+
|
|
29
110
|
export function createEvolveTUI(opts: { skillName: string; model: string }): EvolveTUI {
|
|
30
111
|
const noColor = !!process.env.NO_COLOR;
|
|
31
112
|
const isTTY = !!process.stderr.isTTY;
|
|
113
|
+
const isTestEnvironment = process.env.BUN_ENV?.includes("test");
|
|
32
114
|
|
|
33
|
-
//
|
|
115
|
+
// Non-interactive agent runs still need durable progress lines. Keep tests
|
|
116
|
+
// silent by default unless explicitly forced.
|
|
34
117
|
if (!isTTY && !process.env.SELFTUNE_TUI_FORCE) {
|
|
35
|
-
return createNoopTUI();
|
|
118
|
+
return isTestEnvironment ? createNoopTUI() : createPlainTextTUI(opts);
|
|
36
119
|
}
|
|
37
120
|
|
|
38
121
|
const write = (s: string) => process.stderr.write(s);
|