@mainahq/core 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/package.json +37 -0
- package/src/ai/__tests__/ai.test.ts +207 -0
- package/src/ai/__tests__/design-approaches.test.ts +192 -0
- package/src/ai/__tests__/spec-questions.test.ts +191 -0
- package/src/ai/__tests__/tiers.test.ts +110 -0
- package/src/ai/commit-msg.ts +28 -0
- package/src/ai/design-approaches.ts +76 -0
- package/src/ai/index.ts +205 -0
- package/src/ai/pr-summary.ts +60 -0
- package/src/ai/spec-questions.ts +74 -0
- package/src/ai/tiers.ts +52 -0
- package/src/ai/try-generate.ts +89 -0
- package/src/ai/validate.ts +66 -0
- package/src/benchmark/__tests__/reporter.test.ts +525 -0
- package/src/benchmark/__tests__/runner.test.ts +113 -0
- package/src/benchmark/__tests__/story-loader.test.ts +152 -0
- package/src/benchmark/reporter.ts +332 -0
- package/src/benchmark/runner.ts +91 -0
- package/src/benchmark/story-loader.ts +88 -0
- package/src/benchmark/types.ts +95 -0
- package/src/cache/__tests__/keys.test.ts +97 -0
- package/src/cache/__tests__/manager.test.ts +312 -0
- package/src/cache/__tests__/ttl.test.ts +94 -0
- package/src/cache/keys.ts +44 -0
- package/src/cache/manager.ts +231 -0
- package/src/cache/ttl.ts +77 -0
- package/src/config/__tests__/config.test.ts +376 -0
- package/src/config/index.ts +198 -0
- package/src/context/__tests__/budget.test.ts +179 -0
- package/src/context/__tests__/engine.test.ts +163 -0
- package/src/context/__tests__/episodic.test.ts +291 -0
- package/src/context/__tests__/relevance.test.ts +323 -0
- package/src/context/__tests__/retrieval.test.ts +143 -0
- package/src/context/__tests__/selector.test.ts +174 -0
- package/src/context/__tests__/semantic.test.ts +252 -0
- package/src/context/__tests__/treesitter.test.ts +229 -0
- package/src/context/__tests__/working.test.ts +236 -0
- package/src/context/budget.ts +130 -0
- package/src/context/engine.ts +394 -0
- package/src/context/episodic.ts +251 -0
- package/src/context/relevance.ts +325 -0
- package/src/context/retrieval.ts +325 -0
- package/src/context/selector.ts +93 -0
- package/src/context/semantic.ts +331 -0
- package/src/context/treesitter.ts +216 -0
- package/src/context/working.ts +192 -0
- package/src/db/__tests__/db.test.ts +151 -0
- package/src/db/index.ts +211 -0
- package/src/db/schema.ts +84 -0
- package/src/design/__tests__/design.test.ts +310 -0
- package/src/design/__tests__/generate-hld-lld.test.ts +109 -0
- package/src/design/__tests__/review.test.ts +561 -0
- package/src/design/index.ts +297 -0
- package/src/design/review.ts +327 -0
- package/src/explain/__tests__/explain.test.ts +173 -0
- package/src/explain/index.ts +181 -0
- package/src/features/__tests__/analyzer.test.ts +358 -0
- package/src/features/__tests__/checklist.test.ts +454 -0
- package/src/features/__tests__/numbering.test.ts +319 -0
- package/src/features/__tests__/quality.test.ts +295 -0
- package/src/features/__tests__/traceability.test.ts +147 -0
- package/src/features/analyzer.ts +445 -0
- package/src/features/checklist.ts +366 -0
- package/src/features/index.ts +18 -0
- package/src/features/numbering.ts +404 -0
- package/src/features/quality.ts +349 -0
- package/src/features/test-stubs.ts +157 -0
- package/src/features/traceability.ts +260 -0
- package/src/feedback/__tests__/async-feedback.test.ts +52 -0
- package/src/feedback/__tests__/collector.test.ts +219 -0
- package/src/feedback/__tests__/compress.test.ts +150 -0
- package/src/feedback/__tests__/preferences.test.ts +169 -0
- package/src/feedback/collector.ts +135 -0
- package/src/feedback/compress.ts +92 -0
- package/src/feedback/preferences.ts +108 -0
- package/src/git/__tests__/git.test.ts +62 -0
- package/src/git/index.ts +110 -0
- package/src/hooks/__tests__/runner.test.ts +266 -0
- package/src/hooks/index.ts +8 -0
- package/src/hooks/runner.ts +130 -0
- package/src/index.ts +356 -0
- package/src/init/__tests__/init.test.ts +228 -0
- package/src/init/index.ts +364 -0
- package/src/language/__tests__/detect.test.ts +77 -0
- package/src/language/__tests__/profile.test.ts +51 -0
- package/src/language/detect.ts +70 -0
- package/src/language/profile.ts +110 -0
- package/src/prompts/__tests__/defaults.test.ts +52 -0
- package/src/prompts/__tests__/engine.test.ts +183 -0
- package/src/prompts/__tests__/evolution-resolve.test.ts +169 -0
- package/src/prompts/__tests__/evolution.test.ts +187 -0
- package/src/prompts/__tests__/loader.test.ts +105 -0
- package/src/prompts/candidates/review-v2.md +55 -0
- package/src/prompts/defaults/ai-review.md +49 -0
- package/src/prompts/defaults/commit.md +30 -0
- package/src/prompts/defaults/context.md +26 -0
- package/src/prompts/defaults/design-approaches.md +57 -0
- package/src/prompts/defaults/design-hld-lld.md +55 -0
- package/src/prompts/defaults/design.md +53 -0
- package/src/prompts/defaults/explain.md +31 -0
- package/src/prompts/defaults/fix.md +32 -0
- package/src/prompts/defaults/index.ts +38 -0
- package/src/prompts/defaults/review.md +41 -0
- package/src/prompts/defaults/spec-questions.md +59 -0
- package/src/prompts/defaults/tests.md +72 -0
- package/src/prompts/engine.ts +137 -0
- package/src/prompts/evolution.ts +409 -0
- package/src/prompts/loader.ts +71 -0
- package/src/review/__tests__/review.test.ts +288 -0
- package/src/review/comprehensive.ts +362 -0
- package/src/review/index.ts +417 -0
- package/src/stats/__tests__/tracker.test.ts +323 -0
- package/src/stats/index.ts +11 -0
- package/src/stats/tracker.ts +492 -0
- package/src/ticket/__tests__/ticket.test.ts +273 -0
- package/src/ticket/index.ts +185 -0
- package/src/utils.ts +87 -0
- package/src/verify/__tests__/ai-review.test.ts +242 -0
- package/src/verify/__tests__/coverage.test.ts +83 -0
- package/src/verify/__tests__/detect.test.ts +175 -0
- package/src/verify/__tests__/diff-filter.test.ts +338 -0
- package/src/verify/__tests__/fix.test.ts +478 -0
- package/src/verify/__tests__/linters/clippy.test.ts +45 -0
- package/src/verify/__tests__/linters/go-vet.test.ts +27 -0
- package/src/verify/__tests__/linters/ruff.test.ts +64 -0
- package/src/verify/__tests__/mutation.test.ts +141 -0
- package/src/verify/__tests__/pipeline.test.ts +553 -0
- package/src/verify/__tests__/proof.test.ts +97 -0
- package/src/verify/__tests__/secretlint.test.ts +190 -0
- package/src/verify/__tests__/semgrep.test.ts +217 -0
- package/src/verify/__tests__/slop.test.ts +366 -0
- package/src/verify/__tests__/sonar.test.ts +113 -0
- package/src/verify/__tests__/syntax-guard.test.ts +227 -0
- package/src/verify/__tests__/trivy.test.ts +191 -0
- package/src/verify/__tests__/visual.test.ts +139 -0
- package/src/verify/ai-review.ts +276 -0
- package/src/verify/coverage.ts +134 -0
- package/src/verify/detect.ts +171 -0
- package/src/verify/diff-filter.ts +183 -0
- package/src/verify/fix.ts +317 -0
- package/src/verify/linters/clippy.ts +52 -0
- package/src/verify/linters/go-vet.ts +32 -0
- package/src/verify/linters/ruff.ts +47 -0
- package/src/verify/mutation.ts +143 -0
- package/src/verify/pipeline.ts +328 -0
- package/src/verify/proof.ts +277 -0
- package/src/verify/secretlint.ts +168 -0
- package/src/verify/semgrep.ts +170 -0
- package/src/verify/slop.ts +493 -0
- package/src/verify/sonar.ts +146 -0
- package/src/verify/syntax-guard.ts +251 -0
- package/src/verify/trivy.ts +161 -0
- package/src/verify/visual.ts +460 -0
- package/src/workflow/__tests__/context.test.ts +110 -0
- package/src/workflow/context.ts +81 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { hashContent } from "../cache/keys";
|
|
2
|
+
import { getFeedbackDb } from "../db/index";
|
|
3
|
+
import { loadDefault, type PromptTask } from "./defaults/index";
|
|
4
|
+
import { abTest } from "./evolution";
|
|
5
|
+
import {
|
|
6
|
+
loadConstitution,
|
|
7
|
+
loadUserOverride,
|
|
8
|
+
mergePrompts,
|
|
9
|
+
renderTemplate,
|
|
10
|
+
} from "./loader";
|
|
11
|
+
|
|
12
|
+
/** Result of assembling a system prompt. */
export interface BuiltPrompt {
  /** Fully rendered prompt text. */
  prompt: string;
  /** Hash of the rendered prompt text. */
  hash: string;
}
|
|
16
|
+
|
|
17
|
+
/** Outcome of one prompt use, as persisted by `recordOutcome`. */
export interface FeedbackOutcome {
  /** Whether the result was accepted; stored as 1 or 0. */
  accepted: boolean;
  /** Command the prompt was used for; stored in the `command` column. */
  command: string;
  /** Optional free-form context; stored as NULL when omitted. */
  context?: string;
}
|
|
22
|
+
|
|
23
|
+
/** Aggregated feedback for a single prompt hash. */
export interface PromptStat {
  /** Value of the `prompt_hash` column the feedback rows were grouped by. */
  promptHash: string;
  /** Number of feedback rows recorded for this hash. */
  totalUsage: number;
  /** Accepted rows divided by total rows (0 when there are no rows). */
  acceptRate: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Builds the system prompt for `task` and returns it with a content hash.
 *
 * Steps, in order:
 *  1. read the constitution for `mainaDir`; it is exposed to templates as the
 *     `constitution` variable unless `variables` supplies its own value;
 *  2. load the built-in default template for the task;
 *  3. ask `abTest` whether a candidate prompt should be served; if so, and its
 *     content can be read from `prompt_versions`, render and return it;
 *  4. otherwise render the user override when one exists, else the default.
 *
 * NOTE(review): the returned hash is always the hash of the *rendered* prompt.
 * `resolveABTests` in evolution.ts matches feedback rows on the candidate hash
 * stored in `prompt_versions` (hash of the unrendered content). If callers pass
 * this function's hash to `recordOutcome`, candidate feedback may never be
 * attributed to the candidate whenever rendering changes the text; confirm.
 *
 * @param task - Prompt task name; handed to `loadDefault` as a `PromptTask`.
 * @param mainaDir - Directory used for the constitution, overrides and feedback DB.
 * @param variables - Template variables; they take precedence over `constitution`.
 * @returns The rendered prompt together with `hashContent(prompt)`.
 */
export async function buildSystemPrompt(
  task: string,
  mainaDir: string,
  variables: Record<string, string> = {},
): Promise<BuiltPrompt> {
  const constitution = await loadConstitution(mainaDir);
  const defaultPrompt = await loadDefault(task as PromptTask);

  // Caller-supplied variables win over the injected constitution.
  const templateVars: Record<string, string> = { constitution, ...variables };
  const renderAndHash = (template: string): BuiltPrompt => {
    const prompt = renderTemplate(template, templateVars);
    return { prompt, hash: hashContent(prompt) };
  };

  // A/B test: abTest decides whether this call is served by a candidate.
  const experiment = abTest(mainaDir, task);
  if (experiment.variant === "candidate" && experiment.hash) {
    const feedbackDb = getFeedbackDb(mainaDir);
    if (feedbackDb.ok) {
      const stored = feedbackDb.value.db
        .query("SELECT content FROM prompt_versions WHERE hash = ? LIMIT 1")
        .get(experiment.hash) as { content: string } | null;
      // Missing or empty content falls through to the regular path below.
      if (stored?.content) {
        return renderAndHash(stored.content);
      }
    }
  }

  // A user override, when present, replaces the default template entirely.
  const userOverride = await loadUserOverride(mainaDir, task);
  return renderAndHash(mergePrompts(defaultPrompt, userOverride));
}
|
|
83
|
+
|
|
84
|
+
/**
 * Appends one accepted/rejected outcome for a prompt to the `feedback` table.
 *
 * Does nothing when the feedback database cannot be obtained. The row id is
 * built from the prompt hash, the current time in milliseconds and a random
 * base-36 suffix.
 *
 * @param mainaDir - Directory passed to `getFeedbackDb`.
 * @param promptHash - Hash identifying the prompt the outcome belongs to.
 * @param outcome - Acceptance flag, command and optional context to store.
 */
export function recordOutcome(
  mainaDir: string,
  promptHash: string,
  outcome: FeedbackOutcome,
): void {
  const feedbackDb = getFeedbackDb(mainaDir);
  if (!feedbackDb.ok) {
    return;
  }

  const millis = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2);
  const rowId = `${promptHash}-${millis}-${randomSuffix}`;

  const insert = feedbackDb.value.db.prepare(
    `INSERT INTO feedback (id, prompt_hash, command, accepted, context, created_at)
     VALUES (?, ?, ?, ?, ?, ?)`,
  );
  insert.run(
    rowId,
    promptHash,
    outcome.command,
    outcome.accepted ? 1 : 0, // booleans are persisted as 1/0
    outcome.context ?? null,
    new Date().toISOString(),
  );
}
|
|
111
|
+
|
|
112
|
+
/**
 * Summarises the `feedback` table per prompt hash.
 *
 * @param mainaDir - Directory passed to `getFeedbackDb`.
 * @returns One entry per distinct `prompt_hash` with its row count and accept
 *   rate; an empty array when the database cannot be obtained.
 */
export function getPromptStats(mainaDir: string): PromptStat[] {
  const feedbackDb = getFeedbackDb(mainaDir);
  if (!feedbackDb.ok) {
    return [];
  }

  type StatRow = { prompt_hash: string; total: number; accepted_count: number };
  const statRows = feedbackDb.value.db
    .query(
      `SELECT prompt_hash, COUNT(*) as total, SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count
       FROM feedback GROUP BY prompt_hash`,
    )
    .all() as StatRow[];

  const stats: PromptStat[] = [];
  for (const { prompt_hash, total, accepted_count } of statRows) {
    stats.push({
      promptHash: prompt_hash,
      totalUsage: total,
      acceptRate: total > 0 ? accepted_count / total : 0,
    });
  }
  return stats;
}
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
import { hashContent } from "../cache/keys";
|
|
2
|
+
import { getFeedbackDb } from "../db/index";
|
|
3
|
+
|
|
4
|
+
// ── Types ────────────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
/** Feedback summary for one task, as produced by `analyseFeedback`. */
export interface FeedbackAnalysis {
  /** Task (feedback `command` value) that was analysed. */
  task: string;
  /** Number of feedback rows found for the task. */
  totalSamples: number;
  /** Accepted rows divided by total rows; 0 when there are none. */
  acceptRate: number;
  /** True once there are enough samples and the accept rate is below threshold. */
  needsImprovement: boolean;
}
|
|
12
|
+
|
|
13
|
+
/** A prompt version taking part in (or resulting from) an A/B test. */
export interface CandidatePrompt {
  /** Task the prompt belongs to. */
  task: string;
  /** Unrendered prompt template text. */
  content: string;
  /** Hash of `content`. */
  hash: string;
  /** Lifecycle state of the prompt version. */
  status: "candidate" | "active" | "retired";
}
|
|
19
|
+
|
|
20
|
+
/** Variant chosen by `abTest` for a single prompt build. */
export interface AbTestResult {
  /** Which prompt to serve. */
  variant: "active" | "candidate";
  /** Hash of the candidate; only set when `variant` is "candidate". */
  hash?: string;
}
|
|
24
|
+
|
|
25
|
+
/** Decision taken for one task by `resolveABTests`. */
export interface ABResolution {
  /** Task whose candidate was evaluated. */
  task: string;
  /** What happened to the candidate. */
  action: "promoted" | "retired" | "continuing";
  /** Human-readable explanation of the decision. */
  reason: string;
  /** Accept rate measured for the candidate, as a 0..1 fraction. */
  candidateAcceptRate?: number;
  /** Accept rate measured for the incumbent; absent when it was not computed. */
  incumbentAcceptRate?: number;
}
|
|
32
|
+
|
|
33
|
+
/** Feedback summary for one workflow step. */
export interface WorkflowStepAnalysis {
  /** Value of the `workflow_step` column the rows were grouped by. */
  step: string;
  /** Number of feedback rows for the step. */
  totalSamples: number;
  /** Accepted rows divided by total rows. */
  acceptRate: number;
  /** True once there are enough samples and the accept rate is below threshold. */
  needsImprovement: boolean;
}
|
|
39
|
+
|
|
40
|
+
/** Pass/fail summary for one workflow run. */
export interface WorkflowRunSummary {
  /** Value of the `workflow_id` column the rows were grouped by. */
  workflowId: string;
  /** Number of feedback rows recorded for the run. */
  totalSteps: number;
  /** Number of those rows that were accepted. */
  passedSteps: number;
  /** `passedSteps / totalSteps`, or 0 when there are no steps. */
  successRate: number;
  /** Earliest `created_at` value among the run's rows. */
  createdAt: string;
}
|
|
47
|
+
|
|
48
|
+
// ── Constants ────────────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
/** An accept rate strictly below this value flags a prompt as needing improvement. */
const IMPROVEMENT_THRESHOLD = 0.6;

/** Sample count required before the improvement flag may be raised. */
const MIN_SAMPLES = 10;

/** Candidate sample count required before an A/B test is resolved. */
const MIN_AB_SAMPLES = 30;

/** Margin (as a 0..1 fraction) the candidate must win or lose by for a decision. */
const AB_MARGIN = 0.05;

/** Probability that a prompt build is served by the candidate (20%). */
const CANDIDATE_RATIO = 0.2;
|
|
64
|
+
|
|
65
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
/**
 * Fetches the feedback database handle for `mainaDir`.
 *
 * Despite the name, nothing in this function creates or migrates a table; it
 * only unwraps the `getFeedbackDb` result.
 * NOTE(review): presumably `getFeedbackDb` is responsible for the schema; confirm.
 *
 * @returns The handle from `getFeedbackDb`, or `null` when it reports failure.
 */
function ensurePromptVersionsTable(mainaDir: string) {
  const opened = getFeedbackDb(mainaDir);
  return opened.ok ? opened.value : null;
}
|
|
72
|
+
|
|
73
|
+
// ── Public API ───────────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
/**
 * Summarises all feedback rows whose `command` equals `task`.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param task - Command name to filter feedback by.
 * @returns Sample count and accept rate. `needsImprovement` is set only when
 *   there are at least `MIN_SAMPLES` rows and the accept rate is below
 *   `IMPROVEMENT_THRESHOLD`. An all-zero result is returned when the database
 *   is unavailable or holds no rows for the task.
 */
export function analyseFeedback(
  mainaDir: string,
  task: string,
): FeedbackAnalysis {
  const noData: FeedbackAnalysis = {
    task,
    totalSamples: 0,
    acceptRate: 0,
    needsImprovement: false,
  };

  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return noData;
  }

  const counts = handle.db
    .query(
      `SELECT COUNT(*) as total,
              SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count
       FROM feedback WHERE command = ?`,
    )
    .get(task) as { total: number; accepted_count: number } | null;

  // The zero check also guards the division below.
  if (!counts || counts.total === 0) {
    return noData;
  }

  const { total, accepted_count: acceptedCount } = counts;
  const acceptRate = acceptedCount / total;

  return {
    task,
    totalSamples: total,
    acceptRate,
    needsImprovement: total >= MIN_SAMPLES && acceptRate < IMPROVEMENT_THRESHOLD,
  };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Summarises feedback per workflow step.
 *
 * Rows with a NULL `workflow_step` are ignored; results are ordered by step name.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @returns One entry per step, or an empty array when the database is unavailable.
 */
export function analyseWorkflowFeedback(
  mainaDir: string,
): WorkflowStepAnalysis[] {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return [];
  }

  type StepRow = { workflow_step: string; total: number; accepted_count: number };
  const stepRows = handle.db
    .query(
      `SELECT workflow_step, COUNT(*) as total,
              SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count
       FROM feedback
       WHERE workflow_step IS NOT NULL
       GROUP BY workflow_step
       ORDER BY workflow_step`,
    )
    .all() as StepRow[];

  const analyses: WorkflowStepAnalysis[] = [];
  for (const { workflow_step, total, accepted_count } of stepRows) {
    const acceptRate = total > 0 ? accepted_count / total : 0;
    analyses.push({
      step: workflow_step,
      totalSamples: total,
      acceptRate,
      needsImprovement: total >= MIN_SAMPLES && acceptRate < IMPROVEMENT_THRESHOLD,
    });
  }
  return analyses;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Summarises feedback per workflow run.
 *
 * Rows with a NULL `workflow_id` are ignored. Runs are sorted by `created_at`
 * descending and capped at `limit`.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param limit - Maximum number of runs to return (default 5); bound to SQL `LIMIT`.
 * @returns One summary per run, or an empty array when the database is unavailable.
 */
export function analyseWorkflowRuns(
  mainaDir: string,
  limit = 5,
): WorkflowRunSummary[] {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return [];
  }

  type RunRow = {
    workflow_id: string;
    total_steps: number;
    passed_steps: number;
    created_at: string;
  };
  const runRows = handle.db
    .query(
      `SELECT workflow_id, COUNT(*) as total_steps,
              SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as passed_steps,
              MIN(created_at) as created_at
       FROM feedback
       WHERE workflow_id IS NOT NULL
       GROUP BY workflow_id
       ORDER BY created_at DESC
       LIMIT ?`,
    )
    .all(limit) as RunRow[];

  const summaries: WorkflowRunSummary[] = [];
  for (const run of runRows) {
    const { total_steps: totalSteps, passed_steps: passedSteps } = run;
    summaries.push({
      workflowId: run.workflow_id,
      totalSteps,
      passedSteps,
      successRate: totalSteps > 0 ? passedSteps / totalSteps : 0,
      createdAt: run.created_at,
    });
  }
  return summaries;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Registers `content` as a new candidate prompt for `task`.
 *
 * When the feedback database is available, a row is inserted into
 * `prompt_versions` with the next version number for the task, a NULL accept
 * rate and a usage count of 0. No status column is written; the returned
 * object's `status` is always "candidate".
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param task - Task the candidate belongs to.
 * @param content - Unrendered prompt template text.
 * @returns The candidate descriptor, even when nothing could be persisted.
 */
export function createCandidate(
  mainaDir: string,
  task: string,
  content: string,
): CandidatePrompt {
  const hash = hashContent(content);
  const candidate: CandidatePrompt = { task, content, hash, status: "candidate" };

  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return candidate;
  }

  const { db } = handle;
  const rowId = `${task}-${Date.now()}-${Math.random().toString(36).slice(2)}`;

  // Versions start at 1 and increase per task.
  const latest = db
    .query(`SELECT MAX(version) as max_ver FROM prompt_versions WHERE task = ?`)
    .get(task) as { max_ver: number | null } | null;
  const nextVersion = (latest?.max_ver ?? 0) + 1;

  const insert = db.prepare(
    `INSERT INTO prompt_versions (id, task, hash, content, version, accept_rate, usage_count, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
  );
  insert.run(
    rowId,
    task,
    hash,
    content,
    nextVersion,
    null, // accept_rate: not measured yet
    0, // usage_count
    new Date().toISOString(),
  );

  return candidate;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Decides which prompt variant to serve for `task`.
 *
 * Looks up the highest-version row for the task in `prompt_versions`. When one
 * exists it is served with probability `CANDIDATE_RATIO`; otherwise, and
 * whenever the database is unavailable, the active prompt is used.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param task - Task to look up a candidate for.
 * @returns `{ variant: "candidate", hash }` when the candidate is selected,
 *   else `{ variant: "active" }`.
 */
export function abTest(mainaDir: string, task: string): AbTestResult {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return { variant: "active" };
  }

  const { db } = handle;

  // Only the hash is needed to pick a variant, so the (potentially large)
  // content column is not read here; buildSystemPrompt loads it by hash when
  // the candidate is actually served.
  const candidate = db
    .query(
      `SELECT hash FROM prompt_versions
       WHERE task = ? ORDER BY version DESC LIMIT 1`,
    )
    .get(task) as { hash: string } | null;

  if (!candidate) {
    return { variant: "active" };
  }

  // Traffic split: CANDIDATE_RATIO of calls go to the candidate.
  if (Math.random() < CANDIDATE_RATIO) {
    return { variant: "candidate", hash: candidate.hash };
  }

  return { variant: "active" };
}
|
|
265
|
+
|
|
266
|
+
/**
 * Marks a candidate as promoted by deleting its rows from `prompt_versions`.
 *
 * This function only removes the database rows. Per the original author's
 * note, the caller is expected to write the promoted content to
 * `.maina/prompts/<task>.md`.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param hash - Hash of the candidate to remove.
 * @returns True when at least one row was deleted; false otherwise or when the
 *   database is unavailable.
 */
export function promote(mainaDir: string, hash: string): boolean {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return false;
  }

  const removal = handle.db.prepare(`DELETE FROM prompt_versions WHERE hash = ?`);
  const { changes } = removal.run(hash);
  return changes > 0;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Retires a candidate without promoting it by deleting its rows from
 * `prompt_versions`.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @param hash - Hash of the candidate to remove.
 * @returns True when at least one row was deleted; false otherwise or when the
 *   database is unavailable.
 */
export function retire(mainaDir: string, hash: string): boolean {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return false;
  }

  const removal = handle.db.prepare(`DELETE FROM prompt_versions WHERE hash = ?`);
  const { changes } = removal.run(hash);
  return changes > 0;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Evaluates every running A/B test and promotes or retires candidates.
 *
 * For each task, only the highest-version row in `prompt_versions` is considered:
 *  - fewer than `MIN_AB_SAMPLES` candidate feedback rows: keep running;
 *  - candidate accept rate above incumbent + `AB_MARGIN`: `promote`;
 *  - candidate accept rate below incumbent - `AB_MARGIN`: `retire`;
 *  - otherwise: keep running.
 *
 * The incumbent rate is computed from the 100 most recent feedback rows for
 * the task whose `prompt_hash` differs from the candidate's. It defaults to
 * 0.5 when there are no such rows.
 *
 * @param mainaDir - Directory used to obtain the feedback database.
 * @returns One resolution per task with a candidate; empty when the database
 *   is unavailable or there are no candidates.
 */
export function resolveABTests(mainaDir: string): ABResolution[] {
  const handle = ensurePromptVersionsTable(mainaDir);
  if (!handle) {
    return [];
  }

  const { db } = handle;

  const versionRows = db
    .query(
      `SELECT task, hash FROM prompt_versions
       ORDER BY version DESC`,
    )
    .all() as Array<{ task: string; hash: string }>;

  if (versionRows.length === 0) {
    return [];
  }

  // Rows arrive highest version first, so the first hash seen per task wins.
  const newestByTask = new Map<string, string>();
  for (const { task, hash } of versionRows) {
    if (!newestByTask.has(task)) {
      newestByTask.set(task, hash);
    }
  }

  const asPercent = (rate: number): string => (rate * 100).toFixed(1);
  const marginPercent = (AB_MARGIN * 100).toFixed(0);

  const resolutions: ABResolution[] = [];

  for (const [task, candidateHash] of newestByTask) {
    const candidateStats = db
      .query(
        `SELECT COUNT(*) as total,
                SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count
         FROM feedback WHERE command = ? AND prompt_hash = ?`,
      )
      .get(task, candidateHash) as {
      total: number;
      accepted_count: number;
    } | null;

    // SUM() is NULL when no rows match, hence the fallbacks.
    const candidateTotal = candidateStats?.total ?? 0;
    const candidateAccepted = candidateStats?.accepted_count ?? 0;

    if (candidateTotal < MIN_AB_SAMPLES) {
      resolutions.push({
        task,
        action: "continuing",
        reason: `Insufficient samples (${candidateTotal}/${MIN_AB_SAMPLES})`,
        candidateAcceptRate:
          candidateTotal > 0 ? candidateAccepted / candidateTotal : 0,
      });
      continue;
    }

    const candidateAcceptRate = candidateAccepted / candidateTotal;

    // Recent incumbent feedback: last 100 rows not produced by the candidate.
    const incumbentRows = db
      .prepare(
        `SELECT accepted FROM feedback WHERE command = ? AND prompt_hash != ? ORDER BY created_at DESC LIMIT 100`,
      )
      .all(task, candidateHash) as Array<{ accepted: number }>;

    let incumbentAccepted = 0;
    for (const row of incumbentRows) {
      if (row.accepted === 1) {
        incumbentAccepted += 1;
      }
    }
    const incumbentAcceptRate =
      incumbentRows.length > 0 ? incumbentAccepted / incumbentRows.length : 0.5;

    const candidatePct = asPercent(candidateAcceptRate);
    const incumbentPct = asPercent(incumbentAcceptRate);

    let action: ABResolution["action"];
    let reason: string;

    if (candidateAcceptRate > incumbentAcceptRate + AB_MARGIN) {
      promote(mainaDir, candidateHash);
      action = "promoted";
      reason = `Candidate (${candidatePct}%) outperforms incumbent (${incumbentPct}%) by >${marginPercent}%`;
    } else if (candidateAcceptRate < incumbentAcceptRate - AB_MARGIN) {
      retire(mainaDir, candidateHash);
      action = "retired";
      reason = `Candidate (${candidatePct}%) underperforms incumbent (${incumbentPct}%) by >${marginPercent}%`;
    } else {
      action = "continuing";
      reason = `Within margin: candidate ${candidatePct}% vs incumbent ${incumbentPct}%`;
    }

    resolutions.push({
      task,
      action,
      reason,
      candidateAcceptRate,
      incumbentAcceptRate,
    });
  }

  return resolutions;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
|
|
3
|
+
/**
 * Loads `constitution.md` from `mainaDir`.
 *
 * Never throws: a missing file and any error raised while checking or reading
 * it both yield an empty string.
 *
 * @param mainaDir - Directory expected to contain `constitution.md`.
 * @returns The file's text, or "" when it is absent or unreadable.
 */
export async function loadConstitution(mainaDir: string): Promise<string> {
  try {
    const constitutionFile = Bun.file(join(mainaDir, "constitution.md"));
    if (await constitutionFile.exists()) {
      return await constitutionFile.text();
    }
    return "";
  } catch {
    return "";
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Loads the user's prompt override for `task` from `<mainaDir>/prompts/<task>.md`.
 *
 * Never throws: a missing file and any error raised while checking or reading
 * it both yield `null`.
 *
 * @param mainaDir - Directory expected to contain the `prompts` folder.
 * @param task - Task name; used verbatim as the file's base name.
 * @returns The override text, or `null` when it is absent or unreadable.
 */
export async function loadUserOverride(
  mainaDir: string,
  task: string,
): Promise<string | null> {
  try {
    const overrideFile = Bun.file(join(mainaDir, "prompts", `${task}.md`));
    if (await overrideFile.exists()) {
      return await overrideFile.text();
    }
    return null;
  } catch {
    return null;
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Chooses between the default prompt and a user override.
 *
 * The override is not merged into the default: when it is present (anything
 * other than `null`, including an empty string) it replaces the default entirely.
 *
 * @param defaultPrompt - Built-in prompt template.
 * @param userOverride - User-supplied template, or `null` when there is none.
 * @returns `userOverride` when it is not `null`, otherwise `defaultPrompt`.
 */
export function mergePrompts(
  defaultPrompt: string,
  userOverride: string | null,
): string {
  return userOverride !== null ? userOverride : defaultPrompt;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Substitutes `{{name}}` placeholders in `template`.
 *
 * A placeholder is `{{` + one or more word characters + `}}` with no inner
 * whitespace. It is replaced only when `variables` has an own property of that
 * name; inherited properties and unknown names leave the placeholder intact.
 * Replacement values are inserted literally (no `$` pattern expansion).
 *
 * @param template - Text containing zero or more placeholders.
 * @param variables - Placeholder values keyed by name.
 * @returns The template with every resolvable placeholder replaced.
 */
export function renderTemplate(
  template: string,
  variables: Record<string, string>,
): string {
  const placeholder = /\{\{(\w+)\}\}/g;
  return template.replace(placeholder, (token: string, name: string): string => {
    const isOwn = Object.prototype.hasOwnProperty.call(variables, name);
    // An own key holding null/undefined also keeps the placeholder.
    return isOwn ? (variables[name] ?? token) : token;
  });
}
|