wiggum-cli 0.17.3 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -12
- package/dist/agent/orchestrator.d.ts +21 -3
- package/dist/agent/orchestrator.js +394 -202
- package/dist/agent/resolve-config.js +1 -1
- package/dist/agent/scheduler.d.ts +29 -0
- package/dist/agent/scheduler.js +1149 -0
- package/dist/agent/tools/backlog.d.ts +6 -0
- package/dist/agent/tools/backlog.js +16 -1
- package/dist/agent/types.d.ts +113 -0
- package/dist/ai/conversation/url-fetcher.js +46 -13
- package/dist/ai/enhancer.js +1 -2
- package/dist/ai/providers.js +4 -4
- package/dist/commands/agent.d.ts +1 -0
- package/dist/commands/agent.js +53 -1
- package/dist/commands/config.js +8 -8
- package/dist/commands/sync.js +2 -2
- package/dist/index.js +4 -2
- package/dist/templates/scripts/feature-loop.sh.tmpl +73 -18
- package/dist/tui/app.js +10 -1
- package/dist/tui/components/HeaderContent.d.ts +4 -1
- package/dist/tui/components/HeaderContent.js +4 -2
- package/dist/tui/hooks/useAgentOrchestrator.d.ts +2 -1
- package/dist/tui/hooks/useAgentOrchestrator.js +83 -39
- package/dist/tui/screens/AgentScreen.js +3 -1
- package/dist/tui/utils/polishGoal.js +14 -1
- package/dist/utils/env.js +7 -1
- package/dist/utils/github.d.ts +13 -0
- package/dist/utils/github.js +63 -4
- package/dist/utils/logger.js +1 -1
- package/package.json +9 -7
- package/src/templates/scripts/feature-loop.sh.tmpl +73 -18
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { join } from 'node:path';
|
|
1
2
|
import { MemoryStore } from './memory/store.js';
|
|
2
3
|
import { ingestStrategicDocs } from './memory/ingest.js';
|
|
3
4
|
import { createBacklogTools } from './tools/backlog.js';
|
|
@@ -5,133 +6,94 @@ import { createMemoryTools, REFLECT_TOOL_NAME } from './tools/memory.js';
|
|
|
5
6
|
import { createExecutionTools } from './tools/execution.js';
|
|
6
7
|
import { createReportingTools } from './tools/reporting.js';
|
|
7
8
|
import { createIntrospectionTools } from './tools/introspection.js';
|
|
8
|
-
import { createDryRunExecutionTools,
|
|
9
|
+
import { createDryRunExecutionTools, createDryRunFeatureStateTools, createDryRunReportingTools } from './tools/dry-run.js';
|
|
9
10
|
import { createFeatureStateTools } from './tools/feature-state.js';
|
|
10
|
-
import {
|
|
11
|
+
import { buildRankedBacklog, createSchedulerRunCache, invalidateSchedulerRunCache, toIssueStates } from './scheduler.js';
|
|
11
12
|
import { logger } from '../utils/logger.js';
|
|
12
13
|
import { getTracedAI } from '../utils/tracing.js';
|
|
13
|
-
export const AGENT_SYSTEM_PROMPT = `You are wiggum's autonomous development
|
|
14
|
-
|
|
15
|
-
## Workflow
|
|
16
|
-
|
|
17
|
-
1. Read memory to recall previous work and context
|
|
18
|
-
- Use listStrategicDocs to see available project documentation
|
|
19
|
-
- Use readStrategicDoc to read full documents relevant to the current task (architecture, design, implementation plans)
|
|
20
|
-
2. List open issues and cross-reference with memory
|
|
21
|
-
- Consider: PM priority labels (P0 > P1 > P2), dependencies, strategic context
|
|
22
|
-
- **Housekeeping:** If memory says an issue was already completed (outcome "success" or "skipped") but it's still open:
|
|
23
|
-
1. Call assessFeatureState with the featureName and issueNumber
|
|
24
|
-
2. If recommendation is "pr_merged" or "linked_pr_merged": close it with closeIssue. Reflect with outcome "skipped". Does NOT count against maxItems.
|
|
25
|
-
3. If recommendation is anything else (e.g., "resume_implementation", "start_fresh", "resume_pr_phase"): the issue was NOT actually shipped. Do NOT close it. Instead, prioritize it as your next work item and follow the Feature State Decision Tree. This counts against maxItems.
|
|
26
|
-
- **Retry:** If memory records a previous attempt at an issue with outcome "failure" or "partial", and it's still open, prioritize it over new issues. Bugs that caused the failure may have been fixed, and existing work (branch, spec, plan) should not be abandoned. Call assessFeatureState to determine the right action — usually resume_implementation. This counts against maxItems.
|
|
27
|
-
3. For the chosen issue (one NOT already completed):
|
|
28
|
-
a. Read the full issue details
|
|
29
|
-
b. Derive a featureName from the issue title (lowercase, hyphens, no spaces)
|
|
30
|
-
c. **Assess feature state** using assessFeatureState — MANDATORY before any action
|
|
31
|
-
d. Follow the Feature State Decision Tree based on the recommendation field
|
|
32
|
-
e. Monitor progress with checkLoopStatus and readLoopLog
|
|
33
|
-
f. Report results by commenting on the issue
|
|
34
|
-
|
|
35
|
-
## Feature State Decision Tree
|
|
36
|
-
|
|
37
|
-
After calling assessFeatureState, follow the recommendation:
|
|
38
|
-
|
|
39
|
-
| recommendation | action |
|
|
40
|
-
|---|---|
|
|
41
|
-
| start_fresh | generateSpec → runLoop (fresh) |
|
|
42
|
-
| generate_plan | runLoop without resume (spec exists, needs planning) |
|
|
43
|
-
| resume_implementation | runLoop with resume: true (plan has pending tasks) |
|
|
44
|
-
| resume_pr_phase | runLoop with resume: true (all tasks done, needs PR) |
|
|
45
|
-
| pr_exists_open | Comment on issue, do NOT re-run loop |
|
|
46
|
-
| pr_merged | Verify PR is merged, close issue with closeIssue, reflect with outcome "skipped", move on |
|
|
47
|
-
| pr_closed | Decide: restart from scratch or skip |
|
|
48
|
-
| linked_pr_merged | Verify the linked PR is merged, close issue with closeIssue (comment "shipped via PR #N"), reflect with outcome "skipped", move on |
|
|
49
|
-
| linked_pr_open | Work in progress under a different branch — comment "in progress via PR #N", do NOT re-run loop |
|
|
50
|
-
|
|
51
|
-
**Critical:**
|
|
52
|
-
- When recommendation is resume_implementation or resume_pr_phase, you MUST pass resume: true to runLoop
|
|
53
|
-
- When recommendation is generate_plan, do NOT pass resume (fresh branch needed)
|
|
54
|
-
- When recommendation is start_fresh, generate a spec first, then run the loop without resume
|
|
55
|
-
- ALWAYS pass issueNumber to assessFeatureState so it can detect work shipped under a different branch name
|
|
56
|
-
- Derive short, stable feature names (2-4 words, kebab-case) from the issue title — e.g. "config-module" not "config-module-toml-read-write-with-secret-masking"
|
|
57
|
-
4. After the loop completes (successfully or with failure) — MANDATORY for EVERY issue, including subsequent ones:
|
|
58
|
-
a. Call readLoopLog to get the actual log content
|
|
59
|
-
b. Call assessFeatureState to check the actual state — do NOT rely solely on loop log output
|
|
60
|
-
c. **Blocker detection (MANDATORY):** Scan the log for pre-existing test failures (lines like "All N test failure(s) are pre-existing"). If found:
|
|
61
|
-
1. Call listIssues with labels ["bug"] to check for existing bug issues covering these failures
|
|
62
|
-
2. If no existing issue covers them, you MUST call createIssue with title "Fix N pre-existing test failures", body listing the failing files, and labels ["bug"]. If a "P0" label exists on the repo you may add it; if not, just use ["bug"].
|
|
63
|
-
3. Do NOT skip this step just because the loop succeeded — pre-existing failures degrade CI and must be tracked
|
|
64
|
-
d. Only close the issue if assessFeatureState confirms a PR was merged (recommendation: "pr_merged" or "linked_pr_merged")
|
|
65
|
-
e. When closing: check off acceptance criteria with checkAllBoxes, then close with closeIssue
|
|
66
|
-
f. If the loop produced code but no PR was created/merged, run the loop again with resume: true to trigger the PR phase
|
|
67
|
-
g. If the loop failed and code exists on the branch without a PR, this is incomplete work — do NOT close the issue
|
|
68
|
-
h. Steps 4–6 are MANDATORY after every runLoop — including the 2nd, 3rd, etc. issue. Do NOT summarize or stop after runLoop returns. The next tool call must be readLoopLog.
|
|
69
|
-
5. Reflect on the outcome:
|
|
70
|
-
- Call reflectOnWork with structured observations
|
|
71
|
-
- Use outcome "skipped" for issues that were already complete (no real work done) — these do NOT count against maxItems
|
|
72
|
-
- Use outcome "success"/"partial"/"failure" for issues where real work was performed
|
|
73
|
-
- Note what worked, what failed, any patterns discovered
|
|
74
|
-
6. Continue to next issue — MANDATORY tool call sequence:
|
|
75
|
-
a. Call listIssues (with NO label filter) to get the full backlog
|
|
76
|
-
b. Cross-reference with memory to avoid re-doing completed work
|
|
77
|
-
c. If actionable issues remain and no stop condition is met, immediately call assessFeatureState for the next priority issue — do NOT generate text
|
|
78
|
-
d. When assessFeatureState returns, follow the Feature State Decision Tree (step 3d) for that issue — e.g. start_fresh → generateSpec → runLoop. This begins a full new work cycle (steps 3–6). Do NOT stop after assessFeatureState.
|
|
79
|
-
e. Only produce a text-only response (final summary) when the backlog is empty or a stop condition is met
|
|
80
|
-
f. ANY text without a tool call terminates the session — there is no "ask for permission" step
|
|
81
|
-
|
|
82
|
-
## Model forwarding
|
|
83
|
-
|
|
84
|
-
When calling generateSpec, ALWAYS forward the model and provider so the spec generation uses the same AI model as this agent session. The values are provided in the Runtime Config section below.
|
|
14
|
+
export const AGENT_SYSTEM_PROMPT = `You are wiggum's per-issue autonomous development worker.
|
|
85
15
|
|
|
86
|
-
|
|
16
|
+
You are given exactly one backlog issue that has already been selected by a higher-level orchestrator. Your job is to ship that issue or perform the required housekeeping for it. Do not select another issue.
|
|
87
17
|
|
|
88
|
-
|
|
89
|
-
- 'manual': stop at PR creation (default)
|
|
90
|
-
- 'auto': create PR + run automated review (no merge)
|
|
91
|
-
- 'merge': create PR + review + merge if approved
|
|
92
|
-
|
|
93
|
-
## Prioritization
|
|
94
|
-
|
|
95
|
-
Use hybrid reasoning: respect PM labels (P0 > P1 > P2) but apply your own judgment for ordering within the same priority tier.
|
|
96
|
-
|
|
97
|
-
**Ordering rules (in priority order):**
|
|
98
|
-
1. PM priority labels: P0 > P1 > P2 > unlabeled
|
|
99
|
-
2. Explicit dependencies: if readIssue returns a \`dependsOn\` array (parsed from "depends on #N" / "blocked by #N" in the issue body), complete those issues first
|
|
100
|
-
3. Lower-numbered issues first: within the same priority tier, prefer lower issue numbers — they are typically more foundational (scaffolding, setup, core infrastructure)
|
|
101
|
-
4. Prefer issues with existing branches: if assessFeatureState shows a branch exists with commits ahead, prefer that issue over one without a branch — existing branches diverge further from main with every merge, increasing conflict risk
|
|
102
|
-
5. Strategic context from memory and what you learned from previous iterations
|
|
103
|
-
|
|
104
|
-
## When to stop
|
|
105
|
-
|
|
106
|
-
Stop the loop when:
|
|
107
|
-
- Backlog has no more actionable open issues
|
|
108
|
-
- You've completed the maximum number of items (if configured)
|
|
109
|
-
- A critical failure requires human attention
|
|
110
|
-
- The user has signaled to stop
|
|
111
|
-
|
|
112
|
-
IMPORTANT: Generating text without tool calls terminates the session immediately. After completing an issue, you MUST call listIssues (step 6) — never ask "should I continue?" or summarize before checking. After listIssues returns, scan the results for issues matching your constraints (if any). If actionable issues remain, immediately call assessFeatureState — do NOT generate a summary. After assessFeatureState returns for the next issue, you MUST follow the Feature State Decision Tree and call the next tool (e.g. generateSpec for start_fresh). Stopping after assessFeatureState is a bug — the result tells you what to do next. After runLoop returns, you MUST execute steps 4–6 (readLoopLog → assessFeatureState → close/comment → reflectOnWork → listIssues). Stopping after runLoop is a bug — there is always post-loop work to do. Your only text-only response is the final summary when ALL constrained issues are processed or a stop condition is met. If you were given specific issue numbers to work on, you MUST process ALL of them before stopping.
|
|
113
|
-
|
|
114
|
-
## Learning
|
|
115
|
-
|
|
116
|
-
After each issue, always call reflectOnWork. Your memory entries make you progressively better at this specific codebase. Be specific and narrative in what you record. Focus on: what patterns work here, what gotchas exist, which approaches produce better specs and fewer loop iterations.
|
|
117
|
-
|
|
118
|
-
## Error recovery
|
|
18
|
+
## Workflow
|
|
119
19
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
20
|
+
1. Read memory and strategic docs to recover relevant context.
|
|
21
|
+
2. Read the selected issue in full.
|
|
22
|
+
3. Derive a short kebab-case feature name from the issue title.
|
|
23
|
+
4. Call assessFeatureState before taking any action.
|
|
24
|
+
5. Follow the feature-state decision tree:
|
|
25
|
+
- start_fresh -> generateSpec -> runLoop
|
|
26
|
+
- generate_plan -> runLoop without resume
|
|
27
|
+
- resume_implementation -> runLoop with resume: true
|
|
28
|
+
- resume_pr_phase -> runLoop with resume: true
|
|
29
|
+
- pr_closed -> comment about the closed PR, then re-triage:
|
|
30
|
+
- if branch commits or a plan already exist, runLoop with resume: true
|
|
31
|
+
- otherwise restart with generateSpec -> runLoop without resume
|
|
32
|
+
- pr_exists_open / linked_pr_open -> comment and stop
|
|
33
|
+
- pr_merged / linked_pr_merged -> check boxes, close issue, reflect with outcome "skipped", stop
|
|
34
|
+
6. After every runLoop:
|
|
35
|
+
- readLoopLog
|
|
36
|
+
- assessFeatureState again
|
|
37
|
+
- create blocker issues for pre-existing/systemic failures when needed
|
|
38
|
+
- only close the issue if work is merged
|
|
39
|
+
7. Always call reflectOnWork before stopping.
|
|
131
40
|
|
|
132
|
-
##
|
|
41
|
+
## Important rules
|
|
133
42
|
|
|
134
|
-
|
|
43
|
+
- You must stay within the selected issue.
|
|
44
|
+
- You must pass issueNumber to assessFeatureState.
|
|
45
|
+
- You must pass resume: true for resume_implementation and resume_pr_phase.
|
|
46
|
+
- You must not force pr_closed into resume mode when there is no branch or plan state to resume.
|
|
47
|
+
- You must forward Runtime Config values using the tool schemas:
|
|
48
|
+
- pass model and provider to generateSpec when they are set
|
|
49
|
+
- pass reviewMode to runLoop when it is set
|
|
50
|
+
- You must not close an issue unless assessFeatureState confirms merged work.
|
|
51
|
+
- If a loop fails, quote or summarize readLoopLog evidence in your issue comment. Do not guess.
|
|
52
|
+
- You may use listIssues(labels: ["bug"]) only for blocker detection and duplicate checking.
|
|
53
|
+
- Your only text response is a brief final summary after the selected issue is fully handled.`;
|
|
54
|
+
function shouldCountTowardCompletedBudget(issue, outcome) {
|
|
55
|
+
return outcome !== 'skipped'
|
|
56
|
+
&& outcome !== 'unknown'
|
|
57
|
+
&& issue.scopeOrigin !== 'dependency'
|
|
58
|
+
&& issue.actionability !== 'waiting_pr';
|
|
59
|
+
}
|
|
60
|
+
function isRecoverableListingError(error) {
|
|
61
|
+
return error.startsWith('GitHub issue listing failed:');
|
|
62
|
+
}
|
|
63
|
+
const MAX_STALLED_CONTINUATION_SELECTIONS = 4;
|
|
64
|
+
function needsFollowUpAfterSuccess(candidate) {
|
|
65
|
+
return candidate.recommendation === 'resume_pr_phase'
|
|
66
|
+
|| candidate.recommendation === 'pr_closed'
|
|
67
|
+
|| candidate.recommendation === 'pr_merged'
|
|
68
|
+
|| candidate.recommendation === 'linked_pr_merged';
|
|
69
|
+
}
|
|
70
|
+
function canResumeWithinRun(candidate, prior) {
|
|
71
|
+
if (!prior)
|
|
72
|
+
return true;
|
|
73
|
+
if (prior.outcome === 'success') {
|
|
74
|
+
return needsFollowUpAfterSuccess(candidate);
|
|
75
|
+
}
|
|
76
|
+
if (prior.outcome !== 'partial' && prior.outcome !== 'failure')
|
|
77
|
+
return false;
|
|
78
|
+
return candidate.recommendation === 'resume_implementation'
|
|
79
|
+
|| candidate.recommendation === 'resume_pr_phase'
|
|
80
|
+
|| candidate.recommendation === 'pr_closed';
|
|
81
|
+
}
|
|
82
|
+
function isAbortError(err, abortSignal) {
|
|
83
|
+
if (abortSignal?.aborted)
|
|
84
|
+
return true;
|
|
85
|
+
if (err instanceof Error) {
|
|
86
|
+
return err.name === 'AbortError' || err.message === 'Aborted';
|
|
87
|
+
}
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
90
|
+
function hasExceededWithinRunContinuationLimit(candidate, prior) {
|
|
91
|
+
if (!prior)
|
|
92
|
+
return false;
|
|
93
|
+
if (!canResumeWithinRun(candidate, prior))
|
|
94
|
+
return false;
|
|
95
|
+
return prior.selections >= MAX_STALLED_CONTINUATION_SELECTIONS;
|
|
96
|
+
}
|
|
135
97
|
export function buildRuntimeConfig(config) {
|
|
136
98
|
const lines = [];
|
|
137
99
|
if (config.modelId)
|
|
@@ -147,40 +109,65 @@ export function buildRuntimeConfig(config) {
|
|
|
147
109
|
export function buildConstraints(config) {
|
|
148
110
|
const lines = [];
|
|
149
111
|
if (config.maxItems != null) {
|
|
150
|
-
lines.push(`-
|
|
112
|
+
lines.push(`- Stop after completing ${config.maxItems} issue(s).`);
|
|
151
113
|
}
|
|
152
114
|
if (config.labels?.length) {
|
|
153
|
-
lines.push(`-
|
|
115
|
+
lines.push(`- Initial backlog scope is limited to labels: ${config.labels.join(', ')}.`);
|
|
154
116
|
}
|
|
155
117
|
if (config.issues?.length) {
|
|
156
|
-
lines.push(`-
|
|
118
|
+
lines.push(`- Initial backlog scope is limited to issues: ${config.issues.map(n => `#${n}`).join(', ')}.`);
|
|
157
119
|
}
|
|
158
120
|
if (config.dryRun) {
|
|
159
|
-
lines.push('- DRY RUN MODE:
|
|
121
|
+
lines.push('- DRY RUN MODE: execution and reporting tools are simulated.');
|
|
160
122
|
}
|
|
161
123
|
return lines.length > 0
|
|
162
124
|
? `\n\n## Constraints\n\n${lines.join('\n')}`
|
|
163
125
|
: '';
|
|
164
126
|
}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
127
|
+
function mapToolResults(toolResults) {
|
|
128
|
+
return toolResults.map((tr) => ({ toolName: tr.toolName, result: tr.output }));
|
|
129
|
+
}
|
|
130
|
+
function createWorkerStepHandler(config, tracker) {
|
|
131
|
+
return async ({ toolCalls, toolResults }) => {
|
|
132
|
+
try {
|
|
133
|
+
for (const tc of toolCalls) {
|
|
134
|
+
if (tc.toolName === REFLECT_TOOL_NAME && toolResults.some(tr => tr.toolName === REFLECT_TOOL_NAME)) {
|
|
135
|
+
const input = tc.input;
|
|
136
|
+
tracker.outcome = input.outcome ?? 'unknown';
|
|
137
|
+
tracker.reflected = true;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
const stepEvent = {
|
|
141
|
+
toolCalls: toolCalls.map((tc) => ({ toolName: tc.toolName, args: tc.input })),
|
|
142
|
+
toolResults: mapToolResults(toolResults),
|
|
143
|
+
completedItems: tracker.outcome !== 'unknown' ? 1 : 0,
|
|
144
|
+
};
|
|
145
|
+
config.onStepUpdate?.(stepEvent);
|
|
146
|
+
}
|
|
147
|
+
catch (err) {
|
|
148
|
+
logger.warn(`worker onStepFinish failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
149
|
+
}
|
|
150
|
+
};
|
|
151
|
+
}
|
|
152
|
+
function createWorkerAgent(config, store) {
|
|
153
|
+
const backlog = createBacklogTools(config.owner, config.repo, {
|
|
170
154
|
defaultLabels: config.labels,
|
|
171
155
|
issueNumbers: config.issues,
|
|
156
|
+
scopeListIssuesToIssueNumbers: true,
|
|
157
|
+
scopeReadIssueToIssueNumbers: true,
|
|
158
|
+
allowGlobalBugDuplicateChecks: true,
|
|
172
159
|
});
|
|
173
|
-
const memory = createMemoryTools(store, projectRoot);
|
|
160
|
+
const memory = createMemoryTools(store, config.projectRoot);
|
|
174
161
|
const execution = config.dryRun
|
|
175
162
|
? createDryRunExecutionTools()
|
|
176
|
-
: createExecutionTools(projectRoot, { onProgress: config.onProgress });
|
|
163
|
+
: createExecutionTools(config.projectRoot, { onProgress: config.onProgress });
|
|
177
164
|
const reporting = config.dryRun
|
|
178
165
|
? createDryRunReportingTools()
|
|
179
|
-
: createReportingTools(owner, repo);
|
|
180
|
-
const introspection = createIntrospectionTools(projectRoot);
|
|
166
|
+
: createReportingTools(config.owner, config.repo);
|
|
167
|
+
const introspection = createIntrospectionTools(config.projectRoot);
|
|
181
168
|
const featureState = config.dryRun
|
|
182
169
|
? createDryRunFeatureStateTools()
|
|
183
|
-
: createFeatureStateTools(projectRoot);
|
|
170
|
+
: createFeatureStateTools(config.projectRoot);
|
|
184
171
|
const tools = {
|
|
185
172
|
...backlog,
|
|
186
173
|
...memory,
|
|
@@ -189,99 +176,304 @@ export function createAgentOrchestrator(config) {
|
|
|
189
176
|
...introspection,
|
|
190
177
|
...featureState,
|
|
191
178
|
};
|
|
192
|
-
const
|
|
193
|
-
const
|
|
194
|
-
const constraints = buildConstraints(constraintConfig);
|
|
195
|
-
const runtimeConfig = buildRuntimeConfig(config);
|
|
196
|
-
const fullPrompt = AGENT_SYSTEM_PROMPT + runtimeConfig + constraints;
|
|
197
|
-
const completedIssues = new Set();
|
|
198
|
-
const issueNumberSet = config.issues?.length ? new Set(config.issues) : undefined;
|
|
199
|
-
const maxSteps = config.maxSteps ?? 200;
|
|
200
|
-
// Use traced ToolLoopAgent so Braintrust automatically captures
|
|
201
|
-
// all LLM calls, tool executions, and agent steps.
|
|
179
|
+
const fullPrompt = AGENT_SYSTEM_PROMPT + buildRuntimeConfig(config) + buildConstraints(config);
|
|
180
|
+
const tracker = { outcome: 'unknown', reflected: false };
|
|
202
181
|
const { ToolLoopAgent: TracedToolLoopAgent } = getTracedAI();
|
|
203
|
-
|
|
204
|
-
model,
|
|
182
|
+
const agent = new TracedToolLoopAgent({
|
|
183
|
+
model: config.model,
|
|
205
184
|
instructions: fullPrompt,
|
|
206
185
|
tools,
|
|
207
186
|
experimental_telemetry: {
|
|
208
187
|
isEnabled: true,
|
|
209
|
-
functionId: 'agent-
|
|
210
|
-
metadata: {
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
if (effectiveMaxItems != null && completedIssues.size >= effectiveMaxItems)
|
|
216
|
-
return true;
|
|
217
|
-
return false;
|
|
188
|
+
functionId: 'agent-worker',
|
|
189
|
+
metadata: {
|
|
190
|
+
owner: config.owner,
|
|
191
|
+
repo: config.repo,
|
|
192
|
+
dryRun: String(config.dryRun ?? false),
|
|
193
|
+
},
|
|
218
194
|
},
|
|
195
|
+
stopWhen: ({ steps }) => steps.length >= (config.maxSteps ?? 200),
|
|
219
196
|
prepareStep: async ({ steps }) => {
|
|
220
197
|
try {
|
|
221
198
|
if (steps.length === 0) {
|
|
222
|
-
await ingestStrategicDocs(projectRoot, store);
|
|
199
|
+
await ingestStrategicDocs(config.projectRoot, store);
|
|
223
200
|
await store.prune();
|
|
224
201
|
}
|
|
225
202
|
const all = await store.read({ limit: 50 });
|
|
226
|
-
const
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
// Strategic docs are injected as lightweight catalog entries (filename + summary).
|
|
230
|
-
// The agent reads full content on-demand via readStrategicDoc tool.
|
|
231
|
-
const strategic = all.filter(e => e.type === 'strategic_context');
|
|
232
|
-
const memoryContext = [
|
|
233
|
-
...recentLogs.map(e => `[work] ${e.content}`),
|
|
234
|
-
...knowledge.map(e => `[knowledge] ${e.content}`),
|
|
235
|
-
...decisions.map(e => `[decision] ${e.content}`),
|
|
236
|
-
...strategic.map(e => `[strategic-doc] ${e.content}`),
|
|
237
|
-
].join('\n');
|
|
203
|
+
const memoryContext = all
|
|
204
|
+
.map((entry) => `[${entry.type}] ${entry.content}`)
|
|
205
|
+
.join('\n');
|
|
238
206
|
if (!memoryContext)
|
|
239
207
|
return undefined;
|
|
240
208
|
return {
|
|
241
|
-
system: [
|
|
242
|
-
fullPrompt,
|
|
243
|
-
`## Current Memory\n\n${memoryContext}`,
|
|
244
|
-
].join('\n\n'),
|
|
209
|
+
system: [fullPrompt, `## Current Memory\n\n${memoryContext}`].join('\n\n'),
|
|
245
210
|
};
|
|
246
211
|
}
|
|
247
212
|
catch (err) {
|
|
248
|
-
logger.warn(`prepareStep failed, continuing without memory: ${err instanceof Error ? err.message : String(err)}`);
|
|
213
|
+
logger.warn(`worker prepareStep failed, continuing without memory: ${err instanceof Error ? err.message : String(err)}`);
|
|
249
214
|
return undefined;
|
|
250
215
|
}
|
|
251
216
|
},
|
|
252
|
-
onStepFinish:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
217
|
+
onStepFinish: createWorkerStepHandler(config, tracker),
|
|
218
|
+
});
|
|
219
|
+
return { agent, tools, tracker };
|
|
220
|
+
}
|
|
221
|
+
function formatSelectionReasons(issue) {
|
|
222
|
+
const reasons = issue.selectionReasons ?? [];
|
|
223
|
+
if (reasons.length === 0)
|
|
224
|
+
return 'No additional scheduler rationale.';
|
|
225
|
+
return reasons
|
|
226
|
+
.slice(0, 5)
|
|
227
|
+
.map((reason) => `- ${reason.message}`)
|
|
228
|
+
.join('\n');
|
|
229
|
+
}
|
|
230
|
+
function buildWorkerPrompt(issue) {
|
|
231
|
+
return `Selected issue:
|
|
232
|
+
|
|
233
|
+
#${issue.issueNumber}: ${issue.title}
|
|
234
|
+
Labels: ${issue.labels.join(', ') || 'none'}
|
|
235
|
+
Priority: ${issue.priorityTier ?? 'unlabeled'}
|
|
236
|
+
Actionability: ${issue.actionability ?? 'ready'}
|
|
237
|
+
Current recommendation: ${issue.recommendation ?? 'unknown'}
|
|
238
|
+
Feature name: ${issue.loopFeatureName ?? 'feature'}
|
|
239
|
+
|
|
240
|
+
Scheduler rationale:
|
|
241
|
+
${formatSelectionReasons(issue)}
|
|
242
|
+
|
|
243
|
+
Issue body:
|
|
244
|
+
${issue.body}
|
|
245
|
+
|
|
246
|
+
You must fully handle this selected issue and then stop.`;
|
|
247
|
+
}
|
|
248
|
+
function buildFinalSummary(processed, blocked) {
|
|
249
|
+
const lines = [`Processed ${processed.length} issue(s).`];
|
|
250
|
+
const byOutcome = new Map();
|
|
251
|
+
for (const item of processed) {
|
|
252
|
+
const issues = byOutcome.get(item.outcome) ?? [];
|
|
253
|
+
issues.push(item.issue.issueNumber);
|
|
254
|
+
byOutcome.set(item.outcome, issues);
|
|
255
|
+
}
|
|
256
|
+
const orderedOutcomes = [
|
|
257
|
+
{ outcome: 'success', label: 'Completed' },
|
|
258
|
+
{ outcome: 'partial', label: 'Partial' },
|
|
259
|
+
{ outcome: 'failure', label: 'Failed' },
|
|
260
|
+
{ outcome: 'skipped', label: 'Skipped' },
|
|
261
|
+
{ outcome: 'unknown', label: 'Unknown' },
|
|
262
|
+
];
|
|
263
|
+
for (const { outcome, label } of orderedOutcomes) {
|
|
264
|
+
const issues = byOutcome.get(outcome);
|
|
265
|
+
if (issues?.length) {
|
|
266
|
+
lines.push(`${label}: ${issues.map(issueNumber => `#${issueNumber}`).join(', ')}`);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
if (blocked.length > 0) {
|
|
270
|
+
lines.push(`Blocked: ${blocked.map(issue => `#${issue.issueNumber} (${issue.actionability})`).join(', ')}`);
|
|
271
|
+
}
|
|
272
|
+
return lines.join('\n');
|
|
273
|
+
}
|
|
274
|
+
async function* oneChunk(text) {
|
|
275
|
+
if (text) {
|
|
276
|
+
yield text;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
class StructuredAgentOrchestrator {
|
|
280
|
+
config;
|
|
281
|
+
version = 'agent-v1';
|
|
282
|
+
id = 'agent-orchestrator';
|
|
283
|
+
tools;
|
|
284
|
+
constructor(config) {
|
|
285
|
+
this.config = config;
|
|
286
|
+
const memoryDir = join(config.projectRoot, '.ralph', 'agent');
|
|
287
|
+
const store = new MemoryStore(memoryDir);
|
|
288
|
+
this.tools = createWorkerAgent(config, store).tools;
|
|
289
|
+
}
|
|
290
|
+
emit(event) {
|
|
291
|
+
this.config.onOrchestratorEvent?.(event);
|
|
292
|
+
}
|
|
293
|
+
async run(options) {
|
|
294
|
+
const memoryDir = join(this.config.projectRoot, '.ralph', 'agent');
|
|
295
|
+
const store = new MemoryStore(memoryDir);
|
|
296
|
+
await ingestStrategicDocs(this.config.projectRoot, store);
|
|
297
|
+
await store.prune();
|
|
298
|
+
const processed = [];
|
|
299
|
+
const attemptedThisRun = new Map();
|
|
300
|
+
const pendingContinuation = new Set();
|
|
301
|
+
const countedBudgetIssues = new Set();
|
|
302
|
+
let completedBudget = 0;
|
|
303
|
+
let blockedSnapshot = [];
|
|
304
|
+
const schedulerCache = createSchedulerRunCache();
|
|
305
|
+
while (true) {
|
|
306
|
+
if (options.abortSignal?.aborted) {
|
|
307
|
+
throw new Error('Aborted');
|
|
308
|
+
}
|
|
309
|
+
if (this.config.maxItems != null && completedBudget >= this.config.maxItems && pendingContinuation.size === 0) {
|
|
310
|
+
return buildFinalSummary(processed, blockedSnapshot);
|
|
311
|
+
}
|
|
312
|
+
const hadPendingContinuation = pendingContinuation.size > 0;
|
|
313
|
+
const ranked = await buildRankedBacklog(this.config, store, schedulerCache);
|
|
314
|
+
if (ranked.errors.length > 0 && hadPendingContinuation) {
|
|
315
|
+
throw new Error(ranked.errors[0]);
|
|
316
|
+
}
|
|
317
|
+
for (const issueNumber of [...pendingContinuation]) {
|
|
318
|
+
const prior = attemptedThisRun.get(issueNumber);
|
|
319
|
+
const stillNeedsFollowUp = ranked.actionable.some(candidate => candidate.issueNumber === issueNumber && canResumeWithinRun(candidate, prior));
|
|
320
|
+
if (!stillNeedsFollowUp) {
|
|
321
|
+
pendingContinuation.delete(issueNumber);
|
|
261
322
|
}
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
323
|
+
}
|
|
324
|
+
if (this.config.maxItems != null && completedBudget >= this.config.maxItems && pendingContinuation.size === 0) {
|
|
325
|
+
return buildFinalSummary(processed, blockedSnapshot);
|
|
326
|
+
}
|
|
327
|
+
if (ranked.errors.length > 0) {
|
|
328
|
+
const canProceedWithRankedQueue = ranked.queue.length > 0
|
|
329
|
+
&& ranked.errors.every(isRecoverableListingError);
|
|
330
|
+
if (this.config.maxItems != null && completedBudget >= this.config.maxItems && pendingContinuation.size > 0) {
|
|
331
|
+
throw new Error(ranked.errors[0]);
|
|
332
|
+
}
|
|
333
|
+
if (!canProceedWithRankedQueue) {
|
|
334
|
+
throw new Error(ranked.errors[0]);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
const queueStates = toIssueStates(ranked.queue);
|
|
338
|
+
blockedSnapshot = queueStates.filter(issue => issue.actionability !== 'ready'
|
|
339
|
+
&& issue.actionability !== 'housekeeping'
|
|
340
|
+
&& issue.actionability !== 'waiting_pr');
|
|
341
|
+
if (ranked.expansions.length > 0) {
|
|
342
|
+
this.emit({ type: 'scope_expanded', expansions: ranked.expansions });
|
|
343
|
+
}
|
|
344
|
+
this.emit({ type: 'backlog_scanned', total: queueStates.length, issues: queueStates });
|
|
345
|
+
for (const candidate of ranked.queue) {
|
|
346
|
+
this.emit({ type: 'candidate_enriched', issue: {
|
|
347
|
+
issueNumber: candidate.issueNumber,
|
|
348
|
+
title: candidate.title,
|
|
349
|
+
labels: candidate.labels,
|
|
350
|
+
phase: candidate.phase,
|
|
351
|
+
actionability: candidate.actionability,
|
|
352
|
+
priorityTier: candidate.priorityTier,
|
|
353
|
+
dependsOn: candidate.dependsOn,
|
|
354
|
+
inferredDependsOn: candidate.inferredDependsOn,
|
|
355
|
+
blockedBy: candidate.blockedBy,
|
|
356
|
+
recommendation: candidate.recommendation,
|
|
357
|
+
selectionReasons: candidate.selectionReasons,
|
|
358
|
+
score: candidate.score,
|
|
359
|
+
attemptState: candidate.attemptState,
|
|
360
|
+
featureState: candidate.featureState,
|
|
361
|
+
loopFeatureName: candidate.loopFeatureName,
|
|
362
|
+
} });
|
|
363
|
+
if (candidate.inferredDependencyEdges.length > 0) {
|
|
364
|
+
this.emit({ type: 'dependencies_inferred', issueNumber: candidate.issueNumber, edges: candidate.inferredDependencyEdges });
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
this.emit({ type: 'queue_ranked', queue: queueStates });
|
|
368
|
+
for (const blocked of blockedSnapshot) {
|
|
369
|
+
this.emit({ type: 'task_blocked', issue: blocked });
|
|
370
|
+
}
|
|
371
|
+
const resumableCandidates = ranked.actionable.filter((candidate) => {
|
|
372
|
+
return pendingContinuation.has(candidate.issueNumber)
|
|
373
|
+
&& canResumeWithinRun(candidate, attemptedThisRun.get(candidate.issueNumber));
|
|
374
|
+
});
|
|
375
|
+
const stalledCandidate = resumableCandidates.find((candidate) => hasExceededWithinRunContinuationLimit(candidate, attemptedThisRun.get(candidate.issueNumber)));
|
|
376
|
+
if (stalledCandidate) {
|
|
377
|
+
throw new Error(`Issue #${stalledCandidate.issueNumber} remained in ${stalledCandidate.recommendation} after ${MAX_STALLED_CONTINUATION_SELECTIONS} attempts in the same run.`);
|
|
378
|
+
}
|
|
379
|
+
const next = (resumableCandidates[0] ?? ranked.actionable.find((candidate) => {
|
|
380
|
+
if (pendingContinuation.size > 0) {
|
|
381
|
+
return false;
|
|
382
|
+
}
|
|
383
|
+
return canResumeWithinRun(candidate, attemptedThisRun.get(candidate.issueNumber));
|
|
384
|
+
}));
|
|
385
|
+
if (!next) {
|
|
386
|
+
return buildFinalSummary(processed, blockedSnapshot);
|
|
387
|
+
}
|
|
388
|
+
const selected = {
|
|
389
|
+
issueNumber: next.issueNumber,
|
|
390
|
+
title: next.title,
|
|
391
|
+
labels: next.labels,
|
|
392
|
+
phase: 'planning',
|
|
393
|
+
scopeOrigin: next.scopeOrigin,
|
|
394
|
+
requestedBy: next.requestedBy,
|
|
395
|
+
actionability: next.actionability,
|
|
396
|
+
priorityTier: next.priorityTier,
|
|
397
|
+
dependsOn: next.dependsOn,
|
|
398
|
+
inferredDependsOn: next.inferredDependsOn,
|
|
399
|
+
blockedBy: next.blockedBy,
|
|
400
|
+
recommendation: next.recommendation,
|
|
401
|
+
selectionReasons: next.selectionReasons,
|
|
402
|
+
score: next.score,
|
|
403
|
+
attemptState: next.attemptState,
|
|
404
|
+
featureState: next.featureState,
|
|
405
|
+
loopFeatureName: next.loopFeatureName,
|
|
406
|
+
};
|
|
407
|
+
this.emit({ type: 'task_selected', issue: selected });
|
|
408
|
+
this.emit({ type: 'task_started', issue: selected });
|
|
409
|
+
const workerConfig = {
|
|
410
|
+
...this.config,
|
|
411
|
+
issues: [selected.issueNumber],
|
|
412
|
+
labels: undefined,
|
|
413
|
+
maxItems: 1,
|
|
414
|
+
};
|
|
415
|
+
const { agent, tracker } = createWorkerAgent(workerConfig, store);
|
|
416
|
+
try {
|
|
417
|
+
const result = await agent.stream({
|
|
418
|
+
prompt: buildWorkerPrompt(next),
|
|
419
|
+
abortSignal: options.abortSignal,
|
|
280
420
|
});
|
|
421
|
+
for await (const _chunk of result.textStream) {
|
|
422
|
+
// Worker text is surfaced only in the final returned summary.
|
|
423
|
+
}
|
|
281
424
|
}
|
|
282
425
|
catch (err) {
|
|
283
|
-
|
|
426
|
+
if (isAbortError(err, options.abortSignal)) {
|
|
427
|
+
throw new Error('Aborted');
|
|
428
|
+
}
|
|
429
|
+
const failed = { ...selected, error: err instanceof Error ? err.message : String(err) };
|
|
430
|
+
const failureOutcome = tracker.outcome === 'unknown' ? 'failure' : tracker.outcome;
|
|
431
|
+
processed.push({ issue: failed, outcome: failureOutcome });
|
|
432
|
+
this.emit({ type: 'task_completed', issue: failed, outcome: failureOutcome });
|
|
433
|
+
throw err;
|
|
284
434
|
}
|
|
285
|
-
|
|
286
|
-
|
|
435
|
+
if (!tracker.reflected) {
|
|
436
|
+
const failed = {
|
|
437
|
+
...selected,
|
|
438
|
+
error: `Worker stopped before calling reflectOnWork for issue #${selected.issueNumber}.`,
|
|
439
|
+
};
|
|
440
|
+
processed.push({ issue: failed, outcome: 'failure' });
|
|
441
|
+
this.emit({ type: 'task_completed', issue: failed, outcome: 'failure' });
|
|
442
|
+
throw new Error(failed.error);
|
|
443
|
+
}
|
|
444
|
+
const completedIssue = {
|
|
445
|
+
...selected,
|
|
446
|
+
phase: 'reflecting',
|
|
447
|
+
};
|
|
448
|
+
processed.push({ issue: completedIssue, outcome: tracker.outcome });
|
|
449
|
+
const prior = attemptedThisRun.get(selected.issueNumber);
|
|
450
|
+
attemptedThisRun.set(selected.issueNumber, {
|
|
451
|
+
outcome: tracker.outcome,
|
|
452
|
+
selections: (prior?.selections ?? 0) + 1,
|
|
453
|
+
});
|
|
454
|
+
if (shouldCountTowardCompletedBudget(selected, tracker.outcome)) {
|
|
455
|
+
pendingContinuation.add(selected.issueNumber);
|
|
456
|
+
}
|
|
457
|
+
else {
|
|
458
|
+
pendingContinuation.delete(selected.issueNumber);
|
|
459
|
+
}
|
|
460
|
+
this.emit({ type: 'task_completed', issue: completedIssue, outcome: tracker.outcome });
|
|
461
|
+
if (shouldCountTowardCompletedBudget(selected, tracker.outcome) && !countedBudgetIssues.has(selected.issueNumber)) {
|
|
462
|
+
completedBudget += 1;
|
|
463
|
+
countedBudgetIssues.add(selected.issueNumber);
|
|
464
|
+
}
|
|
465
|
+
invalidateSchedulerRunCache(schedulerCache, [selected.issueNumber]);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
async generate(options) {
|
|
469
|
+
const text = await this.run({ abortSignal: options.abortSignal });
|
|
470
|
+
return { text };
|
|
471
|
+
}
|
|
472
|
+
async stream(options) {
|
|
473
|
+
const text = await this.run({ abortSignal: options.abortSignal });
|
|
474
|
+
return { textStream: oneChunk(text) };
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
/**
 * Factory for the structured agent orchestrator.
 *
 * @param {object} config - Orchestrator configuration (issue filters,
 *   labels, maxItems budget, etc. — consumed by the orchestrator's run loop).
 * @returns {StructuredAgentOrchestrator} A freshly constructed orchestrator.
 */
export function createAgentOrchestrator(config) {
    const orchestrator = new StructuredAgentOrchestrator(config);
    return orchestrator;
}
|