task-while 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +32 -34
  2. package/package.json +2 -2
  3. package/src/adapters/fs/harness-store.ts +84 -0
  4. package/src/agents/claude.ts +159 -9
  5. package/src/agents/codex.ts +68 -4
  6. package/src/agents/event-log.ts +160 -15
  7. package/src/batch/discovery.ts +1 -1
  8. package/src/commands/batch.ts +63 -164
  9. package/src/commands/run-branch-helpers.ts +81 -0
  10. package/src/commands/run-providers.ts +77 -0
  11. package/src/commands/run.ts +121 -177
  12. package/src/core/create-runtime-ports.ts +118 -0
  13. package/src/core/runtime.ts +15 -36
  14. package/src/harness/in-memory-store.ts +45 -0
  15. package/src/harness/kernel.ts +226 -0
  16. package/src/harness/state.ts +47 -0
  17. package/src/harness/store.ts +26 -0
  18. package/src/harness/workflow-builders.ts +87 -0
  19. package/src/harness/workflow-program.ts +86 -0
  20. package/src/ports/agent.ts +17 -0
  21. package/src/ports/code-host.ts +23 -0
  22. package/src/programs/batch.ts +139 -0
  23. package/src/programs/run-direct.ts +209 -0
  24. package/src/programs/run-pr-transitions.ts +81 -0
  25. package/src/programs/run-pr.ts +290 -0
  26. package/src/programs/shared-steps.ts +252 -0
  27. package/src/schedulers/scheduler.ts +208 -0
  28. package/src/session/session.ts +127 -0
  29. package/src/workflow/config.ts +15 -0
  30. package/src/core/engine-helpers.ts +0 -114
  31. package/src/core/engine-outcomes.ts +0 -166
  32. package/src/core/engine.ts +0 -223
  33. package/src/core/orchestrator-helpers.ts +0 -52
  34. package/src/core/orchestrator-integrate-resume.ts +0 -149
  35. package/src/core/orchestrator-review-resume.ts +0 -228
  36. package/src/core/orchestrator-task-attempt.ts +0 -257
  37. package/src/core/orchestrator.ts +0 -99
  38. package/src/runtime/fs-runtime.ts +0 -209
  39. package/src/workflow/direct-preset.ts +0 -44
  40. package/src/workflow/preset.ts +0 -86
  41. package/src/workflow/pull-request-preset.ts +0 -312
@@ -0,0 +1,252 @@
1
+ import { execa } from 'execa'
2
+
3
+ import { buildImplementerPrompt } from '../prompts/implementer'
4
+ import { buildReviewerPrompt } from '../prompts/reviewer'
5
+ import {
6
+ implementOutputSchema,
7
+ reviewOutputSchema,
8
+ validateImplementOutput,
9
+ validateReviewOutput,
10
+ } from '../schema'
11
+
12
+ import type { GitPort } from '../core/runtime'
13
+ import type { Artifact } from '../harness/state'
14
+ import type { AgentPort } from '../ports/agent'
15
+ import type { TaskSourceSession } from '../task-sources/types'
16
+ import type { ImplementOutput, ReviewFinding } from '../types'
17
+
18
/**
 * Prompt material carried inside a contract payload and handed to the
 * implement step: a list of instruction strings plus titled content sections.
 */
+ export interface TaskPrompt {
19
+ instructions: string[]
20
// Each section pairs a `title` with its `content` text.
+ sections: { content: string; title: string }[]
21
+ }
22
+
23
/**
 * Payload of the artifact produced by the `contract` step: the completion
 * criteria obtained from the task source together with the implement prompt.
 */
+ export interface ContractPayload {
24
+ completionCriteria: string[]
25
+ prompt: TaskPrompt
26
+ }
27
+
28
/** Payload of the artifact produced by the `implement` step; an alias of `ImplementOutput`. */
+ export type ImplementPayload = ImplementOutput
29
+
30
/** Payload of the artifact produced by the `integrate` step: the SHA reported by the git port's `commitTask`. */
+ export interface IntegratePayload {
31
+ commitSha: string
32
+ }
33
+
34
/** Payload of the artifact produced by the `review` step. */
+ export interface ReviewPayload {
35
+ findings: ReviewFinding[]
36
+ summary: string
37
// The review step in this file writes 'approved', 'replan_required' or 'rejected' here.
+ verdict: string
38
+ }
39
+
40
/** Payload of the artifact produced by the `verify` step: one entry per executed verify command. */
+ export interface VerifyPayload {
41
+ checks: {
42
// The command string exactly as configured.
+ command: string
43
// Wall-clock time between starting the command and recording its result.
+ durationMs: number
44
+ exitCode: number
45
// Signal reported for the command, or null when none was reported.
+ signal: null | string
46
+ }[]
47
+ }
48
+
49
/** Ports the shared steps call into: git operations and the task-source session. */
+ export interface RuntimePorts {
50
+ git: GitPort
51
+ taskSource: TaskSourceSession
52
+ }
53
+
54
/**
 * The five step functions returned by `createSharedSteps`. Every step takes
 * the subject id and resolves to an artifact wrapping that step's payload.
 */
+ export interface SharedSteps {
55
// Builds the prompt and completion criteria for the given attempt.
+ contract: (
56
+ subjectId: string,
57
+ input: { attempt: number; lastFindings: ReviewFinding[] },
58
+ ) => Promise<Artifact<ContractPayload>>
59
// Runs the implementer agent with the prompt produced for this attempt.
+ implement: (
60
+ subjectId: string,
61
+ input: {
62
+ attempt: number
63
+ lastFindings: ReviewFinding[]
64
+ prompt: TaskPrompt
65
+ },
66
+ ) => Promise<Artifact<ImplementPayload>>
67
// Marks the task completed in the task source (if needed) and commits.
+ integrate: (subjectId: string) => Promise<Artifact<IntegratePayload>>
68
// Runs the reviewer agent against the implement output.
+ review: (
69
+ subjectId: string,
70
+ input: {
71
+ attempt: number
72
+ implement: ImplementPayload
73
+ lastFindings: ReviewFinding[]
74
+ },
75
+ ) => Promise<Artifact<ReviewPayload>>
76
// Runs the configured verify commands and records their outcomes.
+ verify: (subjectId: string) => Promise<Artifact<VerifyPayload>>
77
+ }
78
+
79
/**
 * Wraps a step payload in an artifact envelope.
 *
 * The clock is read exactly once so that the millisecond value embedded in
 * `id` always corresponds to the ISO `timestamp` of the same artifact.
 *
 * @param kind - Artifact kind label; stored as `kind` and used as the id prefix.
 * @param subjectId - Subject the artifact belongs to.
 * @param payload - Step-specific payload, stored unchanged.
 * @returns An artifact whose id has the form `<kind>-<subjectId>-<epoch ms>`.
 */
function makeArtifact<T>(
  kind: string,
  subjectId: string,
  payload: T,
): Artifact<T> {
  const createdAt = new Date()
  return {
    id: `${kind}-${subjectId}-${createdAt.getTime()}`,
    kind,
    payload,
    subjectId,
    timestamp: createdAt.toISOString(),
  }
}
92
+
93
/**
 * Builds the contract, implement, integrate, review and verify steps.
 *
 * @param deps.artifactKinds - Kind label used for the artifact of each step.
 * @param deps.implementer - Agent executed by the `implement` step.
 * @param deps.ports - Git and task-source ports used by the steps.
 * @param deps.reviewer - Agent executed by the `review` step.
 * @param deps.verifyCommands - Shell commands run, in order, by `verify`.
 * @param deps.workspaceRoot - Working directory for the verify commands.
 * @returns The step functions; each resolves to an artifact for the subject.
 */
export function createSharedSteps(deps: {
  artifactKinds: {
    contract: string
    implementation: string
    integrateResult: string
    reviewResult: string
    verifyResult: string
  }
  implementer: AgentPort
  ports: RuntimePorts
  reviewer: AgentPort
  verifyCommands: string[]
  workspaceRoot: string
}): SharedSteps {
  const {
    artifactKinds,
    implementer,
    ports,
    reviewer,
    verifyCommands,
    workspaceRoot,
  } = deps

  return {
    // Asks the task source for the implement prompt and completion criteria.
    async contract(subjectId, input) {
      const prompt = await ports.taskSource.buildImplementPrompt({
        attempt: input.attempt,
        generation: 1,
        lastFindings: input.lastFindings,
        taskHandle: subjectId,
      })
      const completionCriteria =
        await ports.taskSource.getCompletionCriteria(subjectId)
      const payload: ContractPayload = {
        completionCriteria,
        prompt: {
          instructions: prompt.instructions,
          sections: prompt.sections,
        },
      }
      return makeArtifact(artifactKinds.contract, subjectId, payload)
    },

    // Renders the implementer prompt, runs the agent and validates its output.
    async implement(subjectId, input) {
      const promptText = await buildImplementerPrompt({
        attempt: input.attempt,
        generation: 1,
        lastFindings: input.lastFindings,
        prompt: input.prompt,
        taskHandle: subjectId,
      })
      const raw = await implementer.execute({
        outputSchema: implementOutputSchema,
        prompt: promptText,
        role: 'implementer',
      })
      const validated = validateImplementOutput(raw)
      return makeArtifact(artifactKinds.implementation, subjectId, validated)
    },

    // Applies task completion (unless already applied) and commits. When the
    // commit fails, a completion applied by this call is reverted again.
    async integrate(subjectId) {
      const message = ports.taskSource.buildCommitSubject(subjectId)
      const alreadyDone = await ports.taskSource.isTaskCompleted(subjectId)
      if (!alreadyDone) {
        await ports.taskSource.applyTaskCompletion(subjectId)
      }
      try {
        const result = await ports.git.commitTask({ message })
        return makeArtifact(artifactKinds.integrateResult, subjectId, {
          commitSha: result.commitSha,
        })
      } catch (error) {
        if (!alreadyDone) {
          try {
            await ports.taskSource.revertTaskCompletion(subjectId)
          } catch {
            // revert best-effort: the commit error below is the one to surface
          }
        }
        throw error
      }
    },

    // Runs the reviewer on the implement output and the files changed since
    // HEAD, then maps the reviewer verdict onto the payload verdict strings.
    async review(subjectId, input) {
      const changedFiles = await ports.git.getChangedFilesSinceHead()
      // Rebuilt field by field so `taskHandle` is always the current subject.
      const implementOutput: ImplementOutput = {
        assumptions: input.implement.assumptions,
        needsHumanAttention: input.implement.needsHumanAttention,
        notes: input.implement.notes,
        status: input.implement.status,
        summary: input.implement.summary,
        taskHandle: subjectId,
        unresolvedItems: input.implement.unresolvedItems,
      }
      const prompt = await ports.taskSource.buildReviewPrompt({
        actualChangedFiles: changedFiles,
        attempt: input.attempt,
        generation: 1,
        implement: implementOutput,
        lastFindings: input.lastFindings,
        taskHandle: subjectId,
      })
      const promptText = await buildReviewerPrompt({
        actualChangedFiles: changedFiles,
        attempt: input.attempt,
        generation: 1,
        implement: implementOutput,
        lastFindings: input.lastFindings,
        prompt,
        taskHandle: subjectId,
      })
      const raw = await reviewer.execute({
        outputSchema: reviewOutputSchema,
        prompt: promptText,
        role: 'reviewer',
      })
      const validated = validateReviewOutput(raw)
      // 'pass' -> approved, 'blocked' -> replan_required, anything else -> rejected
      const verdict =
        validated.verdict === 'pass'
          ? 'approved'
          : validated.verdict === 'blocked'
            ? 'replan_required'
            : 'rejected'
      const payload: ReviewPayload = {
        findings: validated.findings,
        summary: validated.summary,
        verdict,
      }
      return makeArtifact(artifactKinds.reviewResult, subjectId, payload)
    },

    // Runs every verify command sequentially through the shell and records
    // one check per command; a failing command does not stop the others.
    async verify(subjectId) {
      const checks: VerifyPayload['checks'] = []
      for (const command of verifyCommands) {
        const start = Date.now()
        try {
          const result = await execa(command, {
            cwd: workspaceRoot,
            reject: false,
            shell: true,
          })
          checks.push({
            command,
            durationMs: Date.now() - start,
            // With `reject: false` a command killed by a signal comes back
            // without a numeric exit code; record it as a failure (1) so the
            // check always carries a number, like the catch branch below.
            exitCode: result.exitCode ?? 1,
            signal: result.signal ?? null,
          })
        } catch {
          checks.push({
            command,
            durationMs: Date.now() - start,
            exitCode: 1,
            signal: null,
          })
        }
      }
      return makeArtifact(artifactKinds.verifyResult, subjectId, { checks })
    },
  }
}
@@ -0,0 +1,208 @@
1
+ import { TaskStatus } from '../harness/state'
2
+
3
+ import type { HarnessStore } from '../harness/store'
4
+
5
/** A subject handed out by a scheduler's `next()`. */
+ export interface ScheduledSubject {
6
// When truthy, runSession announces the subject with a "resumed" event instead of "started".
+ resumeFromSuspended?: boolean
7
+ subjectId: string
8
+ }
9
+
10
/**
 * Contract shared by the schedulers in this file: hand out the next subject,
 * record the outcome of a subject, and rebuild the bookkeeping on demand.
 */
+ export interface Scheduler {
11
+ markBlocked: (subjectId: string) => Promise<void>
12
+ markDone: (subjectId: string) => Promise<void>
13
+ markSuspended: (subjectId: string) => Promise<void>
14
// Resolves to null when there is nothing to hand out.
+ next: () => Promise<null | ScheduledSubject>
15
// Resolves to the subject ids grouped by status after rebuilding.
+ rebuild: () => Promise<{
16
+ blocked: Set<string>
17
+ done: Set<string>
18
+ replan: Set<string>
19
+ suspended: Set<string>
20
+ }>
21
+ }
22
+
23
/**
 * Scheduler over a dependency graph of subjects.
 *
 * `next()` hands out the first graph node (in graph order) that is not done,
 * blocked, suspended or awaiting resume and whose dependencies are all done.
 * When no such node exists it hands out one subject awaiting resume, flagged
 * with `resumeFromSuspended`. Once `untilTaskHandle` is done, `next()` always
 * resolves to null.
 *
 * @param input.graph - Nodes with the subject ids they depend on.
 * @param input.protocol - Protocol name passed to `store.loadState`.
 * @param input.store - Store queried by `rebuild()` for each node's state.
 * @param input.untilTaskHandle - Optional subject after which scheduling stops.
 */
export function createRunGraphScheduler(input: {
  graph: { dependsOn: string[]; subjectId: string }[]
  protocol: string
  store: HarnessStore
  untilTaskHandle?: string
}): Scheduler {
  const completed = new Set<string>()
  const stuck = new Set<string>()
  const needsReplan = new Set<string>()
  const paused = new Set<string>()
  // Subjects found suspended by rebuild() that have not been resumed yet.
  const awaitingResume = new Set<string>()

  // True when the subject must not be handed out as a fresh node.
  const isParked = (id: string): boolean =>
    completed.has(id) ||
    stuck.has(id) ||
    paused.has(id) ||
    awaitingResume.has(id)

  return {
    async markBlocked(subjectId: string) {
      awaitingResume.delete(subjectId)
      stuck.add(subjectId)
    },

    async markDone(subjectId: string) {
      awaitingResume.delete(subjectId)
      completed.add(subjectId)
    },

    async markSuspended(subjectId: string) {
      paused.add(subjectId)
    },

    async next() {
      const stopAt = input.untilTaskHandle
      if (stopAt && completed.has(stopAt)) {
        return null
      }

      const ready = input.graph.find(
        (node) =>
          !isParked(node.subjectId) &&
          node.dependsOn.every((dep) => completed.has(dep)),
      )
      if (ready) {
        return { subjectId: ready.subjectId }
      }

      // Nothing fresh is runnable: fall back to the oldest pending resume.
      const [resumable] = awaitingResume
      if (resumable === undefined) {
        return null
      }
      awaitingResume.delete(resumable)
      return { resumeFromSuspended: true, subjectId: resumable }
    },

    async rebuild() {
      for (const bucket of [
        completed,
        stuck,
        paused,
        awaitingResume,
        needsReplan,
      ]) {
        bucket.clear()
      }

      for (const { subjectId } of input.graph) {
        const state = await input.store.loadState(input.protocol, subjectId)
        if (!state) {
          continue
        }
        switch (state.status) {
          case TaskStatus.Done:
            completed.add(subjectId)
            break
          case TaskStatus.Blocked:
            stuck.add(subjectId)
            break
          case TaskStatus.Replan:
            needsReplan.add(subjectId)
            break
          case TaskStatus.Suspended:
            paused.add(subjectId)
            awaitingResume.add(subjectId)
            break
          default:
            break
        }
      }

      // Hand back copies so callers cannot mutate the internal bookkeeping.
      return {
        blocked: new Set(stuck),
        done: new Set(completed),
        replan: new Set(needsReplan),
        suspended: new Set(paused),
      }
    },
  }
}
121
+
122
/**
 * Queue-based scheduler over a flat list of files.
 *
 * `next()` peeks at the head of the queue without removing it; an entry only
 * leaves the queue through `markDone`, `markBlocked` or `markSuspended`.
 * `markSuspended` re-appends the subject at the tail flagged with
 * `resumeFromSuspended`, so it is handed out again after the others.
 *
 * @param input.files - Subject ids (files) to schedule, in order.
 * @param input.protocol - Protocol name passed to `store.loadState`.
 * @param input.results - Files that appear as own keys here count as done.
 * @param input.store - Store queried by `rebuild()` for each remaining file.
 */
export function createBatchRetryScheduler(input: {
  files: string[]
  protocol: string
  results: Record<string, unknown>
  store: HarnessStore
}): Scheduler {
  const queue: { resumeFromSuspended: boolean; subjectId: string }[] = []
  const done = new Set<string>()
  const blocked = new Set<string>()
  const replan = new Set<string>()
  const suspended = new Set<string>()

  // Drops the subject's queue entry, if it has one.
  const dequeue = (subjectId: string): void => {
    const idx = queue.findIndex((item) => item.subjectId === subjectId)
    if (idx !== -1) {
      queue.splice(idx, 1)
    }
  }

  // Own-property lookup: the `in` operator would also match inherited names
  // such as "constructor" or "toString" and wrongly report those files done.
  const hasResult = (file: string): boolean =>
    Object.prototype.hasOwnProperty.call(input.results, file)

  return {
    async markBlocked(subjectId: string) {
      dequeue(subjectId)
      blocked.add(subjectId)
    },

    async markDone(subjectId: string) {
      dequeue(subjectId)
      done.add(subjectId)
    },

    async markSuspended(subjectId: string) {
      dequeue(subjectId)
      suspended.add(subjectId)
      queue.push({ resumeFromSuspended: true, subjectId })
    },

    async next() {
      return queue[0] ?? null
    },

    async rebuild() {
      queue.length = 0
      done.clear()
      blocked.clear()
      replan.clear()
      suspended.clear()

      for (const file of input.files) {
        if (hasResult(file)) {
          done.add(file)
          continue
        }

        const state = await input.store.loadState(input.protocol, file)
        if (!state) {
          // No stored state yet: schedule a fresh run.
          queue.push({ resumeFromSuspended: false, subjectId: file })
          continue
        }

        if (state.status === TaskStatus.Done) {
          done.add(file)
        } else if (state.status === TaskStatus.Blocked) {
          blocked.add(file)
        } else if (state.status === TaskStatus.Replan) {
          replan.add(file)
        } else if (state.status === TaskStatus.Suspended) {
          suspended.add(file)
          queue.push({ resumeFromSuspended: true, subjectId: file })
        } else {
          queue.push({ resumeFromSuspended: false, subjectId: file })
        }
      }

      // Hand back copies so callers cannot mutate the internal bookkeeping.
      return {
        blocked: new Set(blocked),
        done: new Set(done),
        replan: new Set(replan),
        suspended: new Set(suspended),
      }
    },
  }
}
@@ -0,0 +1,127 @@
1
+ import { TaskStatus } from '../harness/state'
2
+
3
+ import type { KernelResult } from '../harness/kernel'
4
+ import type { Scheduler } from '../schedulers/scheduler'
5
+
6
/**
 * Type tags of the events yielded by `runSession`. The string values use a
 * `session.` prefix for session-level events and `subject.` for per-subject ones.
 */
+ export enum SessionEventType {
7
+ SessionDone = 'session.done',
8
+ SessionFailed = 'session.failed',
9
+ SessionStarted = 'session.started',
10
+ SubjectBlocked = 'subject.blocked',
11
+ SubjectDone = 'subject.done',
12
+ SubjectResumed = 'subject.resumed',
13
+ SubjectStarted = 'subject.started',
14
+ SubjectSuspended = 'subject.suspended',
15
+ }
16
+
17
/** One event yielded by `runSession`. */
+ export interface SessionEvent {
18
// runSession puts `{ progress }` on the started event, `{ error }` on the failed event, and null otherwise.
+ detail: unknown
19
// Empty string for session-level events.
+ subjectId: string
20
// ISO-8601 string taken from `new Date().toISOString()` when the event is built.
+ timestamp: string
21
+ type: SessionEventType
22
+ }
23
+
24
/** Counters attached to the session-started event. */
+ export interface SessionProgress {
25
+ blocked: number
26
+ completed: number
27
+ suspended: number
28
// NOTE(review): runSession currently always reports 0 here.
+ total: number
29
+ }
30
+
31
/**
 * Drives a scheduler/kernel pair and reports what happens as a stream of
 * session events.
 *
 * The scheduler is rebuilt first and a `session.started` event carrying the
 * initial progress counters is yielded. Then, until `scheduler.next()`
 * resolves to null, each subject is announced (`subject.started` or
 * `subject.resumed`), run through `kernel.run`, and its result status is
 * recorded on the scheduler and reported. Subjects run one at a time.
 *
 * `input.concurrency` and `input.config` are accepted but not read here, and
 * `progress.total` is always reported as 0.
 *
 * @param input.kernel - Runs one subject and resolves to its result.
 * @param input.scheduler - Decides which subject runs next.
 * @yields Session events in the order described above; `session.done` last.
 * @throws Rethrows any error raised inside the loop after yielding
 *   `session.failed`. That includes the error raised here when the kernel
 *   returns a status other than Done, Suspended, Blocked or Replan: nothing
 *   would be marked on the scheduler, so it would hand out the same subject
 *   again and the loop would never end.
 */
export async function* runSession(input: {
  concurrency?: number
  config: Record<string, unknown>
  kernel: { run: (subjectId: string) => Promise<KernelResult> }
  scheduler: Scheduler
}): AsyncGenerator<SessionEvent> {
  const { kernel, scheduler } = input

  // Builds an event stamped with the time of its creation.
  const makeEvent = (
    type: SessionEventType,
    subjectId: string,
    detail: unknown = null,
  ): SessionEvent => ({
    detail,
    subjectId,
    timestamp: new Date().toISOString(),
    type,
  })

  const sets = await scheduler.rebuild()

  const progress: SessionProgress = {
    blocked: sets.blocked.size,
    completed: sets.done.size,
    suspended: sets.suspended.size,
    total: 0,
  }

  yield makeEvent(SessionEventType.SessionStarted, '', { progress })

  try {
    for (;;) {
      const scheduled = await scheduler.next()
      if (!scheduled) {
        break
      }

      const { subjectId } = scheduled

      yield makeEvent(
        scheduled.resumeFromSuspended
          ? SessionEventType.SubjectResumed
          : SessionEventType.SubjectStarted,
        subjectId,
      )

      const result: KernelResult = await kernel.run(subjectId)

      if (result.status === TaskStatus.Done) {
        await scheduler.markDone(subjectId)
        yield makeEvent(SessionEventType.SubjectDone, subjectId)
      } else if (result.status === TaskStatus.Suspended) {
        await scheduler.markSuspended(subjectId)
        yield makeEvent(SessionEventType.SubjectSuspended, subjectId)
      } else if (
        result.status === TaskStatus.Blocked ||
        result.status === TaskStatus.Replan
      ) {
        // Replan is reported and recorded the same way as Blocked.
        await scheduler.markBlocked(subjectId)
        yield makeEvent(SessionEventType.SubjectBlocked, subjectId)
      } else {
        // Fail loudly instead of re-running the same subject forever.
        throw new Error(
          `Unexpected kernel status for subject "${subjectId}": ${String(result.status)}`,
        )
      }
    }

    yield makeEvent(SessionEventType.SessionDone, '')
  } catch (error) {
    yield makeEvent(SessionEventType.SessionFailed, '', { error })
    throw error
  }
}
@@ -58,9 +58,16 @@ const taskConfigSchema = z
58
58
  })
59
59
  .strict()
60
60
 
61
// Schema for the `verify` config section: `commands` is a list of strings that
// are trimmed and must be non-empty, defaulting to an empty list. The object
// is declared strict.
+ const verifyConfigSchema = z
62
+ .object({
63
+ commands: z.array(z.string().trim().min(1)).default([]),
64
+ })
65
+ .strict()
66
+
61
67
  const workflowConfigSchema = z
62
68
  .object({
63
69
  task: taskConfigSchema.default({}),
70
+ verify: verifyConfigSchema.default({}),
64
71
  workflow: z
65
72
  .object({
66
73
  mode: workflowModeSchema.default('direct'),
@@ -92,8 +99,13 @@ export interface TaskSettingsConfig {
92
99
  source: string
93
100
  }
94
101
 
102
/** Resolved `verify` section of the workflow config. */
+ export interface VerifyConfig {
103
// Filled from the parsed config's `verify.commands` by loadWorkflowConfig.
+ commands: string[]
104
+ }
105
+
95
106
  export interface WorkflowConfig {
96
107
  task: TaskSettingsConfig
108
+ verify: VerifyConfig
97
109
  workflow: WorkflowSettingsConfig
98
110
  }
99
111
 
@@ -119,6 +131,9 @@ export async function loadWorkflowConfig(
119
131
  maxIterations: parsedConfig.task.maxIterations,
120
132
  source: parsedConfig.task.source,
121
133
  },
134
+ verify: {
135
+ commands: parsedConfig.verify.commands,
136
+ },
122
137
  workflow: {
123
138
  mode: parsedConfig.workflow.mode,
124
139
  roles: parsedConfig.workflow.roles,