orch-code 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/LICENSE +21 -0
  3. package/README.md +624 -0
  4. package/cmd/apply.go +111 -0
  5. package/cmd/auth.go +393 -0
  6. package/cmd/auth_test.go +100 -0
  7. package/cmd/diff.go +57 -0
  8. package/cmd/doctor.go +149 -0
  9. package/cmd/explain.go +192 -0
  10. package/cmd/explain_test.go +62 -0
  11. package/cmd/init.go +100 -0
  12. package/cmd/interactive.go +1372 -0
  13. package/cmd/interactive_input.go +45 -0
  14. package/cmd/interactive_input_test.go +55 -0
  15. package/cmd/logs.go +72 -0
  16. package/cmd/model.go +84 -0
  17. package/cmd/plan.go +149 -0
  18. package/cmd/provider.go +189 -0
  19. package/cmd/provider_model_doctor_test.go +91 -0
  20. package/cmd/root.go +67 -0
  21. package/cmd/run.go +123 -0
  22. package/cmd/run_engine.go +208 -0
  23. package/cmd/run_engine_test.go +30 -0
  24. package/cmd/session.go +589 -0
  25. package/cmd/session_helpers.go +54 -0
  26. package/cmd/session_integration_test.go +30 -0
  27. package/cmd/session_list_current_test.go +87 -0
  28. package/cmd/session_messages_test.go +163 -0
  29. package/cmd/session_runs_test.go +68 -0
  30. package/cmd/sprint1_integration_test.go +119 -0
  31. package/cmd/stats.go +173 -0
  32. package/cmd/stats_test.go +71 -0
  33. package/cmd/version.go +4 -0
  34. package/go.mod +45 -0
  35. package/go.sum +108 -0
  36. package/internal/agents/agent.go +31 -0
  37. package/internal/agents/coder.go +167 -0
  38. package/internal/agents/planner.go +155 -0
  39. package/internal/agents/reviewer.go +118 -0
  40. package/internal/agents/runtime.go +25 -0
  41. package/internal/agents/runtime_test.go +77 -0
  42. package/internal/auth/account.go +78 -0
  43. package/internal/auth/oauth.go +523 -0
  44. package/internal/auth/store.go +287 -0
  45. package/internal/confidence/policy.go +174 -0
  46. package/internal/confidence/policy_test.go +71 -0
  47. package/internal/confidence/scorer.go +253 -0
  48. package/internal/confidence/scorer_test.go +83 -0
  49. package/internal/config/config.go +331 -0
  50. package/internal/config/config_defaults_test.go +138 -0
  51. package/internal/execution/contract_builder.go +160 -0
  52. package/internal/execution/contract_builder_test.go +68 -0
  53. package/internal/execution/plan_compliance.go +161 -0
  54. package/internal/execution/plan_compliance_test.go +71 -0
  55. package/internal/execution/retry_directive.go +132 -0
  56. package/internal/execution/scope_guard.go +69 -0
  57. package/internal/logger/logger.go +120 -0
  58. package/internal/models/contracts_test.go +100 -0
  59. package/internal/models/models.go +269 -0
  60. package/internal/orchestrator/orchestrator.go +701 -0
  61. package/internal/orchestrator/orchestrator_retry_test.go +135 -0
  62. package/internal/orchestrator/review_engine_test.go +50 -0
  63. package/internal/orchestrator/state.go +42 -0
  64. package/internal/orchestrator/test_classifier_test.go +68 -0
  65. package/internal/patch/applier.go +131 -0
  66. package/internal/patch/applier_test.go +25 -0
  67. package/internal/patch/parser.go +89 -0
  68. package/internal/patch/patch.go +60 -0
  69. package/internal/patch/summary.go +30 -0
  70. package/internal/patch/validator.go +104 -0
  71. package/internal/planning/normalizer.go +416 -0
  72. package/internal/planning/normalizer_test.go +64 -0
  73. package/internal/providers/errors.go +35 -0
  74. package/internal/providers/openai/client.go +498 -0
  75. package/internal/providers/openai/client_test.go +187 -0
  76. package/internal/providers/provider.go +47 -0
  77. package/internal/providers/registry.go +32 -0
  78. package/internal/providers/registry_test.go +57 -0
  79. package/internal/providers/router.go +52 -0
  80. package/internal/providers/state.go +114 -0
  81. package/internal/providers/state_test.go +64 -0
  82. package/internal/repo/analyzer.go +188 -0
  83. package/internal/repo/context.go +83 -0
  84. package/internal/review/engine.go +267 -0
  85. package/internal/review/engine_test.go +103 -0
  86. package/internal/runstore/store.go +137 -0
  87. package/internal/runstore/store_test.go +59 -0
  88. package/internal/runtime/lock.go +150 -0
  89. package/internal/runtime/lock_test.go +57 -0
  90. package/internal/session/compaction.go +260 -0
  91. package/internal/session/compaction_test.go +36 -0
  92. package/internal/session/service.go +117 -0
  93. package/internal/session/service_test.go +113 -0
  94. package/internal/storage/storage.go +1498 -0
  95. package/internal/storage/storage_test.go +413 -0
  96. package/internal/testing/classifier.go +80 -0
  97. package/internal/testing/classifier_test.go +36 -0
  98. package/internal/tools/command.go +160 -0
  99. package/internal/tools/command_test.go +56 -0
  100. package/internal/tools/file.go +111 -0
  101. package/internal/tools/git.go +77 -0
  102. package/internal/tools/invalid_params_test.go +36 -0
  103. package/internal/tools/policy.go +98 -0
  104. package/internal/tools/policy_test.go +36 -0
  105. package/internal/tools/registry_test.go +52 -0
  106. package/internal/tools/result.go +30 -0
  107. package/internal/tools/search.go +86 -0
  108. package/internal/tools/tool.go +94 -0
  109. package/main.go +9 -0
  110. package/npm/orch.js +25 -0
  111. package/package.json +41 -0
  112. package/scripts/changelog.js +20 -0
  113. package/scripts/check-release-version.js +21 -0
  114. package/scripts/lib/release-utils.js +223 -0
  115. package/scripts/postinstall.js +157 -0
  116. package/scripts/release.js +52 -0
@@ -0,0 +1,701 @@
1
+ // Package orchestrator runs the task pipeline: Task → Repo Analysis → Context Selection → Planner → Coder → Patch Validation → Test → Reviewer → Result.
2
+ package orchestrator
3
+
4
+ import (
5
+ "context"
6
+ "fmt"
7
+ "os"
8
+ "strings"
9
+ "time"
10
+
11
+ "github.com/furkanbeydemir/orch/internal/agents"
12
+ "github.com/furkanbeydemir/orch/internal/auth"
13
+ "github.com/furkanbeydemir/orch/internal/confidence"
14
+ "github.com/furkanbeydemir/orch/internal/config"
15
+ "github.com/furkanbeydemir/orch/internal/execution"
16
+ "github.com/furkanbeydemir/orch/internal/logger"
17
+ "github.com/furkanbeydemir/orch/internal/models"
18
+ "github.com/furkanbeydemir/orch/internal/patch"
19
+ "github.com/furkanbeydemir/orch/internal/planning"
20
+ "github.com/furkanbeydemir/orch/internal/providers"
21
+ "github.com/furkanbeydemir/orch/internal/providers/openai"
22
+ "github.com/furkanbeydemir/orch/internal/repo"
23
+ reviewengine "github.com/furkanbeydemir/orch/internal/review"
24
+ testingengine "github.com/furkanbeydemir/orch/internal/testing"
25
+ "github.com/furkanbeydemir/orch/internal/tools"
26
+ )
27
+
28
+ type Orchestrator struct {
29
+ cfg *config.Config
30
+ // log, execution trace logger.
31
+ log *logger.Logger
32
+ analyzer *repo.Analyzer
33
+ contextBuilder *repo.ContextBuilder
34
+ planner agents.Agent
35
+ coder agents.Agent
36
+ reviewer agents.Agent
37
+ patchPipeline *patch.Pipeline
38
+ contractBuilder *execution.ContractBuilder
39
+ scopeGuard *execution.ScopeGuard
40
+ planGuard *execution.PlanComplianceGuard
41
+ retryBuilder *execution.RetryDirectiveBuilder
42
+ testClassifier *testingengine.Classifier
43
+ reviewEngine *reviewengine.Engine
44
+ confidenceScorer *confidence.Scorer
45
+ confidencePolicy *confidence.Policy
46
+ toolRegistry *tools.Registry
47
+ repoRoot string
48
+ providerReady bool
49
+ // verbose controls detailed log output.
50
+ verbose bool
51
+ }
52
+
53
+ func New(cfg *config.Config, repoRoot string, verbose bool) *Orchestrator {
54
+ runID := fmt.Sprintf("run-%d", time.Now().UnixNano())
55
+ log := logger.New(runID, repoRoot, verbose)
56
+
57
+ orch := &Orchestrator{
58
+ cfg: cfg,
59
+ log: log,
60
+ analyzer: repo.NewAnalyzer(repoRoot),
61
+ contextBuilder: repo.NewContextBuilder(repoRoot),
62
+ planner: agents.NewPlanner(cfg.Models.Planner),
63
+ coder: agents.NewCoder(cfg.Models.Coder),
64
+ reviewer: agents.NewReviewer(cfg.Models.Reviewer),
65
+ patchPipeline: patch.NewPipeline(cfg.Patch.MaxFiles, cfg.Patch.MaxLines),
66
+ contractBuilder: execution.NewContractBuilder(cfg),
67
+ scopeGuard: execution.NewScopeGuard(),
68
+ planGuard: execution.NewPlanComplianceGuard(),
69
+ retryBuilder: execution.NewRetryDirectiveBuilder(),
70
+ testClassifier: testingengine.NewClassifier(),
71
+ reviewEngine: reviewengine.NewEngine(),
72
+ confidenceScorer: confidence.New(),
73
+ confidencePolicy: confidence.NewPolicy(cfg),
74
+ toolRegistry: tools.DefaultRegistryWithPolicy(repoRoot, buildPolicy(cfg, tools.ModeRun), nil),
75
+ repoRoot: repoRoot,
76
+ verbose: verbose,
77
+ }
78
+
79
+ orch.attachProviderRuntime()
80
+
81
+ return orch
82
+ }
83
+
84
+ func (o *Orchestrator) attachProviderRuntime() {
85
+ if o == nil || o.cfg == nil {
86
+ return
87
+ }
88
+ if !o.cfg.Provider.Flags.OpenAIEnabled {
89
+ return
90
+ }
91
+ mode := strings.ToLower(strings.TrimSpace(o.cfg.Provider.OpenAI.AuthMode))
92
+ hasEnvAPIKey := strings.TrimSpace(os.Getenv(o.cfg.Provider.OpenAI.APIKeyEnv)) != ""
93
+ if mode == "api_key" && !hasEnvAPIKey {
94
+ cred, err := auth.Get(o.repoRoot, "openai")
95
+ if err != nil || cred == nil || strings.ToLower(strings.TrimSpace(cred.Type)) != "api" || strings.TrimSpace(cred.Key) == "" {
96
+ return
97
+ }
98
+ }
99
+
100
+ registry := providers.NewRegistry()
101
+ client := openai.New(o.cfg.Provider.OpenAI)
102
+ client.SetTokenResolver(func(ctx context.Context) (string, error) {
103
+ _ = ctx
104
+ if strings.ToLower(strings.TrimSpace(o.cfg.Provider.OpenAI.AuthMode)) == "api_key" {
105
+ cred, err := auth.Get(o.repoRoot, "openai")
106
+ if err != nil || cred == nil {
107
+ return "", err
108
+ }
109
+ if strings.ToLower(strings.TrimSpace(cred.Type)) == "api" {
110
+ return strings.TrimSpace(cred.Key), nil
111
+ }
112
+ return "", nil
113
+ }
114
+ return auth.ResolveAccountAccessToken(o.repoRoot, "openai")
115
+ })
116
+ registry.Register(client)
117
+ router := providers.NewRouter(o.cfg, registry)
118
+ runtime := &agents.LLMRuntime{Router: router}
119
+ o.providerReady = true
120
+
121
+ if planner, ok := o.planner.(*agents.Planner); ok {
122
+ planner.SetRuntime(runtime)
123
+ }
124
+ if coder, ok := o.coder.(*agents.Coder); ok {
125
+ coder.SetRuntime(runtime)
126
+ }
127
+ if reviewer, ok := o.reviewer.(*agents.Reviewer); ok {
128
+ reviewer.SetRuntime(runtime)
129
+ }
130
+ }
131
+
132
+ // Pipeline: Analyze → Plan → Code → Validate → Test → Review
133
+ func (o *Orchestrator) Run(task *models.Task) (*models.RunState, error) {
134
+ runID := fmt.Sprintf("run-%d", time.Now().UnixNano())
135
+ o.log = logger.New(runID, o.repoRoot, o.verbose)
136
+
137
+ state := &models.RunState{
138
+ ID: runID,
139
+ Task: *task,
140
+ Status: models.StatusCreated,
141
+ Logs: make([]models.LogEntry, 0),
142
+ Retries: models.RetryState{},
143
+ StartedAt: time.Now(),
144
+ }
145
+
146
+ o.log.Log("orchestrator", "start", fmt.Sprintf("Task started: %s", task.Description))
147
+ o.log.Log("policy", "mode", "policy decision mode=run read_only=false")
148
+ if o.providerReady {
149
+ o.log.Log("provider", "status", fmt.Sprintf("active=openai planner=%s coder=%s reviewer=%s auth_mode=%s", o.cfg.Provider.OpenAI.Models.Planner, o.cfg.Provider.OpenAI.Models.Coder, o.cfg.Provider.OpenAI.Models.Reviewer, o.cfg.Provider.OpenAI.AuthMode))
150
+ } else {
151
+ o.log.Log("provider", "status", "inactive; falling back to local agent behavior")
152
+ }
153
+ o.toolRegistry = tools.DefaultRegistryWithPolicy(o.repoRoot, buildPolicy(o.cfg, tools.ModeRun), func(message string) {
154
+ o.log.Log("policy", "decision", message)
155
+ })
156
+
157
+ // 1. Repository analysis
158
+ if err := o.stepAnalyze(state); err != nil {
159
+ return o.fail(state, err)
160
+ }
161
+
162
+ // 2. Planning
163
+ if err := o.stepPlan(state); err != nil {
164
+ return o.fail(state, err)
165
+ }
166
+
167
+ if err := o.stepCode(state); err != nil {
168
+ return o.fail(state, err)
169
+ }
170
+
171
+ if err := o.stepValidateWithRetries(state); err != nil {
172
+ return o.fail(state, err)
173
+ }
174
+
175
+ if err := o.stepTestWithRetries(state); err != nil {
176
+ return o.fail(state, err)
177
+ }
178
+
179
+ if err := o.stepReviewWithRetries(state); err != nil {
180
+ return o.fail(state, err)
181
+ }
182
+
183
+ if err := Transition(state, models.StatusCompleted); err != nil {
184
+ return o.fail(state, err)
185
+ }
186
+
187
+ now := time.Now()
188
+ state.CompletedAt = &now
189
+ o.log.Log("orchestrator", "complete", "Pipeline completed successfully")
190
+
191
+ state.Logs = o.log.Entries()
192
+ _ = o.log.Save()
193
+
194
+ return state, nil
195
+ }
196
+
197
+ func (o *Orchestrator) Plan(task *models.Task) (*models.Plan, error) {
198
+ _, plan, err := o.PlanDetailed(task)
199
+ return plan, err
200
+ }
201
+
202
+ func (o *Orchestrator) PlanDetailed(task *models.Task) (*models.TaskBrief, *models.Plan, error) {
203
+ o.log.Log("policy", "mode", "policy decision mode=plan read_only=true")
204
+ o.toolRegistry = tools.DefaultRegistryWithPolicy(o.repoRoot, buildPolicy(o.cfg, tools.ModePlan), func(message string) {
205
+ o.log.Log("policy", "decision", message)
206
+ })
207
+ o.log.Log("orchestrator", "plan-only", fmt.Sprintf("Generating plan: %s", task.Description))
208
+
209
+ repoMap, err := o.analyzer.Analyze()
210
+ if err != nil {
211
+ return nil, nil, fmt.Errorf("repository analysis failed: %w", err)
212
+ }
213
+
214
+ taskBrief, compiledPlan := o.compilePlanArtifacts(task, repoMap)
215
+ input := &agents.Input{
216
+ Task: task,
217
+ TaskBrief: taskBrief,
218
+ RepoMap: repoMap,
219
+ Plan: compiledPlan,
220
+ }
221
+
222
+ output, err := o.planner.Execute(input)
223
+ if err != nil {
224
+ return nil, nil, fmt.Errorf("planning failed: %w", err)
225
+ }
226
+ if output == nil || output.Plan == nil {
227
+ return taskBrief, compiledPlan, nil
228
+ }
229
+
230
+ return taskBrief, output.Plan, nil
231
+ }
232
+
233
+ func (o *Orchestrator) stepAnalyze(state *models.RunState) error {
234
+ if err := Transition(state, models.StatusAnalyzing); err != nil {
235
+ return err
236
+ }
237
+ o.log.Log("analyzer", "analyze", "Scanning repository...")
238
+
239
+ _, err := o.analyzer.Analyze()
240
+ if err != nil {
241
+ return fmt.Errorf("repository analysis failed: %w", err)
242
+ }
243
+
244
+ o.log.Log("analyzer", "analyze", "Repository analysis completed")
245
+ return nil
246
+ }
247
+
248
+ func (o *Orchestrator) stepPlan(state *models.RunState) error {
249
+ if err := Transition(state, models.StatusPlanning); err != nil {
250
+ return err
251
+ }
252
+ o.log.Log("planner", "plan", "Generating plan...")
253
+ if o.providerReady {
254
+ o.log.Log("provider", "planner", fmt.Sprintf("model=%s", o.cfg.Provider.OpenAI.Models.Planner))
255
+ }
256
+
257
+ repoMap, err := o.analyzer.Analyze()
258
+ if err != nil {
259
+ return fmt.Errorf("context repository analysis failed: %w", err)
260
+ }
261
+
262
+ taskBrief, compiledPlan := o.compilePlanArtifacts(&state.Task, repoMap)
263
+ input := &agents.Input{
264
+ Task: &state.Task,
265
+ TaskBrief: taskBrief,
266
+ RepoMap: repoMap,
267
+ Plan: compiledPlan,
268
+ }
269
+
270
+ output, err := o.planner.Execute(input)
271
+ if err != nil {
272
+ return fmt.Errorf("planning failed: %w", err)
273
+ }
274
+
275
+ state.TaskBrief = taskBrief
276
+ state.Plan = compiledPlan
277
+ if output != nil && output.Plan != nil {
278
+ state.Plan = output.Plan
279
+ }
280
+ state.Context = o.contextBuilder.Build(&state.Task, repoMap, state.Plan)
281
+ o.log.Log("context", "build", fmt.Sprintf("Context built: selected=%d tests=%d configs=%d", len(state.Context.SelectedFiles), len(state.Context.RelatedTests), len(state.Context.RelevantConfigs)))
282
+
283
+ o.log.Log("planner", "plan", "Plan generated")
284
+ return nil
285
+ }
286
+
287
+ func (o *Orchestrator) stepCode(state *models.RunState) error {
288
+ if err := Transition(state, models.StatusCoding); err != nil {
289
+ return err
290
+ }
291
+ o.log.Log("coder", "code", "Generating code changes...")
292
+ if o.providerReady {
293
+ o.log.Log("provider", "coder", fmt.Sprintf("model=%s", o.cfg.Provider.OpenAI.Models.Coder))
294
+ }
295
+
296
+ state.ExecutionContract = o.contractBuilder.Build(&state.Task, state.TaskBrief, state.Plan, state.Context)
297
+ if state.ExecutionContract != nil {
298
+ o.log.Log("execution", "contract", fmt.Sprintf("allowed_files=%d inspect_files=%d required_edits=%d", len(state.ExecutionContract.AllowedFiles), len(state.ExecutionContract.InspectFiles), len(state.ExecutionContract.RequiredEdits)))
299
+ }
300
+
301
+ input := &agents.Input{
302
+ Task: &state.Task,
303
+ TaskBrief: state.TaskBrief,
304
+ Plan: state.Plan,
305
+ ExecutionContract: state.ExecutionContract,
306
+ Context: state.Context,
307
+ RetryDirective: state.RetryDirective,
308
+ }
309
+
310
+ output, err := o.coder.Execute(input)
311
+ if err != nil {
312
+ return fmt.Errorf("code generation failed: %w", err)
313
+ }
314
+
315
+ state.Patch = output.Patch
316
+ if state.Patch != nil && strings.TrimSpace(state.Patch.RawDiff) != "" {
317
+ parsedPatch, parseErr := patch.NewParser().Parse(state.Patch.RawDiff)
318
+ if parseErr != nil {
319
+ state.ValidationResults = append(state.ValidationResults, models.ValidationResult{
320
+ Name: "patch_parse_valid",
321
+ Stage: "validation",
322
+ Status: models.ValidationFail,
323
+ Severity: models.SeverityHigh,
324
+ Summary: parseErr.Error(),
325
+ })
326
+ return fmt.Errorf("code generation produced an invalid patch: %w", parseErr)
327
+ }
328
+ parsedPatch.TaskID = state.Task.ID
329
+ state.Patch = parsedPatch
330
+ }
331
+ state.RetryDirective = nil
332
+ o.log.Log("coder", "code", "Code changes generated")
333
+ return nil
334
+ }
335
+
336
+ func (o *Orchestrator) stepValidate(state *models.RunState) error {
337
+ if err := Transition(state, models.StatusValidating); err != nil {
338
+ return err
339
+ }
340
+ o.log.Log("validator", "validate", "Validating patch...")
341
+ state.ValidationResults = []models.ValidationResult{}
342
+
343
+ if state.Patch == nil {
344
+ result := models.ValidationResult{
345
+ Name: "patch_present",
346
+ Stage: "validation",
347
+ Status: models.ValidationFail,
348
+ Severity: models.SeverityHigh,
349
+ Summary: "no patch found to validate",
350
+ }
351
+ state.ValidationResults = append(state.ValidationResults, result)
352
+ return fmt.Errorf("%s", result.Summary)
353
+ }
354
+
355
+ state.ValidationResults = append(state.ValidationResults, models.ValidationResult{
356
+ Name: "patch_parse_valid",
357
+ Stage: "validation",
358
+ Status: models.ValidationPass,
359
+ Severity: models.SeverityLow,
360
+ Summary: "patch parsed successfully",
361
+ })
362
+
363
+ if err := o.patchPipeline.Validate(state.Patch); err != nil {
364
+ state.ValidationResults = append(state.ValidationResults, models.ValidationResult{
365
+ Name: "patch_hygiene",
366
+ Stage: "validation",
367
+ Status: models.ValidationFail,
368
+ Severity: models.SeverityHigh,
369
+ Summary: err.Error(),
370
+ })
371
+ if o.cfg.Safety.FeatureFlags.PatchConflictReporting {
372
+ return fmt.Errorf("patch validation failed (impacted files: %s): %w", strings.Join(patchFilePaths(state.Patch), ", "), err)
373
+ }
374
+ return fmt.Errorf("patch validation failed: %w", err)
375
+ }
376
+ state.ValidationResults = append(state.ValidationResults, models.ValidationResult{
377
+ Name: "patch_hygiene",
378
+ Stage: "validation",
379
+ Status: models.ValidationPass,
380
+ Severity: models.SeverityLow,
381
+ Summary: "patch passed patch hygiene validation",
382
+ })
383
+
384
+ scopeResult := o.scopeGuard.Validate(state.ExecutionContract, state.Patch)
385
+ state.ValidationResults = append(state.ValidationResults, scopeResult)
386
+ if scopeResult.Status == models.ValidationFail {
387
+ return fmt.Errorf("%s", scopeResult.Summary)
388
+ }
389
+
390
+ planResult := o.planGuard.Validate(state.Plan, state.ExecutionContract, state.Patch)
391
+ state.ValidationResults = append(state.ValidationResults, planResult)
392
+ if planResult.Status == models.ValidationFail {
393
+ return fmt.Errorf("%s", planResult.Summary)
394
+ }
395
+
396
+ o.log.Log("validator", "validate", fmt.Sprintf("Patch validated with %d gate results", len(state.ValidationResults)))
397
+ return nil
398
+ }
399
+
400
+ func (o *Orchestrator) stepTest(state *models.RunState) error {
401
+ if err := Transition(state, models.StatusTesting); err != nil {
402
+ return err
403
+ }
404
+ o.log.Log("test", "test", "Running tests...")
405
+ state.TestFailures = nil
406
+ state.ValidationResults = filterOutStage(state.ValidationResults, "test")
407
+
408
+ result, err := o.toolRegistry.Execute("run_tests", map[string]string{"command": o.cfg.Commands.Test})
409
+ if err != nil {
410
+ state.ValidationResults = append(state.ValidationResults,
411
+ models.ValidationResult{
412
+ Name: "required_tests_executed",
413
+ Stage: "test",
414
+ Status: models.ValidationFail,
415
+ Severity: models.SeverityHigh,
416
+ Summary: "failed to start test command",
417
+ },
418
+ )
419
+ state.TestFailures = o.testClassifier.Classify("", err.Error())
420
+ state.TestResults = strings.TrimSpace(err.Error())
421
+ return fmt.Errorf("failed to start test command: %w", err)
422
+ }
423
+
424
+ if result == nil {
425
+ state.ValidationResults = append(state.ValidationResults,
426
+ models.ValidationResult{
427
+ Name: "required_tests_executed",
428
+ Stage: "test",
429
+ Status: models.ValidationFail,
430
+ Severity: models.SeverityHigh,
431
+ Summary: "test result was not returned",
432
+ },
433
+ )
434
+ state.TestFailures = o.testClassifier.Classify("", "test result was not returned")
435
+ return fmt.Errorf("test result was not returned")
436
+ }
437
+
438
+ state.ValidationResults = append(state.ValidationResults,
439
+ models.ValidationResult{
440
+ Name: "required_tests_executed",
441
+ Stage: "test",
442
+ Status: models.ValidationPass,
443
+ Severity: models.SeverityLow,
444
+ Summary: "required tests were executed",
445
+ },
446
+ )
447
+
448
+ state.TestResults = strings.TrimSpace(result.Output)
449
+ if !result.Success {
450
+ o.log.Log("test", "test", "Tests failed")
451
+ if state.TestResults == "" {
452
+ state.TestResults = strings.TrimSpace(result.Error)
453
+ }
454
+ state.TestFailures = o.testClassifier.Classify(result.Output, result.Error)
455
+ summaries := make([]string, 0, len(state.TestFailures))
456
+ for _, failure := range state.TestFailures {
457
+ summaries = append(summaries, failure.Code+": "+failure.Summary)
458
+ }
459
+ state.ValidationResults = append(state.ValidationResults,
460
+ models.ValidationResult{
461
+ Name: "required_tests_passed",
462
+ Stage: "test",
463
+ Status: models.ValidationFail,
464
+ Severity: models.SeverityHigh,
465
+ Summary: strings.Join(summaries, " | "),
466
+ },
467
+ )
468
+ return fmt.Errorf("tests failed: %s", strings.TrimSpace(result.Error))
469
+ }
470
+
471
+ state.ValidationResults = append(state.ValidationResults,
472
+ models.ValidationResult{
473
+ Name: "required_tests_passed",
474
+ Stage: "test",
475
+ Status: models.ValidationPass,
476
+ Severity: models.SeverityLow,
477
+ Summary: "required tests passed",
478
+ },
479
+ )
480
+ state.TestFailures = nil
481
+ o.log.Log("test", "test", "Tests completed")
482
+ return nil
483
+ }
484
+
485
+ func (o *Orchestrator) stepReview(state *models.RunState) error {
486
+ if err := Transition(state, models.StatusReviewing); err != nil {
487
+ return err
488
+ }
489
+ o.log.Log("reviewer", "review", "Reviewing changes...")
490
+ state.ValidationResults = filterOutStage(state.ValidationResults, "review")
491
+ if o.providerReady {
492
+ o.log.Log("provider", "reviewer", fmt.Sprintf("model=%s", o.cfg.Provider.OpenAI.Models.Reviewer))
493
+ }
494
+
495
+ input := &agents.Input{
496
+ Task: &state.Task,
497
+ TaskBrief: state.TaskBrief,
498
+ Plan: state.Plan,
499
+ ExecutionContract: state.ExecutionContract,
500
+ Patch: state.Patch,
501
+ ValidationResults: state.ValidationResults,
502
+ TestResults: state.TestResults,
503
+ }
504
+
505
+ output, err := o.reviewer.Execute(input)
506
+ if err != nil {
507
+ return fmt.Errorf("review failed: %w", err)
508
+ }
509
+
510
+ var providerReview *models.ReviewResult
511
+ if output != nil {
512
+ providerReview = output.Review
513
+ }
514
+ scorecard, finalReview := o.reviewEngine.Evaluate(state, providerReview)
515
+ state.ReviewScorecard = scorecard
516
+ state.Review = finalReview
517
+ state.Confidence = o.confidenceScorer.Score(state)
518
+ if state.Review == nil {
519
+ return fmt.Errorf("review engine did not produce a review result")
520
+ }
521
+ if state.Confidence != nil {
522
+ o.log.Log("confidence", "score", fmt.Sprintf("score=%.2f band=%s", state.Confidence.Score, state.Confidence.Band))
523
+ }
524
+ if err := o.confidencePolicy.Apply(state); err != nil {
525
+ return fmt.Errorf("confidence policy blocked completion: %w", err)
526
+ }
527
+ o.log.Log("reviewer", "review", fmt.Sprintf("Review completed: %s", state.Review.Decision))
528
+ return nil
529
+ }
530
+
531
+ func (o *Orchestrator) stepValidateWithRetries(state *models.RunState) error {
532
+ maxRetries := 0
533
+ if o.cfg.Safety.FeatureFlags.RetryLimits {
534
+ maxRetries = o.cfg.Safety.Retry.ValidationMax
535
+ }
536
+
537
+ for {
538
+ err := o.stepValidate(state)
539
+ if err == nil {
540
+ return nil
541
+ }
542
+
543
+ if state.Retries.Validation >= maxRetries {
544
+ o.addUnresolvedFailure(state, "validation", err)
545
+ return err
546
+ }
547
+
548
+ state.Retries.Validation++
549
+ state.RetryDirective = o.retryBuilder.FromValidation(state, state.Retries.Validation)
550
+ if state.RetryDirective != nil {
551
+ o.log.Log("orchestrator", "retry_contract", fmt.Sprintf("stage=%s attempt=%d failed_gates=%s", state.RetryDirective.Stage, state.RetryDirective.Attempt, strings.Join(state.RetryDirective.FailedGates, ",")))
552
+ }
553
+ o.log.Log("orchestrator", "retry", fmt.Sprintf("Validation failed, retrying code generation (%d/%d)", state.Retries.Validation, maxRetries))
554
+ if codeErr := o.stepCode(state); codeErr != nil {
555
+ o.addUnresolvedFailure(state, "coding-after-validation", codeErr)
556
+ return codeErr
557
+ }
558
+ }
559
+ }
560
+
561
+ func (o *Orchestrator) stepTestWithRetries(state *models.RunState) error {
562
+ maxRetries := 0
563
+ if o.cfg.Safety.FeatureFlags.RetryLimits {
564
+ maxRetries = o.cfg.Safety.Retry.TestMax
565
+ }
566
+
567
+ for {
568
+ err := o.stepTest(state)
569
+ if err == nil {
570
+ return nil
571
+ }
572
+
573
+ if state.Retries.Testing >= maxRetries {
574
+ o.addUnresolvedFailure(state, "test", err)
575
+ return err
576
+ }
577
+
578
+ state.Retries.Testing++
579
+ state.RetryDirective = o.retryBuilder.FromTest(state, state.Retries.Testing)
580
+ if state.RetryDirective != nil {
581
+ o.log.Log("orchestrator", "retry_contract", fmt.Sprintf("stage=%s attempt=%d failed_tests=%d", state.RetryDirective.Stage, state.RetryDirective.Attempt, len(state.RetryDirective.FailedTests)))
582
+ }
583
+ o.log.Log("orchestrator", "retry", fmt.Sprintf("Tests failed, retrying code generation (%d/%d)", state.Retries.Testing, maxRetries))
584
+ if codeErr := o.stepCode(state); codeErr != nil {
585
+ o.addUnresolvedFailure(state, "coding-after-test", codeErr)
586
+ return codeErr
587
+ }
588
+ if validateErr := o.stepValidate(state); validateErr != nil {
589
+ o.addUnresolvedFailure(state, "validation-after-test", validateErr)
590
+ return validateErr
591
+ }
592
+ }
593
+ }
594
+
595
+ func (o *Orchestrator) stepReviewWithRetries(state *models.RunState) error {
596
+ maxRetries := 0
597
+ if o.cfg.Safety.FeatureFlags.RetryLimits {
598
+ maxRetries = o.cfg.Safety.Retry.ReviewMax
599
+ }
600
+
601
+ for {
602
+ err := o.stepReview(state)
603
+ if err != nil {
604
+ o.addUnresolvedFailure(state, "review", err)
605
+ return err
606
+ }
607
+
608
+ if state.Review == nil || state.Review.Decision != models.ReviewRevise {
609
+ return nil
610
+ }
611
+
612
+ if state.Retries.Review >= maxRetries {
613
+ err = fmt.Errorf("review requested revise beyond retry limit")
614
+ o.addUnresolvedFailure(state, "review-revise", err)
615
+ return err
616
+ }
617
+
618
+ state.Retries.Review++
619
+ state.RetryDirective = o.retryBuilder.FromReview(state, state.Retries.Review)
620
+ if state.RetryDirective != nil {
621
+ o.log.Log("orchestrator", "retry_contract", fmt.Sprintf("stage=%s attempt=%d reasons=%d", state.RetryDirective.Stage, state.RetryDirective.Attempt, len(state.RetryDirective.Reasons)))
622
+ }
623
+ o.log.Log("orchestrator", "retry", fmt.Sprintf("Review requested revise, retrying code generation (%d/%d)", state.Retries.Review, maxRetries))
624
+
625
+ if codeErr := o.stepCode(state); codeErr != nil {
626
+ o.addUnresolvedFailure(state, "coding-after-review", codeErr)
627
+ return codeErr
628
+ }
629
+ if validateErr := o.stepValidateWithRetries(state); validateErr != nil {
630
+ return validateErr
631
+ }
632
+ if testErr := o.stepTestWithRetries(state); testErr != nil {
633
+ return testErr
634
+ }
635
+ }
636
+ }
637
+
638
+ func (o *Orchestrator) addUnresolvedFailure(state *models.RunState, stage string, err error) {
639
+ failure := fmt.Sprintf("%s: %v", stage, err)
640
+ state.UnresolvedFailures = append(state.UnresolvedFailures, failure)
641
+ state.BestPatchSummary = patch.Summarize(state.Patch)
642
+ o.log.Log("orchestrator", "unresolved", failure)
643
+ }
644
+
645
+ func filterOutStage(results []models.ValidationResult, stage string) []models.ValidationResult {
646
+ filtered := make([]models.ValidationResult, 0, len(results))
647
+ for _, result := range results {
648
+ if strings.EqualFold(strings.TrimSpace(result.Stage), strings.TrimSpace(stage)) {
649
+ continue
650
+ }
651
+ filtered = append(filtered, result)
652
+ }
653
+ return filtered
654
+ }
655
+
656
+ func (o *Orchestrator) compilePlanArtifacts(task *models.Task, repoMap *models.RepoMap) (*models.TaskBrief, *models.Plan) {
657
+ taskBrief := planning.NormalizeTask(task)
658
+ compiledPlan := planning.CompilePlan(task, taskBrief, repoMap)
659
+ return taskBrief, compiledPlan
660
+ }
661
+
662
+ func buildPolicy(cfg *config.Config, mode string) tools.Policy {
663
+ policy := tools.Policy{Mode: mode}
664
+ if cfg != nil {
665
+ policy.RequireDestructiveApproval = cfg.Safety.RequireDestructiveApproval
666
+ }
667
+ if mode == tools.ModePlan {
668
+ policy.RequireDestructiveApproval = false
669
+ }
670
+ return policy
671
+ }
672
+
673
+ func patchFilePaths(p *models.Patch) []string {
674
+ if p == nil {
675
+ return []string{"unknown"}
676
+ }
677
+ paths := make([]string, 0, len(p.Files))
678
+ for _, file := range p.Files {
679
+ if strings.TrimSpace(file.Path) == "" {
680
+ continue
681
+ }
682
+ paths = append(paths, file.Path)
683
+ }
684
+ if len(paths) == 0 {
685
+ return []string{"unknown"}
686
+ }
687
+ return paths
688
+ }
689
+
690
+ func (o *Orchestrator) fail(state *models.RunState, err error) (*models.RunState, error) {
691
+ o.log.Log("orchestrator", "fail", fmt.Sprintf("Error: %v", err))
692
+ state.Error = err.Error()
693
+ _ = Transition(state, models.StatusFailed)
694
+
695
+ now := time.Now()
696
+ state.CompletedAt = &now
697
+ state.Logs = o.log.Entries()
698
+ _ = o.log.Save()
699
+
700
+ return state, err
701
+ }