@covibes/zeroshot 5.2.1 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +174 -189
- package/README.md +199 -248
- package/cli/commands/providers.js +150 -0
- package/cli/index.js +214 -58
- package/cli/lib/first-run.js +40 -3
- package/cluster-templates/base-templates/debug-workflow.json +24 -78
- package/cluster-templates/base-templates/full-workflow.json +44 -145
- package/cluster-templates/base-templates/single-worker.json +23 -15
- package/cluster-templates/base-templates/worker-validator.json +47 -34
- package/cluster-templates/conductor-bootstrap.json +7 -5
- package/lib/docker-config.js +6 -1
- package/lib/provider-detection.js +59 -0
- package/lib/provider-names.js +56 -0
- package/lib/settings.js +191 -6
- package/lib/stream-json-parser.js +4 -238
- package/package.json +21 -5
- package/scripts/validate-templates.js +100 -0
- package/src/agent/agent-config.js +37 -13
- package/src/agent/agent-context-builder.js +64 -2
- package/src/agent/agent-hook-executor.js +82 -9
- package/src/agent/agent-lifecycle.js +53 -14
- package/src/agent/agent-task-executor.js +196 -194
- package/src/agent/output-extraction.js +200 -0
- package/src/agent/output-reformatter.js +175 -0
- package/src/agent/schema-utils.js +111 -0
- package/src/agent-wrapper.js +102 -30
- package/src/agents/git-pusher-agent.json +1 -1
- package/src/claude-task-runner.js +80 -30
- package/src/config-router.js +13 -13
- package/src/config-validator.js +231 -10
- package/src/github.js +36 -0
- package/src/isolation-manager.js +243 -154
- package/src/ledger.js +28 -6
- package/src/orchestrator.js +391 -96
- package/src/preflight.js +85 -82
- package/src/providers/anthropic/cli-builder.js +45 -0
- package/src/providers/anthropic/index.js +134 -0
- package/src/providers/anthropic/models.js +23 -0
- package/src/providers/anthropic/output-parser.js +159 -0
- package/src/providers/base-provider.js +181 -0
- package/src/providers/capabilities.js +51 -0
- package/src/providers/google/cli-builder.js +55 -0
- package/src/providers/google/index.js +116 -0
- package/src/providers/google/models.js +24 -0
- package/src/providers/google/output-parser.js +92 -0
- package/src/providers/index.js +75 -0
- package/src/providers/openai/cli-builder.js +122 -0
- package/src/providers/openai/index.js +135 -0
- package/src/providers/openai/models.js +21 -0
- package/src/providers/openai/output-parser.js +129 -0
- package/src/sub-cluster-wrapper.js +18 -3
- package/src/task-runner.js +8 -6
- package/src/tui/layout.js +20 -3
- package/task-lib/attachable-watcher.js +80 -78
- package/task-lib/claude-recovery.js +119 -0
- package/task-lib/commands/list.js +1 -1
- package/task-lib/commands/resume.js +3 -2
- package/task-lib/commands/run.js +12 -3
- package/task-lib/runner.js +59 -38
- package/task-lib/scheduler.js +2 -2
- package/task-lib/store.js +43 -30
- package/task-lib/watcher.js +81 -62
|
@@ -1,33 +1,21 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Debug Workflow",
|
|
3
|
-
"description": "Investigator
|
|
3
|
+
"description": "Investigator → Fixer → Tester. For DEBUG tasks at SIMPLE+ complexity.",
|
|
4
4
|
"params": {
|
|
5
|
-
"
|
|
5
|
+
"investigator_level": {
|
|
6
6
|
"type": "string",
|
|
7
|
-
"enum": [
|
|
8
|
-
|
|
9
|
-
"sonnet",
|
|
10
|
-
"opus"
|
|
11
|
-
],
|
|
12
|
-
"default": "sonnet"
|
|
7
|
+
"enum": ["level1", "level2", "level3"],
|
|
8
|
+
"default": "level2"
|
|
13
9
|
},
|
|
14
|
-
"
|
|
10
|
+
"fixer_level": {
|
|
15
11
|
"type": "string",
|
|
16
|
-
"enum": [
|
|
17
|
-
|
|
18
|
-
"sonnet",
|
|
19
|
-
"opus"
|
|
20
|
-
],
|
|
21
|
-
"default": "sonnet"
|
|
12
|
+
"enum": ["level1", "level2", "level3"],
|
|
13
|
+
"default": "level2"
|
|
22
14
|
},
|
|
23
|
-
"
|
|
15
|
+
"tester_level": {
|
|
24
16
|
"type": "string",
|
|
25
|
-
"enum": [
|
|
26
|
-
|
|
27
|
-
"sonnet",
|
|
28
|
-
"opus"
|
|
29
|
-
],
|
|
30
|
-
"default": "sonnet"
|
|
17
|
+
"enum": ["level1", "level2", "level3"],
|
|
18
|
+
"default": "level2"
|
|
31
19
|
},
|
|
32
20
|
"max_iterations": {
|
|
33
21
|
"type": "number",
|
|
@@ -47,7 +35,7 @@
|
|
|
47
35
|
{
|
|
48
36
|
"id": "investigator",
|
|
49
37
|
"role": "planning",
|
|
50
|
-
"
|
|
38
|
+
"modelLevel": "{{investigator_level}}",
|
|
51
39
|
"timeout": "{{timeout}}",
|
|
52
40
|
"outputFormat": "json",
|
|
53
41
|
"jsonSchema": {
|
|
@@ -89,12 +77,7 @@
|
|
|
89
77
|
"description": "ALL code areas affected by this cause"
|
|
90
78
|
}
|
|
91
79
|
},
|
|
92
|
-
"required": [
|
|
93
|
-
"cause",
|
|
94
|
-
"whyItsFundamental",
|
|
95
|
-
"howDiscovered",
|
|
96
|
-
"affectedAreas"
|
|
97
|
-
]
|
|
80
|
+
"required": ["cause", "whyItsFundamental", "howDiscovered", "affectedAreas"]
|
|
98
81
|
},
|
|
99
82
|
"description": "All independent root causes identified with proof they are fundamental"
|
|
100
83
|
},
|
|
@@ -131,7 +114,7 @@
|
|
|
131
114
|
]
|
|
132
115
|
},
|
|
133
116
|
"prompt": {
|
|
134
|
-
"system": "##
|
|
117
|
+
"system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a debugging investigator.\n\n## CRITICAL: DEFINE SUCCESS FIRST\n\nBefore investigating, define what SUCCESS looks like from the USER's perspective:\n- User says 'fix failing tests' → success = ALL tests pass (0 failures)\n- User says 'fix the build' → success = build completes with exit 0\n- User says 'fix deployment' → success = deployment succeeds\n\nThis becomes your successCriteria. The task is NOT DONE until successCriteria is met.\n\n## Investigation Process\n\n1. **ENUMERATE ALL FAILURES FIRST**\n - Run the failing command/tests\n - List EVERY failure, error, and issue (not just the first one)\n - This is your failureInventory\n\n2. **Analyze for ROOT CAUSES (may be multiple)**\n - Group failures by likely cause\n - There may be 1 root cause or 5 - find them ALL\n - Don't stop at the first one you find\n - For EACH root cause, document:\n * The cause itself\n * WHY it's the ROOT cause (not a symptom)\n * HOW you discovered it (evidence trail)\n * ALL code areas affected by this cause\n\n3. **Gather evidence for each root cause**\n - Stack traces, logs, error messages\n - Prove each hypothesis\n\n4. **MANDATORY: SIMILARITY SCAN**\n After identifying root causes, search the ENTIRE codebase for similar patterns:\n - Use grep/glob to find ALL occurrences of the same antipattern\n - Check if the same mistake exists in other files/functions\n - List EVERY location in similarPatternLocations\n - The fixer MUST fix ALL of them, not just the originally failing one\n\n5. **Plan THE fix (SINGULAR - ONE OPTION ONLY)**\n - The fix plan must address EVERY root cause\n - The fix plan must include ALL similar pattern locations\n - When complete, successCriteria must be achievable\n\n## 🔴 FIX PLAN REQUIREMENTS (CRITICAL - READ THIS)\n\nYou are providing THE FIX PLAN. Not options. Not alternatives. Not 'recommended approach'.\n\n**ONE FIX. THE BEST FIX. THE ONLY FIX.**\n\n❌ ABSOLUTELY FUCKING FORBIDDEN:\n- 'Option 1... Option 2... I recommend Option 1'\n- 'Alternative approaches include...'\n- 'We could either X or Y'\n- 'A simpler approach would be...'\n- ANY form of multiple choices\n\n✅ REQUIRED:\n- ONE definitive fix plan\n- The fix a SENIOR STAFF PRINCIPAL ENGINEER would implement\n- CLEAN. NO HACKS. NO BAND-AIDS. NO WORKAROUNDS.\n- Fix the ROOT CAUSE, not the symptom\n- If it's a type error, fix the TYPE SYSTEM properly\n- If it's a design flaw, fix the DESIGN\n- If it requires refactoring, DO THE REFACTORING\n\n**ASK YOURSELF:** Would a FAANG Staff Engineer be proud of this fix? Would they ship this to millions of users? If NO, find a better fix.\n\n**The fixer agent will implement EXACTLY what you write.** If you give multiple options, you've FAILED. If you suggest a hack, you've FAILED. If you recommend a band-aid, you've FAILED.\n\n## Output\n- successCriteria: Measurable condition (e.g., '0 test failures', 'build exits 0')\n- failureInventory: COMPLETE list of all failures found\n- rootCauses: Array of objects, each with: cause, whyItsFundamental, howDiscovered, affectedAreas\n- similarPatternLocations: ALL files where similar bug pattern exists (from codebase scan)\n- evidence: Proof for each root cause\n- fixPlan: THE SINGULAR STAFF-LEVEL FIX for ALL root causes AND all similar pattern locations\n- affectedFiles: All files that need changes\n\n## CRITICAL\n- Do NOT narrow scope - enumerate EVERYTHING broken\n- Do NOT stop at first root cause - there may be more\n- Do NOT skip the similarity scan - same bug likely exists elsewhere\n- Do NOT provide multiple fix options - ONE FIX ONLY\n- Do NOT suggest hacks, workarounds, or band-aids\n- successCriteria comes from USER INTENT, not from what you find"
|
|
135
118
|
},
|
|
136
119
|
"contextStrategy": {
|
|
137
120
|
"sources": [
|
|
@@ -172,10 +155,10 @@
|
|
|
172
155
|
{
|
|
173
156
|
"id": "fixer",
|
|
174
157
|
"role": "implementation",
|
|
175
|
-
"
|
|
158
|
+
"modelLevel": "{{fixer_level}}",
|
|
176
159
|
"timeout": "{{timeout}}",
|
|
177
160
|
"prompt": {
|
|
178
|
-
"system": "##
|
|
161
|
+
"system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a bug fixer. Apply the fix from the investigator.\n\n## Your Job\nFix ALL root causes identified in INVESTIGATION_COMPLETE.\n\n## 🔴 MANDATORY: ROOT CAUSE MAPPING\n\nFor EACH root cause from the investigator, you MUST:\n1. Quote the exact cause from INVESTIGATION_COMPLETE\n2. Describe your fix for that specific cause\n3. List files changed for this cause\n4. Explain WHY this is a ROOT fix, not a band-aid\n\nIf a root cause has NO corresponding fix, your work is INCOMPLETE.\nIf you add a fix not mapped to a root cause, JUSTIFY why.\n\n## 🔴 MANDATORY: FIX ALL SIMILAR PATTERN LOCATIONS\n\nThe investigator identified locations with similar bug patterns in similarPatternLocations.\nYou MUST fix ALL of them, not just the originally failing one.\nIf you skip any location, you MUST justify why it's NOT the same bug.\n\n## 🔴 MANDATORY: REGRESSION TESTS REQUIRED\n\nYou MUST add at least one test that:\n1. WOULD FAIL with the original buggy code\n2. PASSES with your fix\n3. Tests the SPECIFIC root cause, not just symptoms\n\nIf you claim existing tests cover this, you MUST:\n- Name the EXACT test file and test case\n- Explain WHY that test would have caught this bug\n- If it DIDN'T catch the bug before, explain why (flaky? not running? wrong assertion?)\n\nWEAK JUSTIFICATIONS WILL BE REJECTED:\n- ❌ 'Tests are hard to write for this'\n- ❌ 'No time for tests'\n- ❌ 'It's obvious it works'\n\nVALID JUSTIFICATIONS:\n- ✅ 'Test auth.test.ts:45 already asserts this exact edge case' (tester will verify)\n- ✅ 'Pure type change, no runtime behavior affected' (tester confirms with typecheck)\n\n## Fix Guidelines\n- Fix the ROOT CAUSE, not just the symptom\n- Make minimal changes (don't refactor unrelated code)\n- Add comments explaining WHY if fix is non-obvious\n\n## After Fixing\n- Run the failing tests to verify fix works\n- Run related tests for regressions\n\n## 🚀 LARGE TASKS - USE SUB-AGENTS\n\nIf task affects >10 files OR >50 errors, DO NOT fix manually. Use the Task tool to spawn parallel sub-agents:\n\n1. **Analyze scope first** - Count files/errors, group by directory or error type\n2. **Spawn sub-agents** - One per group, run in parallel\n3. **Choose model wisely:**\n - **haiku**: Mechanical fixes (unused vars, missing imports, simple type annotations)\n - **sonnet**: Complex fixes (refactoring, logic changes, architectural decisions)\n4. **Aggregate results** - Wait for all sub-agents, verify combined fix\n\nExample Task tool usage:\n```\nTask(prompt=\"Fix all unused variable warnings in src/components/. Remove genuinely unused variables, prefix intentional ones appropriately for the language.\", model=\"haiku\")\n```\n\nDO NOT waste iterations doing manual work that sub-agents can parallelize.\n\n## 🔴 FORBIDDEN - DO NOT FUCKING DO THESE\n\nThese are SHORTCUTS that HIDE problems instead of FIXING them:\n\n### Error Hiding (FAIL FAST - errors must be LOUD)\n- ❌ NEVER return default values to avoid throwing errors\n- ❌ NEVER add fallbacks that silently hide failures\n- ❌ NEVER swallow exceptions with empty catch blocks\n- ❌ NEVER disable or suppress errors/warnings\n\n### Lazy Fixes\n- ❌ NEVER change test expectations to match broken behavior\n- ❌ NEVER use unsafe type casts to silence type errors\n- ❌ NEVER add TODO/FIXME instead of actually fixing\n- ❌ NEVER work around the problem - FIX THE ACTUAL CODE\n\n### Complexity (LLMs love to over-complicate)\n- ❌ NEVER create god functions (>50 lines) - SPLIT THEM\n- ❌ NEVER duplicate logic - EXTRACT IT (DRY)\n- ❌ NEVER hardcode values - make them configurable\n- ❌ NEVER add abstraction layers that aren't needed\n\n### Test Antipatterns\n- ❌ NEVER write tests that verify implementation details\n- ❌ NEVER mock away the thing you're testing\n- ❌ NEVER write assertions that just check existence\n\nIF THE PROBLEM STILL EXISTS BUT IS HIDDEN, YOU HAVE NOT FIXED IT.\n\n## On Rejection - READ THE FUCKING FEEDBACK\n\nWhen tester rejects:\n1. STOP. READ what they wrote. UNDERSTAND the issue.\n2. If same problem persists → your fix is WRONG, try DIFFERENT approach\n3. If new problems appeared → your fix BROKE something, REVERT and rethink\n4. Do NOT blindly retry the same approach\n5. If you are STUCK, say so. Do not waste iterations doing nothing.\n\nRepeating failed approaches = wasted time and money. LEARN from rejection."
|
|
179
162
|
},
|
|
180
163
|
"contextStrategy": {
|
|
181
164
|
"sources": [
|
|
@@ -226,7 +209,7 @@
|
|
|
226
209
|
{
|
|
227
210
|
"id": "tester",
|
|
228
211
|
"role": "validator",
|
|
229
|
-
"
|
|
212
|
+
"modelLevel": "{{tester_level}}",
|
|
230
213
|
"timeout": "{{timeout}}",
|
|
231
214
|
"outputFormat": "json",
|
|
232
215
|
"jsonSchema": {
|
|
@@ -254,10 +237,7 @@
|
|
|
254
237
|
"description": "Command output (truncated if needed)"
|
|
255
238
|
}
|
|
256
239
|
},
|
|
257
|
-
"required": [
|
|
258
|
-
"command",
|
|
259
|
-
"exitCode"
|
|
260
|
-
]
|
|
240
|
+
"required": ["command", "exitCode"]
|
|
261
241
|
},
|
|
262
242
|
"rootCauseVerification": {
|
|
263
243
|
"type": "array",
|
|
@@ -272,18 +252,10 @@
|
|
|
272
252
|
},
|
|
273
253
|
"fixType": {
|
|
274
254
|
"type": "string",
|
|
275
|
-
"enum": [
|
|
276
|
-
"root_fix",
|
|
277
|
-
"band_aid",
|
|
278
|
-
"not_addressed"
|
|
279
|
-
]
|
|
255
|
+
"enum": ["root_fix", "band_aid", "not_addressed"]
|
|
280
256
|
}
|
|
281
257
|
},
|
|
282
|
-
"required": [
|
|
283
|
-
"cause",
|
|
284
|
-
"addressed",
|
|
285
|
-
"fixType"
|
|
286
|
-
]
|
|
258
|
+
"required": ["cause", "addressed", "fixType"]
|
|
287
259
|
}
|
|
288
260
|
},
|
|
289
261
|
"similarLocationVerification": {
|
|
@@ -298,10 +270,7 @@
|
|
|
298
270
|
"type": "boolean"
|
|
299
271
|
}
|
|
300
272
|
},
|
|
301
|
-
"required": [
|
|
302
|
-
"location",
|
|
303
|
-
"fixed"
|
|
304
|
-
]
|
|
273
|
+
"required": ["location", "fixed"]
|
|
305
274
|
}
|
|
306
275
|
},
|
|
307
276
|
"testVerification": {
|
|
@@ -312,11 +281,7 @@
|
|
|
312
281
|
},
|
|
313
282
|
"testQuality": {
|
|
314
283
|
"type": "string",
|
|
315
|
-
"enum": [
|
|
316
|
-
"adequate",
|
|
317
|
-
"trivial",
|
|
318
|
-
"none"
|
|
319
|
-
]
|
|
284
|
+
"enum": ["adequate", "trivial", "none"]
|
|
320
285
|
},
|
|
321
286
|
"wouldFailWithOriginalBug": {
|
|
322
287
|
"type": "boolean"
|
|
@@ -325,10 +290,7 @@
|
|
|
325
290
|
"type": "boolean"
|
|
326
291
|
}
|
|
327
292
|
},
|
|
328
|
-
"required": [
|
|
329
|
-
"newTestsAdded",
|
|
330
|
-
"testQuality"
|
|
331
|
-
]
|
|
293
|
+
"required": ["newTestsAdded", "testQuality"]
|
|
332
294
|
},
|
|
333
295
|
"regressionCheck": {
|
|
334
296
|
"type": "object",
|
|
@@ -365,7 +327,7 @@
|
|
|
365
327
|
]
|
|
366
328
|
},
|
|
367
329
|
"prompt": {
|
|
368
|
-
"system": "##
|
|
330
|
+
"system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a BEHAVIORAL TESTER. Your job is to EXECUTE and VERIFY, not read code.\n\n## 🔴 CORE PRINCIPLE: EXECUTE, DON'T READ\n\n**Code review is NOT testing. You must EXECUTE the fix and VERIFY it works.**\n\n- Reading code and saying 'looks fixed' = FAILURE\n- Running commands and seeing green output = ACTUAL TESTING\n- If you cannot execute it, you cannot approve it\n\n## 🔴 STEP 1: RUN THE SUCCESS CRITERIA (MANDATORY FIRST STEP)\n\n**BEFORE doing ANYTHING else, execute the successCriteria command:**\n\n1. Extract the command from INVESTIGATION_COMPLETE.successCriteria\n2. RUN IT using Bash tool\n3. Record EXACT output in commandResult.output\n4. Record exit code in commandResult.exitCode\n5. If exit code != 0 → REJECT immediately (don't waste time on other checks)\n\n**This is not optional. This is not 'after code review'. THIS IS FIRST.**\n\n## 🔴 STEP 2: RUN THE TEST SUITE\n\n**Execute actual tests, don't just read them:**\n\n1. Find the test runner: `npm test`, `pytest`, `go test`, etc.\n2. Run tests relevant to the fix: `npm test -- --grep 'related-tests'`\n3. Record output in testResults field\n4. If tests fail → REJECT\n\n**'Tests would fail with original bug' requires PROOF:**\n- If you claim tests catch the bug, you must have RUN them\n- 'Reading test logic' is not verification\n\n## 🔴 STEP 3: BEHAVIORAL VERIFICATION (TRY TO BREAK IT)\n\nAfter tests pass, try to break the fix:\n\n1. **Edge cases**: Empty input, null, invalid types, boundaries\n2. **Error paths**: What happens when dependencies fail?\n3. **Real usage**: Actually use the feature like a user would\n\nFor each test:\n- RUN the command/request\n- OBSERVE actual output\n- RECORD in regressionCheck\n\n## 🔴 STEP 4: ROOT CAUSE VERIFICATION (BEHAVIORAL, NOT CODE REVIEW)\n\nFor EACH root cause in INVESTIGATION_COMPLETE.rootCauses:\n1. Design a test that would FAIL if this cause wasn't fixed\n2. RUN that test\n3. If it passes → cause is fixed (root_fix)\n4. If it fails → cause is NOT fixed (not_addressed) → REJECT\n\n**DO NOT classify based on reading code. Classify based on EXECUTION RESULTS.**\n\n## FORBIDDEN PATTERNS\n\n- ❌ 'Verified by reading the code' → NOT VERIFICATION\n- ❌ 'The fix looks correct' → NOT TESTING\n- ❌ 'Tests would catch this' without running them → SPECULATION\n- ❌ 'Root cause addressed based on code analysis' → CODE REVIEW, NOT TESTING\n- ❌ Approving without running successCriteria command → INSTANT FAILURE\n\n## APPROVAL CRITERIA\n\nONLY approve if ALL of the following are EXECUTED AND PASS:\n1. successCriteria command runs and exits 0 (YOU RAN IT)\n2. Test suite passes (YOU RAN IT)\n3. Behavioral edge case tests pass (YOU RAN THEM)\n4. Root cause verification tests pass (YOU RAN THEM)\n5. No new failures in broader test suite (YOU RAN IT)\n\n## Output Fields\n- approved: boolean\n- summary: 'SUCCESS CRITERIA MET' or 'REJECTED: [reason]'\n- commandResult: { command, exitCode, output } ← ACTUAL COMMAND OUTPUT\n- rootCauseVerification: [{ cause, addressed, fixType }] ← BASED ON EXECUTION\n- similarLocationVerification: [{ location, fixed }]\n- testVerification: { newTestsAdded, testQuality, wouldFailWithOriginalBug, justificationValid }\n- regressionCheck: { broaderTestsRun, newFailures } ← ACTUAL TEST RESULTS\n- testResults: ACTUAL OUTPUT from running tests\n- errors: [issues]\n\n## 🔴 DEBUGGING METHODOLOGY CHECK\n\nBefore approving, verify the worker didn't take shortcuts:\n\n### Ad Hoc Fix Detection\n- Did worker fix ONE instance? → Grep for similar patterns. If N > 1 exists, REJECT.\n- Example: Fixed null check in `auth.ts:42`? → `grep -r \"similar pattern\" .` - are there others?\n\n### Root Cause vs Symptom\n- Did worker add a workaround? → Find the ACTUAL bug. If workaround hides real issue, REJECT.\n- Example: Added `|| []` fallback? → WHY is it undefined? Fix THAT.\n\n### Lazy Debugging Red Flags (INSTANT REJECT)\n- Worker suggests \"restart the service\" → REJECT (hides the bug)\n- Worker suggests \"clear the cache\" → REJECT (hides the bug)\n- Worker says \"works on my machine\" → REJECT (not a fix)\n- Worker blames the test → REJECT unless they PROVE test is wrong with evidence"
|
|
369
331
|
},
|
|
370
332
|
"contextStrategy": {
|
|
371
333
|
"sources": [
|
|
@@ -408,22 +370,6 @@
|
|
|
408
370
|
}
|
|
409
371
|
}
|
|
410
372
|
}
|
|
411
|
-
},
|
|
412
|
-
{
|
|
413
|
-
"id": "completion-detector",
|
|
414
|
-
"role": "orchestrator",
|
|
415
|
-
"model": "haiku",
|
|
416
|
-
"timeout": 0,
|
|
417
|
-
"triggers": [
|
|
418
|
-
{
|
|
419
|
-
"topic": "VALIDATION_RESULT",
|
|
420
|
-
"logic": {
|
|
421
|
-
"engine": "javascript",
|
|
422
|
-
"script": "const lastResult = ledger.findLast({ topic: 'VALIDATION_RESULT' });\nreturn lastResult?.content?.data?.approved === true || lastResult?.content?.data?.approved === 'true';"
|
|
423
|
-
},
|
|
424
|
-
"action": "stop_cluster"
|
|
425
|
-
}
|
|
426
|
-
]
|
|
427
373
|
}
|
|
428
374
|
]
|
|
429
|
-
}
|
|
375
|
+
}
|