npm - @trygentic/agentloop - Versions diffs - 0.18.0-alpha.11 → 0.20.0-alpha.11 - Mend

@trygentic/agentloop 0.18.0-alpha.11 → 0.20.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +3 -3
package/templates/agents/engineer/engineer.bt.json +67 -10
package/templates/agents/product-manager/product-manager.bt.json +57 -6
package/templates/agents/qa-tester/qa-tester.bt.json +63 -8

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@trygentic/agentloop",
-  "version": "0.18.0-alpha.11",
+  "version": "0.20.0-alpha.11",
   "description": "AI-powered autonomous coding agent",
   "bin": {
     "agentloop": "./bin/agentloop"
@@ -9,8 +9,8 @@
     "postinstall": "node ./scripts/postinstall.mjs"
   },
   "optionalDependencies": {
-    "@trygentic/agentloop-darwin-arm64": "0.18.0-alpha.11",
-    "@trygentic/agentloop-linux-x64": "0.18.0-alpha.11"
+    "@trygentic/agentloop-darwin-arm64": "0.20.0-alpha.11",
+    "@trygentic/agentloop-linux-x64": "0.20.0-alpha.11"
   },
   "engines": {
     "node": ">=18.0.0"

package/templates/agents/engineer/engineer.bt.json CHANGED

@@ -19,6 +19,51 @@
           "call": "FetchTaskContext",
           "comment": "Load task details, comments, and determine if this is a QA rejection re-work"
         },
+        {
+          "type": "action",
+          "call": "LoadProjectSpecifications",
+          "comment": "Load specification documents from .agentloop/specifications/ so LLM nodes can align implementations with actual project requirements"
+        },
+        {
+          "type": "selector",
+          "comment": "Summarize project specifications if available (non-critical: skip if no specs)",
+          "children": [
+            {
+              "type": "sequence",
+              "children": [
+                {
+                  "type": "condition",
+                  "call": "HasProjectSpecifications",
+                  "comment": "Only summarize if specifications were loaded"
+                },
+                {
+                  "type": "llm-action",
+                  "name": "SummarizeProjectSpecifications",
+                  "prompt": "Distill the following project specification documents into a compact structured summary. Extract ONLY what is explicitly stated — do not infer, assume, or add anything not in the source documents.\n\n## Raw Specifications\n{{projectSpecifications}}\n\n## Output Format\nProduce a structured summary covering ONLY sections that have explicit information in the documents:\n\n### Technology Stack\nList every explicitly named technology, framework, library, and version. Example: 'Next.js 14 App Router', 'TypeScript 5.x', 'localStorage for client-side persistence'\n\n### File Structure\nList every file path, directory, or component name mentioned. Example: 'lib/cardUtils.ts', 'data/cardMeanings.json', 'components/CardSpread.tsx'\n\n### Data & Persistence\nHow data is stored, fetched, and managed. Database schema, API endpoints, storage keys, state management approach.\n\n### Domain Constraints\nExplicit rules, limits, and requirements. What the project MUST do and MUST NOT do. Example: 'No external API calls', 'Must work offline', 'Max 15 files total'\n\n### Acceptance Criteria\nTestable success conditions from the specs.\n\n### What Is NOT Used\nTechnologies or approaches explicitly excluded. Example: 'No backend server', 'No database', 'No authentication'\n\nBe exhaustive on details but terse on prose. Use bullet points. Copy exact names, paths, and values from the source — do not paraphrase technical terms.",
+                  "contextKeys": ["projectSpecifications"],
+                  "outputSchema": {
+                    "type": "object",
+                    "properties": {
+                      "summary": {
+                        "type": "string",
+                        "description": "Structured summary of project specifications"
+                      }
+                    },
+                    "required": ["summary"]
+                  },
+                  "outputKey": "projectSpecSummary",
+                  "temperature": 0.1,
+                  "allowedTools": []
+                }
+              ]
+            },
+            {
+              "type": "action",
+              "call": "NoOp",
+              "comment": "Continue without summarization if no specs or summarization fails"
+            }
+          ]
+        },
         {
           "type": "selector",
           "comment": "Check for incoming agent messages (non-critical: continue even if unavailable)",
@@ -101,14 +146,16 @@
                         {
                           "type": "llm-action",
                           "name": "AnalyzeQAFeedbackAndFix",
-                          "prompt": "You are an engineer agent handling a QA rejection. The task was previously submitted for review but QA found issues that need to be fixed.\n\nTask: {{taskDescription}}\n\nQA Feedback and Previous Comments:\n{{taskComments}}\n\nCodebase Context:\n{{codebaseContext}}\n\nPrevious Analysis (if any):\n{{taskAnalysis}}\n\n## CRITICAL: YOU MUST MAKE CODE CHANGES\nQA found issues during actual app testing (E2E/runtime). This means the current code has bugs that MUST be fixed with code changes. DO NOT just run unit tests and conclude nothing needs fixing. Unit tests passing does NOT mean runtime errors are fixed \u2014 QA tests the actual running app and found real problems.\n\nIf unit tests pass but QA reported runtime errors, that means:\n- The unit tests don't cover the failing scenario, OR\n- The bug only manifests at runtime (wrong imports, missing props, incorrect component rendering, etc.)\n\nYou MUST:\n1. Read the QA feedback carefully to understand the EXACT runtime error\n2. Find the root cause in the source code (not just the test code)\n3. Make actual code changes to fix the issue\n4. If the bug isn't obvious, add a NEW test that reproduces the QA-reported failure\n5. Verify your fix with tests\n\nDO NOT conclude 'no changes needed' \u2014 QA rejected this task because something is broken. Find it and fix it.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. 
FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Address ALL QA feedback - partial fixes will result in another rejection.\n\nYou MUST produce at least one code change. If you cannot find the exact bug, at minimum add a regression test that verifies the scenario QA reported.",
+                          "prompt": "You are an engineer agent handling a QA rejection. The task was previously submitted for review but QA found issues that need to be fixed.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nYour implementation MUST use the specific technologies, file paths, data storage approaches, and constraints described above. These specifications are authoritative — do not substitute alternative libraries, patterns, or approaches unless the specs are technically impossible to implement.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nYour implementation MUST use the specific technologies, file paths, and constraints described above. These specifications are authoritative.\n{{/if}}\n\nTask: {{taskDescription}}\n\nQA Feedback and Previous Comments:\n{{taskComments}}\n\nCodebase Context:\n{{codebaseContext}}\n\nPrevious Analysis (if any):\n{{taskAnalysis}}\n\n## CRITICAL: YOU MUST MAKE CODE CHANGES\nQA found issues during actual app testing (E2E/runtime). This means the current code has bugs that MUST be fixed with code changes. DO NOT just run unit tests and conclude nothing needs fixing. Unit tests passing does NOT mean runtime errors are fixed \u2014 QA tests the actual running app and found real problems.\n\nIf unit tests pass but QA reported runtime errors, that means:\n- The unit tests don't cover the failing scenario, OR\n- The bug only manifests at runtime (wrong imports, missing props, incorrect component rendering, etc.)\n\nYou MUST:\n1. Read the QA feedback carefully to understand the EXACT runtime error\n2. Find the root cause in the source code (not just the test code)\n3. Make actual code changes to fix the issue\n4. If the bug isn't obvious, add a NEW test that reproduces the QA-reported failure\n5. Verify your fix with tests\n\nDO NOT conclude 'no changes needed' \u2014 QA rejected this task because something is broken. 
Find it and fix it.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Address ALL QA feedback - partial fixes will result in another rejection.\n\nYou MUST produce at least one code change. If you cannot find the exact bug, at minimum add a regression test that verifies the scenario QA reported.",
                           "minTurns": 5,
                           "contextKeys": [
                             "taskDescription",
                             "taskTitle",
                             "taskComments",
                             "codebaseContext",
-                            "taskAnalysis"
+                            "taskAnalysis",
+                            "projectSpecifications",
+                            "projectSpecSummary"
                           ],
                           "subagent": "engineer",
                           "maxTurns": 50,
@@ -235,12 +282,14 @@
                         {
                           "type": "llm-action",
                           "name": "AnalyzeTask",
-                          "prompt": "You are an engineer agent analyzing a task. Examine the task requirements and the codebase context provided. Determine the complexity of the task and identify which files will likely need to be modified.\n\nTask: {{taskDescription}}\n\n{{taskComments}}\n\nIMPORTANT: Only reference files and directories that appear in the codebase context. Do NOT guess or hallucinate file paths that are not listed there.\n\nIf this task was previously rejected by QA, pay close attention to the feedback in the comments above. Address ALL issues mentioned by QA in your implementation.\n\nIMPORTANT: Always plan to include tests for your implementation. Use the project's EXISTING test framework \u2014 check codebase context for test runner, test scripts, and existing test files. Do NOT add a new test framework or test runner configuration (no jest.config, vitest.config, etc.). Follow the naming conventions and import patterns found in existing test files.\n\nProvide a thorough analysis of what needs to be done, including what tests you will create.",
+                          "prompt": "You are an engineer agent analyzing a task. Examine the task requirements and the codebase context provided. Determine the complexity of the task and identify which files will likely need to be modified.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nYour implementation MUST use the specific technologies, file paths, data storage approaches, and constraints described above. These specifications are authoritative — do not substitute alternative libraries, patterns, or approaches unless the specs are technically impossible to implement.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nYour implementation MUST use the specific technologies, file paths, and constraints described above. These specifications are authoritative.\n{{/if}}\n\nTask: {{taskDescription}}\n\n{{taskComments}}\n\nIMPORTANT: Only reference files and directories that appear in the codebase context. Do NOT guess or hallucinate file paths that are not listed there.\n\nIf this task was previously rejected by QA, pay close attention to the feedback in the comments above. Address ALL issues mentioned by QA in your implementation.\n\nIMPORTANT: Always plan to include tests for your implementation. Use the project's EXISTING test framework \u2014 check codebase context for test runner, test scripts, and existing test files. Do NOT add a new test framework or test runner configuration (no jest.config, vitest.config, etc.). Follow the naming conventions and import patterns found in existing test files.\n\nProvide a thorough analysis of what needs to be done, including what tests you will create.",
                           "contextKeys": [
                             "taskDescription",
                             "taskTitle",
                             "codebaseContext",
-                            "taskComments"
+                            "taskComments",
+                            "projectSpecifications",
+                            "projectSpecSummary"
                           ],
                           "outputSchema": {
                             "type": "object",
@@ -314,12 +363,14 @@
                               "child": {
                                 "type": "llm-action",
                                 "name": "ImplementDirectly",
-                                "prompt": "Implement the task directly. Generate the code changes needed.\n\nTask: {{taskDescription}}\nAnalysis: {{taskAnalysis}}\n\n{{taskComments}}\n\nIf this task was previously rejected by QA, make sure your implementation addresses ALL the issues mentioned in their feedback.\n\nYou already have codebase context, task analysis, and task details in your context. Start implementing immediately \u2014 do NOT return empty changes or claim you need more exploration.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description (e.g., search for concepts, function purposes, feature areas)\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search for broader discovery\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need to implement\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a specific file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies between entities\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change (blast radius)\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Always include test files alongside your implementation. Create at least one test file that verifies the core functionality. Use the project's EXISTING test framework and test runner \u2014 check codebase context for what test framework the project uses, what test scripts are available, and how existing test files are structured. 
Do NOT install or configure a new test framework (no jest.config.js, no vitest.config.ts, etc.). Follow the naming conventions, import patterns, and directory structure of existing test files in the project.\n\n**CRITICAL: Read Before Edit Rule:**\nYou MUST call the `read` tool on any existing file BEFORE calling `edit` on it. The edit tool validates that you've read the file first. If you skip the read, the edit will fail with \"You must read file X before overwriting it.\" For new files, use the `write` tool instead of `edit`.\n\nProvide the implementation with file paths and complete code content for each file.\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Include both implementation files AND test files in the changes array. You MUST produce at least one file change.\n\n**Test Configuration Rules (CRITICAL):**\n- Tests run in a non-interactive CI-like environment. NEVER configure tests to use watch mode.\n- When creating vitest.config.ts/js, always set `test: { watch: false }` or use `defineConfig({ test: { watch: false } })`.\n- When writing package.json test scripts with vitest, ALWAYS use `\"test\": \"vitest run\"` (NOT `\"test\": \"vitest\"`).\n- For jest, always include `--watchAll=false` in the test script if needed.\n- Never add `--watch` or `--watchAll` flags to test scripts.",
+                                "prompt": "Implement the task directly. Generate the code changes needed.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nYour implementation MUST use the specific technologies, file paths, data storage approaches, and constraints described above. These specifications are authoritative — do not substitute alternative libraries, patterns, or approaches unless the specs are technically impossible to implement.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nYour implementation MUST use the specific technologies, file paths, and constraints described above. These specifications are authoritative.\n{{/if}}\n\nTask: {{taskDescription}}\nAnalysis: {{taskAnalysis}}\n\n{{taskComments}}\n\nIf this task was previously rejected by QA, make sure your implementation addresses ALL the issues mentioned in their feedback.\n\nYou already have codebase context, task analysis, and task details in your context. Start implementing immediately \u2014 do NOT return empty changes or claim you need more exploration.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. 
FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description (e.g., search for concepts, function purposes, feature areas)\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search for broader discovery\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need to implement\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a specific file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies between entities\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change (blast radius)\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Always include test files alongside your implementation. Create at least one test file that verifies the core functionality. Use the project's EXISTING test framework and test runner \u2014 check codebase context for what test framework the project uses, what test scripts are available, and how existing test files are structured. Do NOT install or configure a new test framework (no jest.config.js, no vitest.config.ts, etc.). Follow the naming conventions, import patterns, and directory structure of existing test files in the project.\n\n**CRITICAL: Read Before Edit Rule:**\nYou MUST call the `read` tool on any existing file BEFORE calling `edit` on it. The edit tool validates that you've read the file first. 
If you skip the read, the edit will fail with \"You must read file X before overwriting it.\" For new files, use the `write` tool instead of `edit`.\n\nProvide the implementation with file paths and complete code content for each file.\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Include both implementation files AND test files in the changes array. You MUST produce at least one file change.\n\n**Test Configuration Rules (CRITICAL):**\n- Tests run in a non-interactive CI-like environment. NEVER configure tests to use watch mode.\n- When creating vitest.config.ts/js, always set `test: { watch: false }` or use `defineConfig({ test: { watch: false } })`.\n- When writing package.json test scripts with vitest, ALWAYS use `\"test\": \"vitest run\"` (NOT `\"test\": \"vitest\"`).\n- For jest, always include `--watchAll=false` in the test script if needed.\n- Never add `--watch` or `--watchAll` flags to test scripts.",
                                 "contextKeys": [
                                   "taskDescription",
                                   "taskAnalysis",
                                   "codebaseContext",
-                                  "taskComments"
+                                  "taskComments",
+                                  "projectSpecifications",
+                                  "projectSpecSummary"
                                 ],
                                 "subagent": "engineer",
                                 "maxTurns": 500,
@@ -444,12 +495,14 @@
                                   {
                                     "type": "llm-action",
                                     "name": "CreateImplementationPlan",
-                                    "prompt": "Create a detailed implementation plan for this complex task.\n\nTask: {{taskDescription}}\nAnalysis: {{taskAnalysis}}\n\n{{taskComments}}\n\nIMPORTANT: Only reference files and directories that appear in the codebase context. Do NOT guess or hallucinate file paths that are not listed there. Adapt your plan to match the actual project layout.\n\nIf this task was previously rejected by QA, incorporate their feedback into your plan.\n\nIMPORTANT: Include test creation as part of your implementation steps. Each step that creates functionality should have a corresponding test step or include tests within it.\n\nBreak down the implementation into clear steps with dependencies.",
+                                    "prompt": "Create a detailed implementation plan for this complex task.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nYour implementation MUST use the specific technologies, file paths, data storage approaches, and constraints described above. These specifications are authoritative — do not substitute alternative libraries, patterns, or approaches unless the specs are technically impossible to implement.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nYour implementation MUST use the specific technologies, file paths, and constraints described above. These specifications are authoritative.\n{{/if}}\n\nTask: {{taskDescription}}\nAnalysis: {{taskAnalysis}}\n\n{{taskComments}}\n\nIMPORTANT: Only reference files and directories that appear in the codebase context. Do NOT guess or hallucinate file paths that are not listed there. Adapt your plan to match the actual project layout.\n\nIf this task was previously rejected by QA, incorporate their feedback into your plan.\n\nIMPORTANT: Include test creation as part of your implementation steps. Each step that creates functionality should have a corresponding test step or include tests within it.\n\nBreak down the implementation into clear steps with dependencies.",
                                     "contextKeys": [
                                       "taskDescription",
                                       "taskAnalysis",
                                       "codebaseContext",
-                                      "taskComments"
+                                      "taskComments",
+                                      "projectSpecifications",
+                                      "projectSpecSummary"
                                     ],
                                     "outputSchema": {
                                       "type": "object",
@@ -518,13 +571,15 @@
                                   {
                                     "type": "llm-action",
                                     "name": "ImplementIncrementally",
-                                    "prompt": "Execute the implementation plan step by step.\n\nPlan: {{implementationPlan}}\nTask: {{taskDescription}}\n\n{{taskComments}}\n\nGenerate all the code changes according to the plan. Make sure to address any QA feedback from previous attempts.\n\nYou already have the codebase context, task analysis, and implementation plan in your context. Start implementing immediately \u2014 do NOT return empty changes or claim you need more exploration.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description (e.g., search for concepts, function purposes, feature areas)\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search for broader discovery\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need to implement\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a specific file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies between entities\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change (blast radius)\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Always include test files alongside your implementation. Create at least one test file that verifies the core functionality. Use the project's EXISTING test framework and test runner \u2014 check codebase context for what test framework the project uses, what test scripts are available, and how existing test files are structured. 
Do NOT install or configure a new test framework (no jest.config.js, no vitest.config.ts, etc.). Follow the naming conventions, import patterns, and directory structure of existing test files in the project.\n\n**CRITICAL: Read Before Edit Rule:**\nYou MUST call the `read` tool on any existing file BEFORE calling `edit` on it. The edit tool validates that you've read the file first. If you skip the read, the edit will fail with \"You must read file X before overwriting it.\" For new files, use the `write` tool instead of `edit`.\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Include both implementation files AND test files in the changes array. You MUST produce at least one file change.\n\n**Test Configuration Rules (CRITICAL):**\n- Tests run in a non-interactive CI-like environment. NEVER configure tests to use watch mode.\n- When creating vitest.config.ts/js, always set `test: { watch: false }` or use `defineConfig({ test: { watch: false } })`.\n- When writing package.json test scripts with vitest, ALWAYS use `\"test\": \"vitest run\"` (NOT `\"test\": \"vitest\"`).\n- For jest, always include `--watchAll=false` in the test script if needed.\n- Never add `--watch` or `--watchAll` flags to test scripts.",
+                                    "prompt": "Execute the implementation plan step by step.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nYour implementation MUST use the specific technologies, file paths, data storage approaches, and constraints described above. These specifications are authoritative — do not substitute alternative libraries, patterns, or approaches unless the specs are technically impossible to implement.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nYour implementation MUST use the specific technologies, file paths, and constraints described above. These specifications are authoritative.\n{{/if}}\n\nPlan: {{implementationPlan}}\nTask: {{taskDescription}}\n\n{{taskComments}}\n\nGenerate all the code changes according to the plan. Make sure to address any QA feedback from previous attempts.\n\nYou already have the codebase context, task analysis, and implementation plan in your context. Start implementing immediately \u2014 do NOT return empty changes or claim you need more exploration.\n\n**Codebase Exploration Strategy (FOLLOW THIS ORDER):**\n1. 
FIRST use agentloop-memory MCP tools for intelligent code discovery (the code graph was already indexed):\n   - `mcp__agentloop-memory__semantic_search` \u2014 find relevant code by natural language description (e.g., search for concepts, function purposes, feature areas)\n   - `mcp__agentloop-memory__query` \u2014 combined semantic + structural search for broader discovery\n   - `mcp__agentloop-memory__find_similar_code` \u2014 find existing patterns similar to what you need to implement\n   - `mcp__agentloop-memory__list_file_entities` \u2014 enumerate functions, classes, and exports in a specific file\n   - `mcp__agentloop-memory__list_entity_relationships` \u2014 trace imports, references, and dependencies between entities\n   - `mcp__agentloop-memory__analyze_code_impact` \u2014 understand what depends on code you plan to change (blast radius)\n2. THEN use Read to examine specific file contents, and Grep/Glob for targeted text searches or file pattern matching\n3. If agentloop-memory tools fail or return no results after 2-3 attempts, fall back to Grep/Glob\n\nIMPORTANT: Always include test files alongside your implementation. Create at least one test file that verifies the core functionality. Use the project's EXISTING test framework and test runner \u2014 check codebase context for what test framework the project uses, what test scripts are available, and how existing test files are structured. Do NOT install or configure a new test framework (no jest.config.js, no vitest.config.ts, etc.). Follow the naming conventions, import patterns, and directory structure of existing test files in the project.\n\n**CRITICAL: Read Before Edit Rule:**\nYou MUST call the `read` tool on any existing file BEFORE calling `edit` on it. The edit tool validates that you've read the file first. 
If you skip the read, the edit will fail with \"You must read file X before overwriting it.\" For new files, use the `write` tool instead of `edit`.\n\nIMPORTANT: Each change must include the full 'code' field with the complete file content to write. Include both implementation files AND test files in the changes array. You MUST produce at least one file change.\n\n**Test Configuration Rules (CRITICAL):**\n- Tests run in a non-interactive CI-like environment. NEVER configure tests to use watch mode.\n- When creating vitest.config.ts/js, always set `test: { watch: false }` or use `defineConfig({ test: { watch: false } })`.\n- When writing package.json test scripts with vitest, ALWAYS use `\"test\": \"vitest run\"` (NOT `\"test\": \"vitest\"`).\n- For jest, always include `--watchAll=false` in the test script if needed.\n- Never add `--watch` or `--watchAll` flags to test scripts.",
                                     "contextKeys": [
                                       "implementationPlan",
                                       "taskDescription",
                                       "taskAnalysis",
                                       "codebaseContext",
-                                      "taskComments"
+                                      "taskComments",
+                                      "projectSpecifications",
+                                      "projectSpecSummary"
                                     ],
                                     "subagent": "engineer",
                                     "maxTurns": 500,
@@ -1198,6 +1253,8 @@
     "isQARejection": false,
     "hasQAFeedback": false,
     "codebaseContext": null,
+    "projectSpecifications": null,
+    "projectSpecSummary": null,
     "codeGraphIndexed": false,
     "appliedChanges": [],
     "stagedFiles": [],

package/templates/agents/product-manager/product-manager.bt.json CHANGED Viewed

@@ -19,6 +19,51 @@
           "call": "FetchTaskContext",
           "comment": "Load task details if a real DB task was assigned, or use the request data already on the blackboard (for both agent messages and direct requests)"
         },
+        {
+          "type": "action",
+          "call": "LoadProjectSpecifications",
+          "comment": "Load specification documents from .agentloop/specifications/ so LLM nodes can align tasks with actual project requirements"
+        },
+        {
+          "type": "selector",
+          "comment": "Summarize project specifications if available (non-critical: skip if no specs)",
+          "children": [
+            {
+              "type": "sequence",
+              "children": [
+                {
+                  "type": "condition",
+                  "call": "HasProjectSpecifications",
+                  "comment": "Only summarize if specifications were loaded"
+                },
+                {
+                  "type": "llm-action",
+                  "name": "SummarizeProjectSpecifications",
+                  "prompt": "Distill the following project specification documents into a compact structured summary. Extract ONLY what is explicitly stated — do not infer, assume, or add anything not in the source documents.\n\n## Raw Specifications\n{{projectSpecifications}}\n\n## Output Format\nProduce a structured summary covering ONLY sections that have explicit information in the documents:\n\n### Technology Stack\nList every explicitly named technology, framework, library, and version. Example: 'Next.js 14 App Router', 'TypeScript 5.x', 'localStorage for client-side persistence'\n\n### File Structure\nList every file path, directory, or component name mentioned. Example: 'lib/cardUtils.ts', 'data/cardMeanings.json', 'components/CardSpread.tsx'\n\n### Data & Persistence\nHow data is stored, fetched, and managed. Database schema, API endpoints, storage keys, state management approach.\n\n### Domain Constraints\nExplicit rules, limits, and requirements. What the project MUST do and MUST NOT do. Example: 'No external API calls', 'Must work offline', 'Max 15 files total'\n\n### Acceptance Criteria\nTestable success conditions from the specs.\n\n### What Is NOT Used\nTechnologies or approaches explicitly excluded. Example: 'No backend server', 'No database', 'No authentication'\n\nBe exhaustive on details but terse on prose. Use bullet points. Copy exact names, paths, and values from the source — do not paraphrase technical terms.",
+                  "contextKeys": ["projectSpecifications"],
+                  "outputSchema": {
+                    "type": "object",
+                    "properties": {
+                      "summary": {
+                        "type": "string",
+                        "description": "Structured summary of project specifications"
+                      }
+                    },
+                    "required": ["summary"]
+                  },
+                  "outputKey": "projectSpecSummary",
+                  "temperature": 0.1,
+                  "allowedTools": []
+                }
+              ]
+            },
+            {
+              "type": "action",
+              "call": "NoOp",
+              "comment": "Continue without summarization if no specs or summarization fails"
+            }
+          ]
+        },
         {
           "type": "selector",
           "comment": "Route based on message type: pre-existing bug escalation, existing subproject update, or normal task planning",
@@ -47,10 +92,12 @@
                     "mcp__agentloop__validate_dag",
                     "mcp__agentloop__send_agent_message"
                   ],
-                  "prompt": "You are handling a pre-existing bug escalation from QA testing.\n\n## Bug Details\nTitle: {{taskTitle}}\nDetails: {{taskDescription}}\n\n## What Happened\nA QA agent found that testing of an unrelated task is completely blocked by a pre-existing bug.\nThe bug is NOT related to the task's changes - it is a pre-existing issue in the codebase.\n\n## Your Workflow\n\n### Turn 1 - Gather Context (parallel reads)\nCall BOTH tools in a SINGLE response:\n- `mcp__agentloop__list_subprojects` - find the subproject for the blocked task\n- `mcp__agentloop__list_tasks` with `limit: 100` - check for existing tasks that might already address this bug\n\n### Turn 2 - Pause Subproject\nCall `mcp__agentloop__pause_subproject` with:\n- subprojectId: the blocked task's subproject ID (from Turn 1)\n- reason: \"Reorganizing DAG for pre-existing bug fix\"\n- timeoutSeconds: 120\n\nThis prevents the orchestrator from picking up tasks that could conflict with the DAG reorganization.\n\n### Turn 3 - Check for Existing Fix Task\nIf there is already a task to fix this bug, just add a dependency and skip to Turn 5.\nOtherwise, proceed to Turn 4.\n\n### Turn 4 - Create Fix Task\nCall `mcp__agentloop__create_task` with:\n- title: \"Fix pre-existing bug: [brief description]\"\n- description: Include the bug file, error message, and context from the escalation\n- priority: \"high\" (blocking other tasks)\n- assigned_agent: \"engineer\"\n- subprojectId: same as the blocked task's subproject\n- tags: \"bug-fix,pre-existing,blocking\"\n\nRecord the returned task ID.\n\n### Turn 5 - Add Dependency and Reorganize DAG\nCall these in a SINGLE response:\n- `mcp__agentloop__add_task_dependency` - make the blocked task depend on the fix task\n  (dependentTaskId = blocked task ID, prerequisiteTaskId = new fix task ID)\n- `mcp__agentloop__update_task_status` - move the blocked task to 'todo' if it is not already\n- `mcp__agentloop__reorganize_dag` - recalculate execution 
order\n\n### Turn 6 - Resume Subproject\nALWAYS call `mcp__agentloop__resume_subproject` with the subprojectId, even if previous steps failed.\nFailing to resume will leave the subproject permanently paused and block all future task scheduling.\n\n### Turn 7 - Notify Merge Resolver\nSend a coordination message to the merge-resolver:\n- `mcp__agentloop__send_agent_message` with to: \"merge-resolver\", type: \"coordination\"\n- content: { event: \"prerequisite_fix_created\", fixTaskId, blockedTaskId, bugDescription }\n\n### Turn 8 - Validate\nCall `mcp__agentloop__validate_dag` to ensure no cycles were introduced.\n\nProvide a summary when done.",
+                  "prompt": "You are handling a pre-existing bug escalation from QA testing.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nCRITICAL: Extract and embed specific details into task descriptions. Engineers work in isolated worktrees. Every task must be self-contained with explicit technology names, file paths, data storage approach, and constraints — never 'as specified in docs'.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nCRITICAL: Extract and embed specific details from the specifications above into task descriptions. Every task must be self-contained.\n{{/if}}\n\n## Bug Details\nTitle: {{taskTitle}}\nDetails: {{taskDescription}}\n\n## What Happened\nA QA agent found that testing of an unrelated task is completely blocked by a pre-existing bug.\nThe bug is NOT related to the task's changes - it is a pre-existing issue in the codebase.\n\n## Your Workflow\n\n### Turn 1 - Gather Context (parallel reads)\nCall BOTH tools in a SINGLE response:\n- `mcp__agentloop__list_subprojects` - find the subproject for the blocked task\n- `mcp__agentloop__list_tasks` with `limit: 100` - check for existing tasks that might already address this bug\n\n### Turn 2 - Pause Subproject\nCall `mcp__agentloop__pause_subproject` with:\n- subprojectId: the blocked task's subproject ID (from Turn 1)\n- reason: \"Reorganizing DAG for pre-existing bug fix\"\n- timeoutSeconds: 120\n\nThis prevents the orchestrator from picking up tasks that could conflict with the DAG reorganization.\n\n### Turn 3 - Check for Existing Fix Task\nIf there is already a task to fix this bug, just add a dependency and skip to Turn 5.\nOtherwise, proceed to Turn 4.\n\n### Turn 4 - Create Fix Task\nCall `mcp__agentloop__create_task` with:\n- title: \"Fix pre-existing bug: [brief description]\"\n- description: Include the bug file, error message, and context from the escalation\n- priority: \"high\" (blocking 
other tasks)\n- assigned_agent: \"engineer\"\n- subprojectId: same as the blocked task's subproject\n- tags: \"bug-fix,pre-existing,blocking\"\n\nRecord the returned task ID.\n\n### Turn 5 - Add Dependency and Reorganize DAG\nCall these in a SINGLE response:\n- `mcp__agentloop__add_task_dependency` - make the blocked task depend on the fix task\n  (dependentTaskId = blocked task ID, prerequisiteTaskId = new fix task ID)\n- `mcp__agentloop__update_task_status` - move the blocked task to 'todo' if it is not already\n- `mcp__agentloop__reorganize_dag` - recalculate execution order\n\n### Turn 6 - Resume Subproject\nALWAYS call `mcp__agentloop__resume_subproject` with the subprojectId, even if previous steps failed.\nFailing to resume will leave the subproject permanently paused and block all future task scheduling.\n\n### Turn 7 - Notify Merge Resolver\nSend a coordination message to the merge-resolver:\n- `mcp__agentloop__send_agent_message` with to: \"merge-resolver\", type: \"coordination\"\n- content: { event: \"prerequisite_fix_created\", fixTaskId, blockedTaskId, bugDescription }\n\n### Turn 8 - Validate\nCall `mcp__agentloop__validate_dag` to ensure no cycles were introduced.\n\nProvide a summary when done.",
                   "contextKeys": [
                     "taskTitle",
-                    "taskDescription"
+                    "taskDescription",
+                    "projectSpecifications",
+                    "projectSpecSummary"
                   ],
                   "subagent": "product-manager",
                   "outputSchema": {
@@ -123,8 +170,8 @@
                     "mcp__agentloop__visualize_dag",
                     "mcp__agentloop__send_agent_message"
                   ],
-                  "prompt": "You are updating tasks in an existing subproject based on a user request.\n\n## Request\nTitle: {{taskTitle}}\nDescription: {{taskDescription}}\n\n## CRITICAL: Minimize Tool Call Turns\nBatch independent tool calls in the SAME response.\n\n## Your Workflow\n\n### Turn 1 — Gather Context (parallel reads)\nCall ALL of these in a SINGLE response:\n- `mcp__agentloop__get_subproject` with the subprojectId from the message\n- `mcp__agentloop__list_tasks` with `limit: 100` to see all tasks\n- `mcp__agentloop__list_subprojects` for full context\n\n### Turn 2 — Analyze What Needs to Change\nCompare the user's request against existing tasks in the subproject.\nDetermine:\n- Which existing tasks need modification (title, description, priority changes)\n- Which new tasks need to be created\n- Which existing tasks are now unnecessary and should be deleted/blocked\n- Which dependencies need to change\n\n### Turn 3 — Pause if Active\nIf the subproject has ANY tasks in 'in-progress' or 'done' status:\n- Call `mcp__agentloop__pause_subproject` with reason: \"Updating subproject tasks\" and timeoutSeconds: 120\n\n### Turn 4 — Apply Changes (SINGLE response)\nBatch ALL changes in a SINGLE response:\n- `mcp__agentloop__update_task` for each task that needs modification\n- `mcp__agentloop__create_task` for new tasks (with subprojectId)\n- `mcp__agentloop__update_task_status` to block tasks that are no longer needed\n- `mcp__agentloop__add_task_dependency` for new dependencies\n\n### Turn 5 — Reorganize and Validate (parallel)\nCall in a SINGLE response:\n- `mcp__agentloop__reorganize_dag`\n- `mcp__agentloop__validate_dag`\n- `mcp__agentloop__visualize_dag`\n\n### Turn 6 — Resume and Notify\nCall in a SINGLE response:\n- `mcp__agentloop__resume_subproject` (if paused)\n- `mcp__agentloop__send_agent_message` to merge-resolver if the subproject is active\n\nProvide a summary of all changes made.",
-                  "contextKeys": ["taskTitle", "taskDescription", "taskComments"],
+                  "prompt": "You are updating tasks in an existing subproject based on a user request.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nCRITICAL: Extract and embed specific details into task descriptions. Engineers work in isolated worktrees. Every task must be self-contained with explicit technology names, file paths, data storage approach, and constraints — never 'as specified in docs'.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nCRITICAL: Extract and embed specific details from the specifications above into task descriptions. Every task must be self-contained.\n{{/if}}\n\n## Request\nTitle: {{taskTitle}}\nDescription: {{taskDescription}}\n\n## CRITICAL: Minimize Tool Call Turns\nBatch independent tool calls in the SAME response.\n\n## Your Workflow\n\n### Turn 1 — Gather Context (parallel reads)\nCall ALL of these in a SINGLE response:\n- `mcp__agentloop__get_subproject` with the subprojectId from the message\n- `mcp__agentloop__list_tasks` with `limit: 100` to see all tasks\n- `mcp__agentloop__list_subprojects` for full context\n\n### Turn 2 — Analyze What Needs to Change\nCompare the user's request against existing tasks in the subproject.\nDetermine:\n- Which existing tasks need modification (title, description, priority changes)\n- Which new tasks need to be created\n- Which existing tasks are now unnecessary and should be deleted/blocked\n- Which dependencies need to change\n\n### Turn 3 — Pause if Active\nIf the subproject has ANY tasks in 'in-progress' or 'done' status:\n- Call `mcp__agentloop__pause_subproject` with reason: \"Updating subproject tasks\" and timeoutSeconds: 120\n\n### Turn 4 — Apply Changes (SINGLE response)\nBatch ALL changes in a SINGLE response:\n- `mcp__agentloop__update_task` for each task that needs modification\n- `mcp__agentloop__create_task` for new tasks (with subprojectId)\n- `mcp__agentloop__update_task_status` to 
block tasks that are no longer needed\n- `mcp__agentloop__add_task_dependency` for new dependencies\n\n### Turn 5 — Reorganize and Validate (parallel)\nCall in a SINGLE response:\n- `mcp__agentloop__reorganize_dag`\n- `mcp__agentloop__validate_dag`\n- `mcp__agentloop__visualize_dag`\n\n### Turn 6 — Resume and Notify\nCall in a SINGLE response:\n- `mcp__agentloop__resume_subproject` (if paused)\n- `mcp__agentloop__send_agent_message` to merge-resolver if the subproject is active\n\nProvide a summary of all changes made.",
+                  "contextKeys": ["taskTitle", "taskDescription", "taskComments", "projectSpecifications", "projectSpecSummary"],
                   "subagent": "product-manager",
                   "outputSchema": {
                     "type": "object",
@@ -163,11 +210,13 @@
                 "mcp__agentloop__reorganize_dag",
                 "mcp__agentloop__send_agent_message"
               ],
-              "prompt": "You are a product manager agent. Your job is to break down a high-level feature request into actionable AGILE tasks with proper DAG dependencies.\n\n## Feature Request\nTitle: {{taskTitle}}\nDescription: {{taskDescription}}\n\n## CRITICAL: Maximize Parallel Tool Calls\nYou MUST minimize the number of LLM turns by batching independent tool calls into the SAME response.\nEvery extra turn adds ~5-10 seconds of latency. Batch aggressively.\n\n## Your Workflow\n\n### Turn 1 \u2014 Gather Context (parallel reads)\nCall BOTH of these tools in a SINGLE response:\n- `mcp__agentloop__list_subprojects` \u2014 check for existing subprojects\n- `mcp__agentloop__list_tasks` with `limit: 100` \u2014 check existing tasks (omit status to get all)\n\n### Turn 1.5 \u2014 Analyze Existing Subprojects (CRITICAL)\nReview the results from Turn 1. For each existing subproject:\n- Check its name and description against the new work request\n- If tasks already cover this work, report \"Work already planned\" and STOP\n- If the subproject is RELATED to the new work:\n  - Reuse this subproject's ID for new tasks (do NOT create a new subproject)\n  - When adding dependencies, include dependencies on EXISTING tasks in the subproject if appropriate\n  - After adding all tasks and dependencies, call `mcp__agentloop__reorganize_dag`\n\n### Turn 2 \u2014 Create Subproject (if needed)\nIf the delegation message included a subprojectId, reuse it. 
Otherwise call `mcp__agentloop__create_subproject`.\nIf a subproject already exists for this work (determined in Turn 1.5), skip creation and reuse its ID.\nSave the subprojectId for ALL subsequent create_task calls.\nIf tasks already cover this work, report that instead of creating duplicates and stop.\n\n### Turn 3 \u2014 Analyze & Create ALL Tasks (SINGLE response)\nDetermine task count based on ACTUAL complexity:\n- Simple (1-5 tasks): \"add logout button\" -> 1-2 tasks\n- Medium (5-15 tasks): \"add user authentication\" -> 8-12 tasks\n- Large (20-30 tasks): \"build payment system\" -> 25-30 tasks\n\nDO NOT inflate task counts artificially.\n\n**IMPORTANT: Call ALL `mcp__agentloop__create_task` tools in a SINGLE response as parallel tool_use blocks.**\nDo NOT create tasks one at a time across multiple turns. Include all of them in one message.\nEach call needs: title, description, priority, tags, sequence, subprojectId.\nRecord all returned task IDs from the results.\n\n### Turn 4 \u2014 Add ALL Dependencies (SINGLE response)\n**IMPORTANT: Call ALL `mcp__agentloop__add_task_dependency` tools in a SINGLE response as parallel tool_use blocks.**\nDo NOT add dependencies one at a time across multiple turns.\nUse the task IDs returned from Turn 3. Maximize parallelism \u2014 engineers work in isolated worktrees.\n\n### Turn 4.5 \u2014 Pause if Active Subproject\nIf you are adding tasks to an existing subproject that has ANY tasks in 'in-progress' or 'done' status:\n1. 
Call `mcp__agentloop__pause_subproject` with reason: \"Adding new tasks and reorganizing DAG\" and timeoutSeconds: 120\n\nIf the subproject has NO active tasks (all todo or none started), skip pause/resume.\n\n### Turn 5 \u2014 Validate and Reorganize\nCall in a SINGLE response:\n- `mcp__agentloop__validate_dag`\n- `mcp__agentloop__visualize_dag`\n- `mcp__agentloop__reorganize_dag` (MANDATORY if tasks were added to an existing subproject)\n\n### Turn 5.5 \u2014 Resume Subproject & Notify Merge Resolver (if paused in Turn 4.5)\nIf you paused the subproject in Turn 4.5, you MUST resume it now.\nCall in a SINGLE response:\n- `mcp__agentloop__resume_subproject` - ALWAYS resume, even if reorganization failed\n- `mcp__agentloop__send_agent_message` with:\n  - to: \"merge-resolver\"\n  - type: \"coordination\"\n  - content: { event: \"subproject_tasks_added\", subprojectId: <id>, newTaskIds: [<ids>], message: \"New tasks added to active subproject. Merge ordering may need adjustment.\" }\n\n**CRITICAL: You MUST call `mcp__agentloop__resume_subproject` if you called `pause_subproject` earlier. Failing to resume will leave the subproject permanently paused.**\n\nIf the subproject was NOT paused (not active), just notify the merge-resolver:\nIf you added tasks to an existing subproject that has any tasks in 'in-progress' or 'done' status:\nCall `mcp__agentloop__send_agent_message` with:\n- to: \"merge-resolver\"\n- type: \"coordination\"\n- content: { event: \"subproject_tasks_added\", subprojectId: <id>, newTaskIds: [<ids>], message: \"New tasks added to active subproject. Merge ordering may need adjustment.\" }\n\n## Critical Rules\n- You are a PLANNER, not an implementer. NEVER write code or create files.\n- ALWAYS create tasks using mcp__agentloop__create_task\n- ALWAYS build DAG dependencies using mcp__agentloop__add_task_dependency\n- ALWAYS include subprojectId in every create_task call\n- Engineers work in project root (.) 
- NEVER include commands that create subdirectories\n- Explicitly specify tech stack in task descriptions\n- NEVER make sequential tool calls when they can be parallel. This is a performance-critical agent.\n- When adding to an existing subproject, call reorganize_dag AFTER adding tasks and dependencies.\n- Notify merge-resolver when modifying active subprojects (those with in-progress or done tasks).\n\nProvide a summary when done.",
+              "prompt": "You are a product manager agent. Your job is to break down a high-level feature request into actionable AGILE tasks with proper DAG dependencies.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nCRITICAL — YOU ARE A TRANSLATOR, NOT A RELAY:\nYour job is to convert these specifications into self-contained work instructions. Engineers work in isolated worktrees and will NOT have access to these specification documents. Each task description is the engineer's PRIMARY and ONLY source of truth.\n\nEvery task description MUST include:\n- Explicit technology names (e.g., 'Next.js 14 App Router', 'localStorage', 'Framer Motion') — never 'as specified in docs'\n- Exact file paths from the implementation plan (e.g., 'lib/cardUtils.ts', 'data/cardMeanings.json')\n- Data storage approach stated explicitly (e.g., 'Use localStorage with key \"appState\"', not 'use persistence layer from plan')\n- API endpoints, database schema, component hierarchy — written inline with full detail\n- Relevant constraints and acceptance criteria copied verbatim from the specifications\n- If the spec describes ~15 files, say that. If persistence is localStorage, say localStorage.\n\nNEVER write vague references like:\n- 'as specified in docs' / 'per the plan' / 'per PRD'\n- 'use the locked application stack from baseline'\n- 'exact packages from docs'\n- 'as described in the implementation plan'\n\nInstead, write the ACTUAL specification inline in every task description.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nCRITICAL: Extract and embed specific details from the specifications above into task descriptions. Engineers work in isolated worktrees and may not have access to these specification documents. 
Every task must be self-contained.\n{{/if}}\n\n## Feature Request\nTitle: {{taskTitle}}\nDescription: {{taskDescription}}\n\n## CRITICAL: Maximize Parallel Tool Calls\nYou MUST minimize the number of LLM turns by batching independent tool calls into the SAME response.\nEvery extra turn adds ~5-10 seconds of latency. Batch aggressively.\n\n## Your Workflow\n\n### Turn 1 \u2014 Gather Context (parallel reads)\nCall BOTH of these tools in a SINGLE response:\n- `mcp__agentloop__list_subprojects` \u2014 check for existing subprojects\n- `mcp__agentloop__list_tasks` with `limit: 100` \u2014 check existing tasks (omit status to get all)\n\n### Turn 1.5 \u2014 Analyze Existing Subprojects (CRITICAL)\nReview the results from Turn 1. For each existing subproject:\n- Check its name and description against the new work request\n- If tasks already cover this work, report \"Work already planned\" and STOP\n- If the subproject is RELATED to the new work:\n  - Reuse this subproject's ID for new tasks (do NOT create a new subproject)\n  - When adding dependencies, include dependencies on EXISTING tasks in the subproject if appropriate\n  - After adding all tasks and dependencies, call `mcp__agentloop__reorganize_dag`\n\n### Turn 2 \u2014 Create Subproject (if needed)\nIf the delegation message included a subprojectId, reuse it. 
Otherwise call `mcp__agentloop__create_subproject`.\nIf a subproject already exists for this work (determined in Turn 1.5), skip creation and reuse its ID.\nSave the subprojectId for ALL subsequent create_task calls.\nIf tasks already cover this work, report that instead of creating duplicates and stop.\n\n### Turn 3 \u2014 Analyze & Create ALL Tasks (SINGLE response)\nDetermine task count based on ACTUAL complexity:\n- Simple (1-5 tasks): \"add logout button\" -> 1-2 tasks\n- Medium (5-15 tasks): \"add user authentication\" -> 8-12 tasks\n- Large (20-30 tasks): \"build payment system\" -> 25-30 tasks\n\nDO NOT inflate task counts artificially.\n\n**IMPORTANT: Call ALL `mcp__agentloop__create_task` tools in a SINGLE response as parallel tool_use blocks.**\nDo NOT create tasks one at a time across multiple turns. Include all of them in one message.\nEach call needs: title, description, priority, tags, sequence, subprojectId.\nRecord all returned task IDs from the results.\n\n### Turn 4 \u2014 Add ALL Dependencies (SINGLE response)\n**IMPORTANT: Call ALL `mcp__agentloop__add_task_dependency` tools in a SINGLE response as parallel tool_use blocks.**\nDo NOT add dependencies one at a time across multiple turns.\nUse the task IDs returned from Turn 3. Maximize parallelism \u2014 engineers work in isolated worktrees.\n\n### Turn 4.5 \u2014 Pause if Active Subproject\nIf you are adding tasks to an existing subproject that has ANY tasks in 'in-progress' or 'done' status:\n1. 
Call `mcp__agentloop__pause_subproject` with reason: \"Adding new tasks and reorganizing DAG\" and timeoutSeconds: 120\n\nIf the subproject has NO active tasks (all todo or none started), skip pause/resume.\n\n### Turn 5 \u2014 Validate and Reorganize\nCall in a SINGLE response:\n- `mcp__agentloop__validate_dag`\n- `mcp__agentloop__visualize_dag`\n- `mcp__agentloop__reorganize_dag` (MANDATORY if tasks were added to an existing subproject)\n\n### Turn 5.5 \u2014 Resume Subproject & Notify Merge Resolver (if paused in Turn 4.5)\nIf you paused the subproject in Turn 4.5, you MUST resume it now.\nCall in a SINGLE response:\n- `mcp__agentloop__resume_subproject` - ALWAYS resume, even if reorganization failed\n- `mcp__agentloop__send_agent_message` with:\n  - to: \"merge-resolver\"\n  - type: \"coordination\"\n  - content: { event: \"subproject_tasks_added\", subprojectId: <id>, newTaskIds: [<ids>], message: \"New tasks added to active subproject. Merge ordering may need adjustment.\" }\n\n**CRITICAL: You MUST call `mcp__agentloop__resume_subproject` if you called `pause_subproject` earlier. Failing to resume will leave the subproject permanently paused.**\n\nIf you did NOT pause the subproject in Turn 4.5, skip the resume call.\nIf you nevertheless added tasks to an existing subproject that has any tasks in 'in-progress' or 'done' status, still notify the merge-resolver:\nCall `mcp__agentloop__send_agent_message` with:\n- to: \"merge-resolver\"\n- type: \"coordination\"\n- content: { event: \"subproject_tasks_added\", subprojectId: <id>, newTaskIds: [<ids>], message: \"New tasks added to active subproject. Merge ordering may need adjustment.\" }\n\n## Critical Rules\n- You are a PLANNER, not an implementer. NEVER write code or create files.\n- ALWAYS create tasks using mcp__agentloop__create_task\n- ALWAYS build DAG dependencies using mcp__agentloop__add_task_dependency\n- ALWAYS include subprojectId in every create_task call\n- Engineers work in project root (.) 
- NEVER include commands that create subdirectories\n- Explicitly specify tech stack in task descriptions\n- NEVER make sequential tool calls when they can be parallel. This is a performance-critical agent.\n- When adding to an existing subproject, call reorganize_dag AFTER adding tasks and dependencies.\n- Notify merge-resolver when modifying active subprojects (those with in-progress or done tasks).\n\nProvide a summary when done.",
               "contextKeys": [
                 "taskTitle",
                 "taskDescription",
-                "taskComments"
+                "taskComments",
+                "projectSpecifications",
+                "projectSpecSummary"
               ],
               "subagent": "product-manager",
               "outputSchema": {
@@ -260,6 +309,8 @@
     "taskComments": null,
     "taskDetails": null,
     "codebaseContext": null,
+    "projectSpecifications": null,
+    "projectSpecSummary": null,
     "requestedStatus": "done",
     "statusChangeReason": "Task breakdown complete, tasks created with DAG dependencies",
     "custom": {

package/templates/agents/qa-tester/qa-tester.bt.json CHANGED Viewed

@@ -19,6 +19,51 @@
           "call": "FetchTaskContext",
           "comment": "Load task details, comments, and engineer completion info"
         },
+        {
+          "type": "action",
+          "call": "LoadProjectSpecifications",
+          "comment": "Load specification documents from .agentloop/specifications/ so QA can validate implementations against actual project requirements"
+        },
+        {
+          "type": "selector",
+          "comment": "Summarize project specifications if available (non-critical: skip if no specs)",
+          "children": [
+            {
+              "type": "sequence",
+              "children": [
+                {
+                  "type": "condition",
+                  "call": "HasProjectSpecifications",
+                  "comment": "Only summarize if specifications were loaded"
+                },
+                {
+                  "type": "llm-action",
+                  "name": "SummarizeProjectSpecifications",
+                  "prompt": "Distill the following project specification documents into a compact structured summary. Extract ONLY what is explicitly stated — do not infer, assume, or add anything not in the source documents.\n\n## Raw Specifications\n{{projectSpecifications}}\n\n## Output Format\nProduce a structured summary covering ONLY sections that have explicit information in the documents:\n\n### Technology Stack\nList every explicitly named technology, framework, library, and version. Example: 'Next.js 14 App Router', 'TypeScript 5.x', 'localStorage for client-side persistence'\n\n### File Structure\nList every file path, directory, or component name mentioned. Example: 'lib/cardUtils.ts', 'data/cardMeanings.json', 'components/CardSpread.tsx'\n\n### Data & Persistence\nHow data is stored, fetched, and managed. Database schema, API endpoints, storage keys, state management approach.\n\n### Domain Constraints\nExplicit rules, limits, and requirements. What the project MUST do and MUST NOT do. Example: 'No external API calls', 'Must work offline', 'Max 15 files total'\n\n### Acceptance Criteria\nTestable success conditions from the specs.\n\n### What Is NOT Used\nTechnologies or approaches explicitly excluded. Example: 'No backend server', 'No database', 'No authentication'\n\nBe exhaustive on details but terse on prose. Use bullet points. Copy exact names, paths, and values from the source — do not paraphrase technical terms.",
+                  "contextKeys": ["projectSpecifications"],
+                  "outputSchema": {
+                    "type": "object",
+                    "properties": {
+                      "summary": {
+                        "type": "string",
+                        "description": "Structured summary of project specifications"
+                      }
+                    },
+                    "required": ["summary"]
+                  },
+                  "outputKey": "projectSpecSummary",
+                  "temperature": 0.1,
+                  "allowedTools": []
+                }
+              ]
+            },
+            {
+              "type": "action",
+              "call": "NoOp",
+              "comment": "Continue without summarization if no specs or summarization fails"
+            }
+          ]
+        },
         {
           "type": "selector",
           "comment": "Check for incoming agent messages (non-critical: continue even if unavailable)",
@@ -105,12 +150,14 @@
                         {
                           "type": "llm-action",
                           "name": "AnalyzeChanges",
-                          "prompt": "You are a QA agent analyzing changes. Review the task and git diff.\n\nTask: {{taskDescription}}\nGit Diff: {{gitDiff}}\nProject Info: {{projectInfo}}\n\nBriefly summarize what was changed.",
+                          "prompt": "You are a QA agent analyzing changes. Review the task and git diff.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nValidate the implementation against these specifications. Check that:\n- The correct technologies and packages are used (not alternatives)\n- File paths match what the specs describe\n- Data storage, API endpoints, and schemas match spec requirements\n- Constraints and acceptance criteria from the specs are satisfied\nFlag any deviations as spec violations in your feedback.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nValidate the implementation against these specifications. Flag any deviations.\n{{/if}}\n\nTask: {{taskDescription}}\nGit Diff: {{gitDiff}}\nProject Info: {{projectInfo}}\n\nBriefly summarize what was changed.",
                           "contextKeys": [
                             "taskDescription",
                             "taskTitle",
                             "gitDiff",
-                            "projectInfo"
+                            "projectInfo",
+                            "projectSpecifications",
+                            "projectSpecSummary"
                           ],
                           "outputSchema": {
                             "type": "object",
@@ -235,14 +282,16 @@
                                           {
                                             "type": "llm-action",
                                             "name": "AnalyzeTestResults",
-                                            "prompt": "Analyze the test results in the context of what files were changed.\n\nTest Output: {{testResults}}\nTest Command: {{testCommandInfo}}\nGit Diff (files changed by engineer): {{gitDiff}}\nTask Files: {{taskFiles}}\nChange Analysis: {{changeAnalysis}}\n\nYour job is to determine if the engineer's changes CAUSED any test failures. You MUST distinguish between:\n\n1. **Task-related failures**: Tests that fail because of code the engineer changed or added. These are in files listed in the git diff or task files, or test files that directly import/test those changed modules. These are legitimate failures.\n\n2. **Pre-existing/unrelated failures**: Tests that fail in modules the engineer did NOT touch. These failures existed BEFORE the engineer's changes and are NOT the engineer's responsibility. Do NOT count these as failures.\n\n3. **Environment issues**: Test runner not found (exit code 127), dependencies not installed, 'command not found' errors, missing optional dependencies (@rollup/rollup-*, @esbuild/*), module resolution errors. These are QA environment issues, NOT code issues.\n\nIMPORTANT: If ONLY environment issues occurred and there are NO indications of task-related failures (taskRelatedFailures is 0 or null), set 'passed' to true \u2014 the engineer's code is not at fault for environment problems. Classify failures as 'environment'.\n\nSet 'passed' to true if:\n- Tests actually executed AND there are NO task-related failures, OR\n- Tests did NOT execute due to environment issues AND there are NO task-related failures detected\n\nSet 'passed' to false if:\n- There are task-related failures (regardless of whether other environment issues exist)\n\nFor each failure, classify it as 'task-related', 'pre-existing', or 'environment' in the classification field.",
+                                            "prompt": "Analyze the test results in the context of what files were changed.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nValidate the implementation against these specifications. Check that:\n- The correct technologies and packages are used (not alternatives)\n- File paths match what the specs describe\n- Data storage, API endpoints, and schemas match spec requirements\n- Constraints and acceptance criteria from the specs are satisfied\nFlag any deviations as spec violations in your feedback.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nValidate the implementation against these specifications. Flag any deviations.\n{{/if}}\n\nTest Output: {{testResults}}\nTest Command: {{testCommandInfo}}\nGit Diff (files changed by engineer): {{gitDiff}}\nTask Files: {{taskFiles}}\nChange Analysis: {{changeAnalysis}}\n\nYour job is to determine if the engineer's changes CAUSED any test failures. You MUST distinguish between:\n\n1. **Task-related failures**: Tests that fail because of code the engineer changed or added. These are in files listed in the git diff or task files, or test files that directly import/test those changed modules. These are legitimate failures.\n\n2. **Pre-existing/unrelated failures**: Tests that fail in modules the engineer did NOT touch. These failures existed BEFORE the engineer's changes and are NOT the engineer's responsibility. Do NOT count these as failures.\n\n3. **Environment issues**: Test runner not found (exit code 127), dependencies not installed, 'command not found' errors, missing optional dependencies (@rollup/rollup-*, @esbuild/*), module resolution errors. 
These are QA environment issues, NOT code issues.\n\nIMPORTANT: If ONLY environment issues occurred and there are NO indications of task-related failures (taskRelatedFailures is 0 or null), set 'passed' to true \u2014 the engineer's code is not at fault for environment problems. Classify failures as 'environment'.\n\nSet 'passed' to true if:\n- Tests actually executed AND there are NO task-related failures, OR\n- Tests did NOT execute due to environment issues AND there are NO task-related failures detected\n\nSet 'passed' to false if:\n- There are task-related failures (regardless of whether other environment issues exist)\n\nFor each failure, classify it as 'task-related', 'pre-existing', or 'environment' in the classification field.",
                                             "contextKeys": [
                                               "testResults",
                                               "testCommandInfo",
                                               "changeAnalysis",
                                               "gitDiff",
                                               "taskFiles",
-                                              "engineerTestSetup"
+                                              "engineerTestSetup",
+                                              "projectSpecifications",
+                                              "projectSpecSummary"
                                             ],
                                             "outputSchema": {
                                               "type": "object",
@@ -429,10 +478,12 @@
                                         {
                                           "type": "llm-action",
                                           "name": "WriteApprovalComment",
-                                          "prompt": "Write a brief approval comment.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\n\nKeep it short. If there were pre-existing test failures (not caused by the engineer), mention them briefly as known pre-existing issues that are not blocking.",
+                                          "prompt": "Write a brief approval comment.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nValidate the implementation against these specifications, using only the task description and test results provided below. Check that:\n- The correct technologies and packages are used (not alternatives)\n- File paths match what the specs describe\n- Data storage, API endpoints, and schemas match spec requirements\n- Constraints and acceptance criteria from the specs are satisfied\nFlag any deviations you can identify as spec violations in the approval comment.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nValidate the implementation against these specifications, using only the task description and test results provided below. Flag any deviations in the approval comment.\n{{/if}}\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\n\nKeep it short. If there were pre-existing test failures (not caused by the engineer), mention them briefly as known pre-existing issues that are not blocking.",
                                           "contextKeys": [
                                             "taskDescription",
-                                            "analyzedTestResults"
+                                            "analyzedTestResults",
+                                            "projectSpecifications",
+                                            "projectSpecSummary"
                                           ],
                                           "outputSchema": {
                                             "type": "object",
@@ -596,13 +647,15 @@
                                                 {
                                                   "type": "llm-action",
                                                   "name": "DocumentRejection",
-                                                  "prompt": "Document why the task is rejected based ONLY on task-related test failures.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\n\nExplain what failed and what needs fixing. ONLY include failures that are classified as 'task-related' \u2014 failures in code the engineer actually changed.\n\nCRITICAL RULES:\n1. NEVER reject for pre-existing failures (tests failing in code the engineer did NOT touch).\n2. NEVER reject because dependencies were not installed, test runners were not found (exit code 127), or the test environment was not set up.\n3. ONLY reject for actual code failures in the engineer's changed files: tests that fail due to bugs, missing implementations, incorrect logic, or code that does not meet acceptance criteria.\n4. If the only failures are pre-existing or environment-related, this rejection should NOT have been reached \u2014 but if it was, explain that the failures are not task-related and recommend approval.",
+                                                  "prompt": "Document why the task is rejected based ONLY on task-related test failures.\n\n{{#if projectSpecSummary}}\n## Project Specification Summary\n{{projectSpecSummary}}\n\nValidate the implementation against these specifications. Check that:\n- The correct technologies and packages are used (not alternatives)\n- File paths match what the specs describe\n- Data storage, API endpoints, and schemas match spec requirements\n- Constraints and acceptance criteria from the specs are satisfied\nFlag any deviations as spec violations in your feedback.\n{{else if projectSpecifications}}\n## Project Specifications (Raw)\n{{projectSpecifications}}\n\nValidate the implementation against these specifications. Flag any deviations.\n{{/if}}\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\n\nExplain what failed and what needs fixing. ONLY include failures that are classified as 'task-related' \u2014 failures in code the engineer actually changed.\n\nCRITICAL RULES:\n1. NEVER reject for pre-existing failures (tests failing in code the engineer did NOT touch).\n2. NEVER reject because dependencies were not installed, test runners were not found (exit code 127), or the test environment was not set up.\n3. ONLY reject for actual code failures in the engineer's changed files: tests that fail due to bugs, missing implementations, incorrect logic, or code that does not meet acceptance criteria.\n4. If the only failures are pre-existing or environment-related, this rejection should NOT have been reached \u2014 but if it was, explain that the failures are not task-related and recommend approval.",
                                                   "contextKeys": [
                                                     "taskDescription",
                                                     "analyzedTestResults",
                                                     "testResults",
                                                     "gitDiff",
-                                                    "taskFiles"
+                                                    "taskFiles",
+                                                    "projectSpecifications",
+                                                    "projectSpecSummary"
                                                   ],
                                                   "outputSchema": {
                                                     "type": "object",
@@ -861,6 +914,8 @@
     "environmentFixAttempted": false,
     "environmentFixResults": null,
     "projectInfo": null,
+    "projectSpecifications": null,
+    "projectSpecSummary": null,
     "engineerTestSetup": null,
     "testCommandInfo": null,
     "testExitCode": null,