ralph-lisa-loop 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/policy.d.ts CHANGED
@@ -21,6 +21,7 @@ export declare function checkRalph(tag: string, content: string): PolicyViolatio
21
21
  * Check Lisa's submission for policy violations.
22
22
  */
23
23
  export declare function checkLisa(tag: string, content: string): PolicyViolation[];
24
+ export declare function checkNeedsWorkResponse(ralphTag: string, lastLisaTag: string): PolicyViolation[];
24
25
  /**
25
26
  * Run policy checks based on mode.
26
27
  * Returns { proceed, violations } so callers can format output clearly (IMP-4).
package/dist/policy.js CHANGED
@@ -12,6 +12,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.getPolicyMode = getPolicyMode;
13
13
  exports.checkRalph = checkRalph;
14
14
  exports.checkLisa = checkLisa;
15
+ exports.checkNeedsWorkResponse = checkNeedsWorkResponse;
15
16
  exports.runPolicyCheck = runPolicyCheck;
16
17
  function getPolicyMode() {
17
18
  const mode = process.env.RL_POLICY_MODE || "warn";
@@ -24,6 +25,15 @@ function getPolicyMode() {
24
25
  */
25
26
  function checkRalph(tag, content) {
26
27
  const violations = [];
28
+ // [PLAN] must include test plan (step42: mandatory test execution)
29
+ if (tag === "PLAN") {
30
+ if (!content.match(/测试计划|[Tt]est [Pp]lan|测试命令|[Tt]est [Cc]ommand/)) {
31
+ violations.push({
32
+ rule: "plan-test-plan",
33
+ message: `[PLAN] submission missing test plan (test command + coverage scope).`,
34
+ });
35
+ }
36
+ }
27
37
  // [CODE] or [FIX] must include Test Results and file:line references
28
38
  if (tag === "CODE" || tag === "FIX") {
29
39
  if (!content.includes("Test Results") &&
@@ -34,12 +44,42 @@ function checkRalph(tag, content) {
34
44
  message: `[${tag}] submission missing "Test Results" section.`,
35
45
  });
36
46
  }
47
+ // step42: Test Results must include concrete execution evidence (exit code or pass/fail count)
48
+ // Exception: explicit "Skipped:" line inside the Test Results section only
49
+ // Section is bounded: from "Test Results" heading to next heading (## or blank-line-then-heading) or EOF
50
+ const testResultsMatch = content.match(/[Tt]est [Rr]esults[^\n]*\n([\s\S]*?)(?=\n##\s|\n\n[A-Z]|\n\n\*\*[A-Z]|$)/);
51
+ if (testResultsMatch) {
52
+ const testResultsBody = testResultsMatch[1];
53
+ const hasSkipLine = /^[\s\-*]*[Ss]kip(ped)?\s*:.*\S/m.test(testResultsBody);
54
+ const hasExecutionEvidence = /[Ee]xit code|退出码|\d+\/\d+\s*(pass|通过|passed)|(\d+)\s*tests?\s*pass/i.test(testResultsBody);
55
+ if (!hasSkipLine && !hasExecutionEvidence) {
56
+ violations.push({
57
+ rule: "test-results-detail",
58
+ message: `[${tag}] Test Results must include exit code or pass/fail count (e.g., "Exit code: 0" or "42/42 passed"), or explicit "Skipped:" with justification.`,
59
+ });
60
+ }
61
+ }
37
62
  if (!/\w+\.\w+:\d+/.test(content)) {
38
63
  violations.push({
39
64
  rule: "file-line-ref",
40
65
  message: `[${tag}] submission must include at least one file:line reference (e.g., commands.ts:42).`,
41
66
  });
42
67
  }
68
+ // New tests count check (Proposal §3.6)
69
+ // Warn if "New tests: 0" without valid justification
70
+ const lc = content.toLowerCase();
71
+ const hasNewTests = /new tests?:\s*[1-9]/i.test(content);
72
+ const hasZeroTests = /new tests?:\s*0/i.test(content);
73
+ if (hasZeroTests && !hasNewTests) {
74
+ // Check for valid justification keywords
75
+ const hasJustification = /\b(ui.only|layout.only|config.only|no.testable.logic|template.only|documentation)\b/i.test(content);
76
+ if (!hasJustification) {
77
+ violations.push({
78
+ rule: "new-tests-required",
79
+ message: `[${tag}] reports 0 new tests without valid justification. Add unit tests or explain why (e.g., "config-only change").`,
80
+ });
81
+ }
82
+ }
43
83
  }
44
84
  // [RESEARCH] must have substance
45
85
  if (tag === "RESEARCH") {
@@ -58,6 +98,17 @@ function checkRalph(tag, content) {
58
98
  message: "[RESEARCH] submission needs at least 2 fields (reference/key types/data structure/verification) or equivalent summary with evidence.",
59
99
  });
60
100
  }
101
+ // RESEARCH verification markers (Proposal §3.9)
102
+ // Checks for at least one global Verified:/Evidence: marker per submission.
103
+ // Per-claim enforcement is not mechanically feasible — Lisa reviews claim-level rigor.
104
+ const hasVerifiedMarker = /\bverified\s*:/i.test(content);
105
+ const hasEvidenceMarker = /\bevidence\s*:/i.test(content);
106
+ if (!hasVerifiedMarker && !hasEvidenceMarker) {
107
+ violations.push({
108
+ rule: "research-verification",
109
+ message: '[RESEARCH] submission should include at least one "Verified:" or "Evidence:" marker to support factual claims.',
110
+ });
111
+ }
61
112
  }
62
113
  return violations;
63
114
  }
@@ -86,6 +137,26 @@ function checkLisa(tag, content) {
86
137
  }
87
138
  return violations;
88
139
  }
140
+ /**
141
+ * NEEDS_WORK response enforcement (Proposal §3.2).
142
+ * When Lisa's last review was [NEEDS_WORK], Ralph must respond with
143
+ * [FIX], [CHALLENGE], [DISCUSS], or [QUESTION] — not unrelated [CODE]/[RESEARCH]/[PLAN].
144
+ */
145
+ const NEEDS_WORK_ALLOWED_TAGS = new Set(["FIX", "CHALLENGE", "DISCUSS", "QUESTION"]);
146
+ const NEEDS_WORK_BLOCKED_TAGS = new Set(["CODE", "RESEARCH", "PLAN", "CONSENSUS"]);
147
+ function checkNeedsWorkResponse(ralphTag, lastLisaTag) {
148
+ if (lastLisaTag !== "NEEDS_WORK")
149
+ return [];
150
+ if (NEEDS_WORK_ALLOWED_TAGS.has(ralphTag))
151
+ return [];
152
+ if (NEEDS_WORK_BLOCKED_TAGS.has(ralphTag)) {
153
+ return [{
154
+ rule: "needs-work-response",
155
+ message: `[${ralphTag}] submitted after Lisa's [NEEDS_WORK]. You must respond with [FIX], [CHALLENGE], [DISCUSS], or [QUESTION] first. If the task scope changed, run: ralph-lisa scope-update "new scope"`,
156
+ }];
157
+ }
158
+ return [];
159
+ }
89
160
  /**
90
161
  * Run policy checks based on mode.
91
162
  * Returns { proceed, violations } so callers can format output clearly (IMP-4).
package/dist/state.d.ts CHANGED
@@ -10,10 +10,28 @@ export declare function findProjectRoot(startDir?: string): string | null;
10
10
  * Reset the cached project root. Used in tests.
11
11
  */
12
12
  export declare function resetProjectRootCache(): void;
13
+ export declare function _setTmuxStateDirOverride(value: string | null | undefined): void;
14
+ /**
15
+ * Try to read RL_STATE_DIR from tmux session environment.
16
+ * Returns null if not in tmux or env var not set.
17
+ */
18
+ export declare function getTmuxStateDir(): string | null;
19
+ /**
20
+ * Resolve state directory with priority (Proposal §3.10):
21
+ * 1. tmux show-environment RL_STATE_DIR (authoritative in auto mode)
22
+ * 2. $RL_STATE_DIR environment variable (manual mode / override)
23
+ * 3. findProjectRoot() upward search (fallback)
24
+ *
25
+ * Returns { dir, source } for diagnostics.
26
+ */
27
+ export declare function resolveStateDir(): {
28
+ dir: string;
29
+ source: "tmux" | "env" | "auto-detect";
30
+ };
13
31
  /**
14
32
  * Get the .dual-agent/ state directory path.
15
33
  * When projectDir is explicitly given, uses that path directly.
16
- * When omitted, searches upward from CWD to find .dual-agent/ (like git).
34
+ * When omitted, uses priority resolution: tmux env → shell env → upward search.
17
35
  */
18
36
  export declare function stateDir(projectDir?: string): string;
19
37
  /**
package/dist/state.js CHANGED
@@ -40,6 +40,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
40
40
  exports.VALID_TAGS = exports.ARCHIVE_DIR = exports.STATE_DIR = void 0;
41
41
  exports.findProjectRoot = findProjectRoot;
42
42
  exports.resetProjectRootCache = resetProjectRootCache;
43
+ exports._setTmuxStateDirOverride = _setTmuxStateDirOverride;
44
+ exports.getTmuxStateDir = getTmuxStateDir;
45
+ exports.resolveStateDir = resolveStateDir;
43
46
  exports.stateDir = stateDir;
44
47
  exports.checkSession = checkSession;
45
48
  exports.readFile = readFile;
@@ -59,6 +62,7 @@ exports.appendHistory = appendHistory;
59
62
  exports.updateLastAction = updateLastAction;
60
63
  const fs = __importStar(require("node:fs"));
61
64
  const path = __importStar(require("node:path"));
65
+ const node_child_process_1 = require("node:child_process");
62
66
  exports.STATE_DIR = ".dual-agent";
63
67
  exports.ARCHIVE_DIR = ".dual-agent-archive";
64
68
  exports.VALID_TAGS = "PLAN|RESEARCH|CODE|FIX|PASS|NEEDS_WORK|CHALLENGE|DISCUSS|QUESTION|CONSENSUS";
@@ -111,17 +115,72 @@ function resetProjectRootCache() {
111
115
  _cachedStartDir = undefined;
112
116
  _cachedProjectRoot = undefined;
113
117
  }
118
+ /**
119
+ * Test-only override for getTmuxStateDir(). When set to a string, that value
120
+ * is returned instead of querying tmux. Set to undefined to restore real behavior.
121
+ */
122
+ let _tmuxStateDirOverride;
123
+ function _setTmuxStateDirOverride(value) {
124
+ _tmuxStateDirOverride = value;
125
+ }
126
+ /**
127
+ * Try to read RL_STATE_DIR from tmux session environment.
128
+ * Returns null if not in tmux or env var not set.
129
+ */
130
+ function getTmuxStateDir() {
131
+ // Test override takes precedence
132
+ if (_tmuxStateDirOverride !== undefined)
133
+ return _tmuxStateDirOverride;
134
+ try {
135
+ const tmuxEnv = process.env.TMUX;
136
+ if (!tmuxEnv)
137
+ return null;
138
+ const result = (0, node_child_process_1.execSync)("tmux show-environment RL_STATE_DIR 2>/dev/null", {
139
+ encoding: "utf-8",
140
+ stdio: ["pipe", "pipe", "pipe"],
141
+ timeout: 3000,
142
+ }).trim();
143
+ // Format: "RL_STATE_DIR=/path" or "-RL_STATE_DIR" (unset)
144
+ if (result.startsWith("RL_STATE_DIR=")) {
145
+ return result.slice("RL_STATE_DIR=".length);
146
+ }
147
+ return null;
148
+ }
149
+ catch {
150
+ return null;
151
+ }
152
+ }
153
+ /**
154
+ * Resolve state directory with priority (Proposal §3.10):
155
+ * 1. tmux show-environment RL_STATE_DIR (authoritative in auto mode)
156
+ * 2. $RL_STATE_DIR environment variable (manual mode / override)
157
+ * 3. findProjectRoot() upward search (fallback)
158
+ *
159
+ * Returns { dir, source } for diagnostics.
160
+ */
161
+ function resolveStateDir() {
162
+ // 1. tmux env (authoritative in auto mode)
163
+ const tmuxDir = getTmuxStateDir();
164
+ if (tmuxDir)
165
+ return { dir: tmuxDir, source: "tmux" };
166
+ // 2. Shell env var
167
+ const envDir = process.env.RL_STATE_DIR;
168
+ if (envDir)
169
+ return { dir: envDir, source: "env" };
170
+ // 3. Fallback: upward search
171
+ const root = findProjectRoot();
172
+ return { dir: path.join(root || process.cwd(), exports.STATE_DIR), source: "auto-detect" };
173
+ }
114
174
  /**
115
175
  * Get the .dual-agent/ state directory path.
116
176
  * When projectDir is explicitly given, uses that path directly.
117
- * When omitted, searches upward from CWD to find .dual-agent/ (like git).
177
+ * When omitted, uses priority resolution: tmux env → shell env → upward search.
118
178
  */
119
179
  function stateDir(projectDir) {
120
180
  if (projectDir !== undefined) {
121
181
  return path.join(projectDir, exports.STATE_DIR);
122
182
  }
123
- const root = findProjectRoot();
124
- return path.join(root || process.cwd(), exports.STATE_DIR);
183
+ return resolveStateDir().dir;
125
184
  }
126
185
  /**
127
186
  * Check that a session exists. Searches upward from CWD when no explicit dir given.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-lisa-loop",
3
- "version": "0.3.10",
3
+ "version": "0.3.12",
4
4
  "description": "Turn-based dual-agent collaboration: Ralph codes, Lisa reviews, consensus required.",
5
5
  "bin": {
6
6
  "ralph-lisa": "dist/cli.js"
@@ -13,6 +13,6 @@ ralph-lisa whose-turn
13
13
  ## Rules
14
14
 
15
15
  - If output is `ralph`: You can proceed with your work
16
- - If output is `lisa`: STOP immediately and wait for Lisa's response
16
+ - If output is `lisa`: Wait for Lisa's feedback — do not take further action until your turn
17
17
 
18
18
  **NEVER skip this check before working.**
@@ -36,4 +36,4 @@ Detailed content here...
36
36
 
37
37
  ## After Submission
38
38
 
39
- The turn automatically passes to Lisa. You must STOP and wait.
39
+ The turn automatically passes to Lisa. Wait for her feedback — do not take further action until it is your turn again.
@@ -11,6 +11,6 @@ Check whose turn it is before taking any action.
11
11
  ## Rules
12
12
 
13
13
  - If output is `lisa`: You can proceed with your review
14
- - If output is `ralph`: STOP immediately and wait for Ralph's submission
14
+ - If output is `ralph`: Wait for Ralph's feedback — do not take further action until your turn
15
15
 
16
- **NEVER skip this check before working.**
16
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
@@ -16,16 +16,16 @@ Then based on result:
16
16
  ```bash
17
17
  ralph-lisa read work.md
18
18
  ```
19
- - `ralph` → Say "Waiting for Ralph" and STOP
19
+ - `ralph` → Say "Waiting for Ralph's feedback" and wait — do not take further action until your turn
20
20
 
21
21
  **Do NOT wait for user to tell you to check. Check automatically.**
22
22
 
23
23
  ## CRITICAL: Turn-Based Rules
24
24
 
25
- - Output `lisa` → You can review
26
- - Output `ralph` → STOP immediately, tell user "Waiting for Ralph"
25
+ - Output `lisa` → You can review. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
26
+ - Output `ralph` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
27
27
 
28
- **NEVER skip this check. NEVER work when it's not your turn.**
28
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
29
29
 
30
30
  ## How to Submit
31
31
 
@@ -38,7 +38,7 @@ ralph-lisa submit-lisa --file .dual-agent/submit.md
38
38
 
39
39
  Inline mode (`ralph-lisa submit-lisa "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
40
40
 
41
- This automatically passes the turn to Ralph. Then you MUST STOP.
41
+ This automatically passes the turn to Ralph. Then wait — do not take further action until it is your turn again.
42
42
 
43
43
  ## Tags You Can Use
44
44
 
@@ -59,7 +59,7 @@ This automatically passes the turn to Ralph. Then you MUST STOP.
59
59
  3. Review following the behavior spec below
60
60
  4. Write review to .dual-agent/submit.md
61
61
  5. ralph-lisa submit-lisa --file .dual-agent/submit.md
62
- 6. STOP and wait for Ralph
62
+ 6. Wait for Ralph's response
63
63
  7. ralph-lisa whose-turn → Check again
64
64
  8. Repeat
65
65
  ```
@@ -79,9 +79,13 @@ ralph-lisa history # View full history
79
79
 
80
80
  **Before every review**, check task alignment:
81
81
  1. Read task.md: `ralph-lisa read task.md`
82
- 2. Compare Ralph's work direction with the task goal
83
- 3. If misaligned: return [NEEDS_WORK] with "Direction misalignment" before reviewing code details
84
- 4. If aligned: proceed with normal code review
82
+ 2. Read context.md: `ralph-lisa read context.md` (if it exists — contains runtime directives)
83
+ 3. Compare Ralph's work direction with the task goal + context
84
+ 4. If misaligned: return [NEEDS_WORK] with "Direction misalignment" before reviewing code details
85
+ 5. If aligned: proceed with normal code review
86
+
87
+ **Auto-suggestion rule:** After 2 consecutive off-task NEEDS_WORK rounds, include in your review:
88
+ > "If the task scope has changed, ask Ralph to run `ralph-lisa scope-update` before resubmitting."
85
89
 
86
90
  This is your PRIMARY responsibility — catching direction drift early saves more time than catching code bugs.
87
91
 
@@ -92,24 +96,27 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
92
96
  | Requirement | Details |
93
97
  |-------------|---------|
94
98
  | Read task.md first | Before reviewing, run `ralph-lisa read task.md` to understand the user's original intent. Verify Ralph's work aligns with the task goal. |
99
+ | Read context.md | If it exists, run `ralph-lisa read context.md` for runtime directives and user decisions that supplement the task. Context is also auto-injected into work.md. |
95
100
  | Read actual code | For `[CODE]`/`[FIX]`, read the files listed in `Files Changed` section of work.md. Do NOT review based on Ralph's description alone. |
96
101
  | Cite `file:line` | Every `[PASS]` or `[NEEDS_WORK]` must reference at least one specific `file:line` location to support your conclusion. |
97
102
  | View full file context | When reviewing changes, read the full file (not just the diff snippet) to understand surrounding context. |
98
103
  | Check research | If the task involves reference implementations, protocols, or external APIs, verify that `[RESEARCH]` was submitted before `[CODE]`. |
104
+ | Verify test execution | For `[CODE]`/`[FIX]`, verify Test Results contain actual command, exit code, and pass/fail count — OR an explicit `Skipped:` with valid justification (e.g., config-only, no testable logic). If results look suspicious (missing numbers, generic text), return `[NEEDS_WORK]`. |
105
+ | Re-run tests | For `[CODE]`/`[FIX]` with executed tests, run the test command yourself to verify results. For skipped tests, verify the justification is valid. Report your findings in the review. |
106
+ | Verify test plan alignment | For `[CODE]`/`[FIX]`, verify Test Results match the test plan from the `[PLAN]` phase. If tests differ from the plan without explanation, return `[NEEDS_WORK]`. |
99
107
 
100
108
  ### SHOULD (professional standard)
101
109
 
102
110
  | Recommendation | Details |
103
111
  |----------------|---------|
104
112
  | Check test quality | Examine test files for coverage, assertion strength, and edge case handling. |
105
- | Verify test results | Confirm that Ralph's reported test results are plausible given the changes. |
106
113
  | Look for regressions | Consider whether changes could break existing functionality. |
107
114
 
108
115
  ### YOUR JUDGMENT (not prescribed)
109
116
 
110
117
  | Area | Details |
111
118
  |------|---------|
112
- | Run tests yourself | You may choose to run tests independently. This is your professional call. |
119
+ | Write verification tests | When static analysis is insufficient, write ad-hoc tests in `.dual-agent/tests/` and reference the output in your review. These are auto-cleaned on [CONSENSUS]. |
113
120
  | Review depth | Decide what to focus on based on risk and complexity. |
114
121
  | Accept or reject | Your verdict is your own professional judgment. |
115
122
 
@@ -119,8 +126,10 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
119
126
  - [ ] Logic correct
120
127
  - [ ] Edge cases handled
121
128
  - [ ] Tests adequate
122
- - [ ] **Test Results included in submission** (required for [CODE]/[FIX])
129
+ - [ ] **Test Results verified** — `[CODE]`/`[FIX]` must have actual command + exit code + pass count, or explicit `Skipped:` with valid justification
130
+ - [ ] **Tests re-run** — You ran the test command yourself and confirmed results match (or verified skip justification)
123
131
  - [ ] **Research adequate** (if task involves reference implementations/protocols/external APIs, check that [RESEARCH] was submitted)
132
+ - [ ] **Research verified** — [RESEARCH] submissions must include at least one `Verified:` or `Evidence:` marker. Reject unverified claims.
124
133
  - [ ] **Factual claims verified** — For claims that a feature is "missing" or "not implemented", require `file:line` evidence or explicit acknowledgment that source code was not accessible
125
134
 
126
135
  ## Your Verdict is Advisory
@@ -143,10 +152,16 @@ Lisa: [NEEDS_WORK] ...
143
152
  Ralph: [FIX] Agree, because... / [CHALLENGE] Disagree, because...
144
153
  ```
145
154
 
155
+ ## Long-Running Tasks
156
+
157
+ For time-consuming operations (large-scale code review, batch test re-runs, deep research verification), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting your review.
158
+
159
+ This avoids blocking the main collaboration loop while waiting for slow operations to complete.
160
+
146
161
  ## Handling Disagreement
147
162
 
148
163
  If Ralph uses [CHALLENGE]:
149
164
  1. Consider his argument carefully
150
165
  2. If convinced → Change your verdict
151
166
  3. If not → Explain your reasoning with [CHALLENGE] or [DISCUSS]
152
- 4. After 5 rounds → Accept OVERRIDE or propose HANDOFF
167
+ 4. After 5 rounds → Deadlock auto-detected, watcher pauses for user intervention
@@ -16,16 +16,16 @@ Then based on result:
16
16
  ```bash
17
17
  ralph-lisa read review.md
18
18
  ```
19
- - `lisa` → Say "Waiting for Lisa" and STOP
19
+ - `lisa` → Say "Waiting for Lisa's feedback" and wait — do not take further action until your turn
20
20
 
21
21
  **Do NOT wait for user to tell you to check. Check automatically.**
22
22
 
23
23
  ## CRITICAL: Turn-Based Rules
24
24
 
25
- - Output `ralph` → You can work
26
- - Output `lisa` → STOP immediately, tell user "Waiting for Lisa"
25
+ - Output `ralph` → You can work. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
26
+ - Output `lisa` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
27
27
 
28
- **NEVER skip this check. NEVER work when it's not your turn.**
28
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
29
29
 
30
30
  ## How to Submit
31
31
 
@@ -38,7 +38,7 @@ ralph-lisa submit-ralph --file .dual-agent/submit.md
38
38
 
39
39
  Inline mode (`ralph-lisa submit-ralph "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
40
40
 
41
- This automatically passes the turn to Lisa. Then you MUST STOP.
41
+ This automatically passes the turn to Lisa. Then wait — do not take further action until it is your turn again.
42
42
 
43
43
  ## Tags You Can Use
44
44
 
@@ -65,7 +65,8 @@ Research content should include:
65
65
  - Reference implementation: file_path:line_number
66
66
  - Key types: type_name (file:line_number)
67
67
  - Data format: actual verified structure
68
- - Verification: how assumptions were confirmed
68
+ - Verified: how each claim was confirmed (required — at least one `Verified:` or `Evidence:` marker per submission)
69
+ - Evidence: source of truth (file path, command output, API response)
69
70
 
70
71
  This is required when the task involves reference implementations, protocols, or external APIs. Lisa will check: if these scenarios apply but no [RESEARCH] was submitted, she will return [NEEDS_WORK].
71
72
 
@@ -73,10 +74,15 @@ This is required when the task involves reference implementations, protocols, or
73
74
 
74
75
  **[CODE] or [FIX] submissions must include:**
75
76
 
76
- ### Test Results
77
- - Test command: `npm test` / `pytest` / ...
78
- - Result: Passed / Failed (reason)
79
- - If skipping tests, must explain why
77
+ ### Test Results (must be from actual execution, not fabricated)
78
+ - Test command: the exact command you ran (e.g., `pytest -x`, `npm test`)
79
+ - Exit code: 0 (all passed) or non-zero (failures)
80
+ - Result: X/Y passed (concrete numbers)
81
+ - Failed output: if any failures, include last 30 lines of error output
82
+ - If skipping tests, must explain why — Lisa will judge whether the reason is valid
83
+ - Tests must follow the test plan established in the `[PLAN]` phase
84
+ - Test Results must reference the planned test command
85
+ - If the test plan changed, explain why in the submission
80
86
 
81
87
  ## Round 1: Mandatory [PLAN]
82
88
 
@@ -85,6 +91,13 @@ your understanding of the task before you start coding. Include:
85
91
  - Your understanding of the task goal
86
92
  - Proposed approach
87
93
  - Expected deliverables
94
+ - **Test plan** (mandatory):
95
+ - Test command (e.g., `pytest -x`, `npm test`, `go test ./...`, `flutter test`)
96
+ - Expected test coverage scope
97
+ - If no test framework exists, explain verification approach
98
+ - **Quality gate commands** (recommended): Identify lint/format/type-check commands for the project
99
+ - Examples: `npm run lint`, `ruff check .`, `go vet ./...`
100
+ - These can be configured via `RL_RALPH_GATE` + `RL_GATE_COMMANDS` for auto mode
88
101
 
89
102
  ## Workflow
90
103
 
@@ -95,7 +108,7 @@ your understanding of the task before you start coding. Include:
95
108
  → Submit [RESEARCH] first, wait for Lisa's review
96
109
  4. Write content to .dual-agent/submit.md
97
110
  5. ralph-lisa submit-ralph --file .dual-agent/submit.md
98
- 6. STOP and wait for Lisa
111
+ 6. Wait for Lisa's response
99
112
  7. ralph-lisa whose-turn → Check again
100
113
  8. (If ralph) Read Lisa's feedback: ralph-lisa read review.md
101
114
  9. Respond or proceed based on feedback
@@ -120,17 +133,43 @@ After context compaction, run `ralph-lisa recap` to recover current state:
120
133
 
121
134
  ## Handling Lisa's Feedback
122
135
 
123
- - `[PASS]` → Confirm consensus, then `/next-step`
136
+ - `[PASS]` → First check PASS quality:
137
+ - Does Lisa's PASS include substantive review content (specific file checks, test verification, technical analysis)?
138
+ - If it's a rubber-stamp PASS (no specific reasons, no code references, no test verification), submit `[CHALLENGE]` requesting substantive review — **at most once**
139
+ - If Lisa resubmits PASS after your challenge (even if still thin), accept and submit `[CONSENSUS]` to avoid infinite loop
140
+ - If it's a substantive PASS and you agree, submit `[CONSENSUS]`
124
141
  - `[NEEDS_WORK]` → You MUST explain your reasoning:
125
142
  - If you agree: explain WHY Lisa is right, then submit [FIX]
126
143
  - If you disagree: use [CHALLENGE] to provide counter-argument
127
144
  - **Never submit a bare [FIX] without explanation. No silent acceptance.**
128
- - After 5 rounds deadlock → OVERRIDE or HANDOFF
145
+ - **You CANNOT submit [CODE]/[RESEARCH]/[PLAN] after NEEDS_WORK** — the CLI will reject it. Address the feedback first, or run `ralph-lisa scope-update` if the task scope changed.
146
+ - After 8 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
147
+
148
+ ## Submission Test Requirements
149
+
150
+ **[CODE] or [FIX] must report both regression and new tests:**
151
+
152
+ ```markdown
153
+ ### Test Results
154
+ - Regression: npm test → 150/150 pass (no breakage)
155
+ - New tests: 3 added
156
+ - resolveConfigDir.test.ts: platform path resolution (3 cases)
157
+ - ipc-shape.test.ts: getConversationMessages returns TMessage[]
158
+ ```
159
+
160
+ - "New tests: 0" requires justification (valid: pure UI layout, config-only change)
161
+ - Invalid excuse: "requires E2E" for pure functions, data shape validation, or mock-able IPC
162
+
163
+ ## Long-Running Tasks
164
+
165
+ For time-consuming operations (large-scale code search, batch test runs, CI waits, complex refactoring), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting.
166
+
167
+ This avoids blocking the main collaboration loop while waiting for slow operations to complete.
129
168
 
130
169
  ## Your Responsibilities
131
170
 
132
171
  1. Planning and coding
133
172
  2. Research before coding (when involving reference implementations/protocols/APIs)
134
- 3. Writing and running tests, including Test Results in submissions
173
+ 3. Writing and running tests — **both regression and new unit tests**
135
174
  4. Responding to Lisa's reviews with reasoning
136
175
  5. Getting consensus before proceeding
@@ -22,6 +22,6 @@
22
22
  "rules": {
23
23
  "consensus": "Both parties must agree before proceeding",
24
24
  "verdict": "PASS/NEEDS_WORK is advisory, not a command",
25
- "deadlock": "After 5 rounds, use OVERRIDE or HANDOFF"
25
+ "deadlock": "After 8 consecutive NEEDS_WORK rounds, watcher pauses for user intervention"
26
26
  }
27
27
  }