ralph-lisa-loop 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/policy.d.ts CHANGED
@@ -21,6 +21,7 @@ export declare function checkRalph(tag: string, content: string): PolicyViolatio
21
21
  * Check Lisa's submission for policy violations.
22
22
  */
23
23
  export declare function checkLisa(tag: string, content: string): PolicyViolation[];
24
+ export declare function checkNeedsWorkResponse(ralphTag: string, lastLisaTag: string): PolicyViolation[];
24
25
  /**
25
26
  * Run policy checks based on mode.
26
27
  * Returns { proceed, violations } so callers can format output clearly (IMP-4).
package/dist/policy.js CHANGED
@@ -12,6 +12,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.getPolicyMode = getPolicyMode;
13
13
  exports.checkRalph = checkRalph;
14
14
  exports.checkLisa = checkLisa;
15
+ exports.checkNeedsWorkResponse = checkNeedsWorkResponse;
15
16
  exports.runPolicyCheck = runPolicyCheck;
16
17
  function getPolicyMode() {
17
18
  const mode = process.env.RL_POLICY_MODE || "warn";
@@ -40,6 +41,21 @@ function checkRalph(tag, content) {
40
41
  message: `[${tag}] submission must include at least one file:line reference (e.g., commands.ts:42).`,
41
42
  });
42
43
  }
44
+ // New tests count check (Proposal §3.6)
45
+ // Warn if "New tests: 0" without valid justification
46
+ const lc = content.toLowerCase();
47
+ const hasNewTests = /new tests?:\s*[1-9]/i.test(content);
48
+ const hasZeroTests = /new tests?:\s*0/i.test(content);
49
+ if (hasZeroTests && !hasNewTests) {
50
+ // Check for valid justification keywords
51
+ const hasJustification = /\b(ui.only|layout.only|config.only|no.testable.logic|template.only|documentation)\b/i.test(content);
52
+ if (!hasJustification) {
53
+ violations.push({
54
+ rule: "new-tests-required",
55
+ message: `[${tag}] reports 0 new tests without valid justification. Add unit tests or explain why (e.g., "config-only change").`,
56
+ });
57
+ }
58
+ }
43
59
  }
44
60
  // [RESEARCH] must have substance
45
61
  if (tag === "RESEARCH") {
@@ -58,6 +74,17 @@ function checkRalph(tag, content) {
58
74
  message: "[RESEARCH] submission needs at least 2 fields (reference/key types/data structure/verification) or equivalent summary with evidence.",
59
75
  });
60
76
  }
77
+ // RESEARCH verification markers (Proposal §3.9)
78
+ // Checks for at least one global Verified:/Evidence: marker per submission.
79
+ // Per-claim enforcement is not mechanically feasible — Lisa reviews claim-level rigor.
80
+ const hasVerifiedMarker = /\bverified\s*:/i.test(content);
81
+ const hasEvidenceMarker = /\bevidence\s*:/i.test(content);
82
+ if (!hasVerifiedMarker && !hasEvidenceMarker) {
83
+ violations.push({
84
+ rule: "research-verification",
85
+ message: '[RESEARCH] submission should include at least one "Verified:" or "Evidence:" marker to support factual claims.',
86
+ });
87
+ }
61
88
  }
62
89
  return violations;
63
90
  }
@@ -86,6 +113,26 @@ function checkLisa(tag, content) {
86
113
  }
87
114
  return violations;
88
115
  }
/**
 * NEEDS_WORK response enforcement (Proposal §3.2).
 *
 * Once Lisa's last review was [NEEDS_WORK], Ralph is expected to answer with
 * [FIX], [CHALLENGE], [DISCUSS], or [QUESTION]. Submitting [CODE], [RESEARCH],
 * [PLAN], or [CONSENSUS] instead yields a single "needs-work-response"
 * violation. Any other tag, or any other previous Lisa tag, yields no violation.
 */
const NEEDS_WORK_ALLOWED_TAGS = new Set(["FIX", "CHALLENGE", "DISCUSS", "QUESTION"]);
const NEEDS_WORK_BLOCKED_TAGS = new Set(["CODE", "RESEARCH", "PLAN", "CONSENSUS"]);
function checkNeedsWorkResponse(ralphTag, lastLisaTag) {
    const followsNeedsWork = lastLisaTag === "NEEDS_WORK";
    // A tag is rejected only when it is explicitly blocked and not explicitly allowed.
    const isRejected = followsNeedsWork &&
        !NEEDS_WORK_ALLOWED_TAGS.has(ralphTag) &&
        NEEDS_WORK_BLOCKED_TAGS.has(ralphTag);
    if (!isRejected) {
        return [];
    }
    const violation = {
        rule: "needs-work-response",
        message: `[${ralphTag}] submitted after Lisa's [NEEDS_WORK]. You must respond with [FIX], [CHALLENGE], [DISCUSS], or [QUESTION] first. If the task scope changed, run: ralph-lisa scope-update "new scope"`,
    };
    return [violation];
}
89
136
  /**
90
137
  * Run policy checks based on mode.
91
138
  * Returns { proceed, violations } so callers can format output clearly (IMP-4).
package/dist/state.d.ts CHANGED
@@ -10,10 +10,28 @@ export declare function findProjectRoot(startDir?: string): string | null;
10
10
  * Reset the cached project root. Used in tests.
11
11
  */
12
12
  export declare function resetProjectRootCache(): void;
13
+ export declare function _setTmuxStateDirOverride(value: string | null | undefined): void;
14
+ /**
15
+ * Try to read RL_STATE_DIR from tmux session environment.
16
+ * Returns null if not in tmux or env var not set.
17
+ */
18
+ export declare function getTmuxStateDir(): string | null;
19
+ /**
20
+ * Resolve state directory with priority (Proposal §3.10):
21
+ * 1. tmux show-environment RL_STATE_DIR (authoritative in auto mode)
22
+ * 2. $RL_STATE_DIR environment variable (manual mode / override)
23
+ * 3. findProjectRoot() upward search (fallback)
24
+ *
25
+ * Returns { dir, source } for diagnostics.
26
+ */
27
+ export declare function resolveStateDir(): {
28
+ dir: string;
29
+ source: "tmux" | "env" | "auto-detect";
30
+ };
13
31
  /**
14
32
  * Get the .dual-agent/ state directory path.
15
33
  * When projectDir is explicitly given, uses that path directly.
16
- * When omitted, searches upward from CWD to find .dual-agent/ (like git).
34
+ * When omitted, uses priority resolution: tmux env → shell env → upward search.
17
35
  */
18
36
  export declare function stateDir(projectDir?: string): string;
19
37
  /**
package/dist/state.js CHANGED
@@ -40,6 +40,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
40
40
  exports.VALID_TAGS = exports.ARCHIVE_DIR = exports.STATE_DIR = void 0;
41
41
  exports.findProjectRoot = findProjectRoot;
42
42
  exports.resetProjectRootCache = resetProjectRootCache;
43
+ exports._setTmuxStateDirOverride = _setTmuxStateDirOverride;
44
+ exports.getTmuxStateDir = getTmuxStateDir;
45
+ exports.resolveStateDir = resolveStateDir;
43
46
  exports.stateDir = stateDir;
44
47
  exports.checkSession = checkSession;
45
48
  exports.readFile = readFile;
@@ -59,6 +62,7 @@ exports.appendHistory = appendHistory;
59
62
  exports.updateLastAction = updateLastAction;
60
63
  const fs = __importStar(require("node:fs"));
61
64
  const path = __importStar(require("node:path"));
65
+ const node_child_process_1 = require("node:child_process");
62
66
  exports.STATE_DIR = ".dual-agent";
63
67
  exports.ARCHIVE_DIR = ".dual-agent-archive";
64
68
  exports.VALID_TAGS = "PLAN|RESEARCH|CODE|FIX|PASS|NEEDS_WORK|CHALLENGE|DISCUSS|QUESTION|CONSENSUS";
@@ -111,17 +115,72 @@ function resetProjectRootCache() {
111
115
  _cachedStartDir = undefined;
112
116
  _cachedProjectRoot = undefined;
113
117
  }
/**
 * Test-only override for getTmuxStateDir().
 * - string or null: returned as-is instead of querying tmux.
 * - undefined: no override is active and tmux is queried.
 */
let _tmuxStateDirOverride;
/**
 * Install or clear the test-only override consulted by getTmuxStateDir().
 *
 * @param value Value getTmuxStateDir() should return (a path or null);
 *              pass undefined to restore the real tmux lookup.
 */
function _setTmuxStateDirOverride(value) {
    _tmuxStateDirOverride = value;
}
/**
 * Try to read RL_STATE_DIR from the tmux session environment.
 *
 * @returns The configured directory, or null when the process is not inside
 *          tmux (TMUX is unset), the variable is unset or empty in the
 *          session, or the tmux command fails or times out.
 */
function getTmuxStateDir() {
    // Test override takes precedence
    if (_tmuxStateDirOverride !== undefined)
        return _tmuxStateDirOverride;
    if (!process.env.TMUX)
        return null;
    const prefix = "RL_STATE_DIR=";
    try {
        // Run tmux directly with an argument array: no shell is needed because
        // stderr is already captured by the "pipe" stdio setting.
        const output = node_child_process_1.execFileSync("tmux", ["show-environment", "RL_STATE_DIR"], {
            encoding: "utf-8",
            stdio: ["pipe", "pipe", "pipe"],
            timeout: 3000,
        }).trim();
        // Format: "RL_STATE_DIR=/path" or "-RL_STATE_DIR" (unset)
        if (!output.startsWith(prefix))
            return null;
        const value = output.slice(prefix.length);
        // An empty assignment carries no usable path; report it as "not set".
        return value.length > 0 ? value : null;
    }
    catch {
        // Best-effort lookup: a missing tmux binary, non-zero exit, or timeout means "not available".
        return null;
    }
}
/**
 * Resolve the state directory (Proposal §3.10), trying sources in order:
 *   1. RL_STATE_DIR from the tmux session environment (authoritative in auto mode)
 *   2. the RL_STATE_DIR environment variable of this process (manual mode / override)
 *   3. the project root found by findProjectRoot(), falling back to the current
 *      working directory, joined with STATE_DIR
 *
 * @returns { dir, source } where source names which step produced dir
 *          ("tmux", "env", or "auto-detect"), for diagnostics.
 */
function resolveStateDir() {
    const fromTmux = getTmuxStateDir();
    if (fromTmux) {
        return { dir: fromTmux, source: "tmux" };
    }
    const fromEnv = process.env.RL_STATE_DIR;
    if (fromEnv) {
        return { dir: fromEnv, source: "env" };
    }
    // Neither source supplied a directory: derive it from the project layout.
    const baseDir = findProjectRoot() || process.cwd();
    return {
        dir: path.join(baseDir, exports.STATE_DIR),
        source: "auto-detect",
    };
}
114
174
  /**
115
175
  * Get the .dual-agent/ state directory path.
116
176
  * When projectDir is explicitly given, uses that path directly.
117
- * When omitted, searches upward from CWD to find .dual-agent/ (like git).
177
+ * When omitted, uses priority resolution: tmux env → shell env → upward search.
118
178
  */
119
179
  function stateDir(projectDir) {
120
180
  if (projectDir !== undefined) {
121
181
  return path.join(projectDir, exports.STATE_DIR);
122
182
  }
123
- const root = findProjectRoot();
124
- return path.join(root || process.cwd(), exports.STATE_DIR);
183
+ return resolveStateDir().dir;
125
184
  }
126
185
  /**
127
186
  * Check that a session exists. Searches upward from CWD when no explicit dir given.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-lisa-loop",
3
- "version": "0.3.10",
3
+ "version": "0.3.11",
4
4
  "description": "Turn-based dual-agent collaboration: Ralph codes, Lisa reviews, consensus required.",
5
5
  "bin": {
6
6
  "ralph-lisa": "dist/cli.js"
@@ -79,9 +79,13 @@ ralph-lisa history # View full history
79
79
 
80
80
  **Before every review**, check task alignment:
81
81
  1. Read task.md: `ralph-lisa read task.md`
82
- 2. Compare Ralph's work direction with the task goal
83
- 3. If misaligned: return [NEEDS_WORK] with "Direction misalignment" before reviewing code details
84
- 4. If aligned: proceed with normal code review
82
+ 2. Read context.md: `ralph-lisa read context.md` (if it exists — contains runtime directives)
83
+ 3. Compare Ralph's work direction with the task goal + context
84
+ 4. If misaligned: return [NEEDS_WORK] with "Direction misalignment" before reviewing code details
85
+ 5. If aligned: proceed with normal code review
86
+
87
+ **Auto-suggestion rule:** After 2 consecutive off-task NEEDS_WORK rounds, include in your review:
88
+ > "If the task scope has changed, ask Ralph to run `ralph-lisa scope-update` before resubmitting."
85
89
 
86
90
  This is your PRIMARY responsibility — catching direction drift early saves more time than catching code bugs.
87
91
 
@@ -92,6 +96,7 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
92
96
  | Requirement | Details |
93
97
  |-------------|---------|
94
98
  | Read task.md first | Before reviewing, run `ralph-lisa read task.md` to understand the user's original intent. Verify Ralph's work aligns with the task goal. |
99
+ | Read context.md | If it exists, run `ralph-lisa read context.md` for runtime directives and user decisions that supplement the task. Context is also auto-injected into work.md. |
95
100
  | Read actual code | For `[CODE]`/`[FIX]`, read the files listed in `Files Changed` section of work.md. Do NOT review based on Ralph's description alone. |
96
101
  | Cite `file:line` | Every `[PASS]` or `[NEEDS_WORK]` must reference at least one specific `file:line` location to support your conclusion. |
97
102
  | View full file context | When reviewing changes, read the full file (not just the diff snippet) to understand surrounding context. |
@@ -110,6 +115,7 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
110
115
  | Area | Details |
111
116
  |------|---------|
112
117
  | Run tests yourself | You may choose to run tests independently. This is your professional call. |
118
+ | Write verification tests | When static analysis is insufficient, write ad-hoc tests in `.dual-agent/tests/` and reference the output in your review. These are auto-cleaned on [CONSENSUS]. |
113
119
  | Review depth | Decide what to focus on based on risk and complexity. |
114
120
  | Accept or reject | Your verdict is your own professional judgment. |
115
121
 
@@ -121,6 +127,7 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
121
127
  - [ ] Tests adequate
122
128
  - [ ] **Test Results included in submission** (required for [CODE]/[FIX])
123
129
  - [ ] **Research adequate** (if task involves reference implementations/protocols/external APIs, check that [RESEARCH] was submitted)
130
+ - [ ] **Research verified** — [RESEARCH] submissions must include at least one `Verified:` or `Evidence:` marker. Reject unverified claims.
124
131
  - [ ] **Factual claims verified** — For claims that a feature is "missing" or "not implemented", require `file:line` evidence or explicit acknowledgment that source code was not accessible
125
132
 
126
133
  ## Your Verdict is Advisory
@@ -65,7 +65,8 @@ Research content should include:
65
65
  - Reference implementation: file_path:line_number
66
66
  - Key types: type_name (file:line_number)
67
67
  - Data format: actual verified structure
68
- - Verification: how assumptions were confirmed
68
+ - Verified: how each claim was confirmed (required — at least one `Verified:` or `Evidence:` marker per submission)
69
+ - Evidence: source of truth (file path, command output, API response)
69
70
 
70
71
  This is required when the task involves reference implementations, protocols, or external APIs. Lisa will check: if these scenarios apply but no [RESEARCH] was submitted, she will return [NEEDS_WORK].
71
72
 
@@ -120,17 +121,33 @@ After context compaction, run `ralph-lisa recap` to recover current state:
120
121
 
121
122
  ## Handling Lisa's Feedback
122
123
 
123
- - `[PASS]` → Confirm consensus, then `/next-step`
124
+ - `[PASS]` → Submit [CONSENSUS] to close. Lisa's [PASS] already approves — no need to wait for her [CONSENSUS] back (single-round consensus).
124
125
  - `[NEEDS_WORK]` → You MUST explain your reasoning:
125
126
  - If you agree: explain WHY Lisa is right, then submit [FIX]
126
127
  - If you disagree: use [CHALLENGE] to provide counter-argument
127
128
  - **Never submit a bare [FIX] without explanation. No silent acceptance.**
128
- - After 5 rounds deadlock → OVERRIDE or HANDOFF
129
+ - **You CANNOT submit [CODE]/[RESEARCH]/[PLAN] after NEEDS_WORK** — the CLI will reject it. Address the feedback first, or run `ralph-lisa scope-update` if the task scope changed.
130
+ - After 3 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
131
+
132
+ ## Submission Test Requirements
133
+
134
+ **[CODE] or [FIX] must report both regression and new tests:**
135
+
136
+ ```markdown
137
+ ### Test Results
138
+ - Regression: npm test → 150/150 pass (no breakage)
139
+ - New tests: 3 added
140
+ - resolveConfigDir.test.ts: platform path resolution (3 cases)
141
+ - ipc-shape.test.ts: getConversationMessages returns TMessage[]
142
+ ```
143
+
144
+ - "New tests: 0" requires justification (valid: pure UI layout, config-only change)
145
+ - Invalid excuse: "requires E2E" for pure functions, data shape validation, or mock-able IPC
129
146
 
130
147
  ## Your Responsibilities
131
148
 
132
149
  1. Planning and coding
133
150
  2. Research before coding (when involving reference implementations/protocols/APIs)
134
- 3. Writing and running tests, including Test Results in submissions
151
+ 3. Writing and running tests — **both regression and new unit tests**
135
152
  4. Responding to Lisa's reviews with reasoning
136
153
  5. Getting consensus before proceeding