npm - tmux-team - Versions diffs - 3.2.1 → 3.2.2 - Mend

tmux-team 3.2.1 → 3.2.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +15 -15
package/src/commands/talk.test.ts +123 -116
package/src/commands/talk.ts +159 -54

package/package.json CHANGED Viewed

@@ -1,25 +1,12 @@
 {
   "name": "tmux-team",
-  "version": "3.2.1",
+  "version": "3.2.2",
   "description": "CLI tool for AI agent collaboration in tmux - manage cross-pane communication",
   "type": "module",
   "bin": {
     "tmux-team": "./bin/tmux-team",
     "tmt": "./bin/tmux-team"
   },
-  "scripts": {
-    "dev": "tsx src/cli.ts",
-    "tmt": "./bin/tmux-team",
-    "test": "npm run test:run",
-    "test:watch": "vitest",
-    "test:run": "vitest run --coverage && node scripts/check-coverage.mjs --threshold 95",
-    "lint": "oxlint src/",
-    "lint:fix": "oxlint src/ --fix",
-    "format": "prettier --write src/",
-    "format:check": "prettier --check src/",
-    "type:check": "tsc --noEmit",
-    "check": "npm run type:check && npm run lint && npm run format:check"
-  },
   "keywords": [
     "tmux",
     "cli",
@@ -56,5 +43,18 @@
     "prettier": "^3.7.4",
     "typescript": "^5.3.0",
     "vitest": "^1.2.0"
+  },
+  "scripts": {
+    "dev": "tsx src/cli.ts",
+    "tmt": "./bin/tmux-team",
+    "test": "pnpm test:run",
+    "test:watch": "vitest",
+    "test:run": "vitest run --coverage && node scripts/check-coverage.mjs --threshold 95",
+    "lint": "oxlint src/",
+    "lint:fix": "oxlint src/ --fix",
+    "format": "prettier --write src/",
+    "format:check": "prettier --check src/",
+    "type:check": "tsc --noEmit",
+    "check": "pnpm type:check && pnpm lint && pnpm format:check"
   }
-}
+}

package/src/commands/talk.test.ts CHANGED Viewed

@@ -14,9 +14,12 @@ import { cmdTalk } from './talk.js';
 // Constants
 // ─────────────────────────────────────────────────────────────
-// Regex to match new end marker format
+// Regex to match the END marker (as printed by agent) - used to find markers in output
 const END_MARKER_REGEX = /---RESPONSE-END-([a-f0-9]+)---/;
+// Regex to extract nonce from instruction (instruction says "RESPONSE-END-xxxx" without dashes)
+const INSTRUCTION_NONCE_REGEX = /RESPONSE-END-([a-f0-9]+)/;
 // ─────────────────────────────────────────────────────────────
 // Test utilities
 // ─────────────────────────────────────────────────────────────
@@ -457,11 +460,13 @@ describe('cmdTalk - --wait mode', () => {
   });
   // Helper: generate mock capture output with proper marker structure
-  // The end marker must appear TWICE: once in instruction, once from "agent"
-  // New format: ---RESPONSE-END-NONCE---
+  // New protocol: instruction describes the marker verbally (doesn't contain literal marker)
+  // Include the instruction line so extraction can anchor to it for clean output
   function mockCompleteResponse(nonce: string, response: string): string {
+    const instruction = `When you finish responding, output a completion marker on its own line: three dashes, RESPONSE-END-${nonce}, three dashes (no spaces).`;
     const endMarker = `---RESPONSE-END-${nonce}---`;
-    return `Hello\n\nWhen you finish responding, print this exact line:\n${endMarker}\n${response}\n${endMarker}`;
+    // Simulate: scrollback, user message with instruction, agent response, marker
+    return `Some scrollback content\nUser message here\n\n${instruction}\n${response}\n${endMarker}`;
   }
   it('appends nonce instruction to message', async () => {
@@ -471,9 +476,9 @@ describe('cmdTalk - --wait mode', () => {
     tmux.capture = () => {
       captureCount++;
       if (captureCount === 1) return ''; // Baseline
-      // Return marker on second capture - must include instruction AND agent's end marker
+      // Extract nonce from instruction and return agent response with marker
       const sent = tmux.sends[0]?.message || '';
-      const match = sent.match(END_MARKER_REGEX);
+      const match = sent.match(INSTRUCTION_NONCE_REGEX);
       return match ? mockCompleteResponse(match[1], 'Response here') : '';
     };
@@ -494,8 +499,11 @@ describe('cmdTalk - --wait mode', () => {
     await cmdTalk(ctx, 'claude', 'Hello');
     expect(tmux.sends).toHaveLength(1);
-    expect(tmux.sends[0].message).toContain('When you finish responding, print this exact line:');
-    expect(tmux.sends[0].message).toMatch(/---RESPONSE-END-[a-f0-9]+---/);
+    // New protocol: instruction describes marker verbally, doesn't contain literal marker
+    expect(tmux.sends[0].message).toContain('output a completion marker on its own line');
+    expect(tmux.sends[0].message).toContain('three dashes, RESPONSE-END-');
+    // Should NOT contain the literal marker format
+    expect(tmux.sends[0].message).not.toMatch(/---RESPONSE-END-[a-f0-9]+---/);
   });
   it('detects nonce marker and extracts response', async () => {
@@ -507,9 +515,9 @@ describe('cmdTalk - --wait mode', () => {
     tmux.capture = () => {
       captureCount++;
       if (captureCount === 1) return 'baseline content';
-      // Extract nonce from sent message and return matching marker
+      // Extract nonce from instruction and return agent response with marker
       const sent = tmux.sends[0]?.message || '';
-      const match = sent.match(END_MARKER_REGEX);
+      const match = sent.match(INSTRUCTION_NONCE_REGEX);
       if (match) {
         return mockCompleteResponse(match[1], 'Agent response here');
       }
@@ -575,20 +583,20 @@ describe('cmdTalk - --wait mode', () => {
     expect(output.error).toContain('Timed out');
   });
-  it('isolates response using end markers in scrollback', async () => {
+  it('isolates response using end marker in scrollback', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
     const oldContent = 'Previous conversation\nOld content here';
     tmux.capture = () => {
-      // Simulate scrollback with old content, then our instruction (with end marker), response, and agent's end marker
+      // Simulate scrollback with old content, then agent response with marker
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // Must include end marker TWICE: once in instruction, once from "agent"
-        return `${oldContent}\n\nMessage content here\n\nWhen you finish responding, print this exact line:\n${endMarker}\nNew response content\n\n${endMarker}`;
+      const nonceMatch = sent.match(INSTRUCTION_NONCE_REGEX);
+      if (nonceMatch) {
+        const endMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
+        // Only ONE marker from agent
+        return `${oldContent}\nNew response content\n\n${endMarker}`;
       }
       return oldContent;
     };
@@ -612,9 +620,6 @@ describe('cmdTalk - --wait mode', () => {
     const output = ui.jsonOutput[0] as Record<string, unknown>;
     expect(output.status).toBe('completed');
-    // Response should NOT include old content
-    expect(output.response).not.toContain('Previous conversation');
-    expect(output.response).not.toContain('Old content here');
     // Response should contain the actual response content
     expect(output.response).toContain('New response content');
   });
@@ -627,7 +632,7 @@ describe('cmdTalk - --wait mode', () => {
       captureCount++;
       if (captureCount === 1) return '';
       const sent = tmux.sends[0]?.message || '';
-      const match = sent.match(END_MARKER_REGEX);
+      const match = sent.match(INSTRUCTION_NONCE_REGEX);
       return match ? mockCompleteResponse(match[1], 'Done') : '';
     };
@@ -695,7 +700,7 @@ describe('cmdTalk - --wait mode', () => {
     // Mock send to capture the nonce for each pane
     tmux.send = (pane: string, msg: string) => {
-      const match = msg.match(END_MARKER_REGEX);
+      const match = msg.match(INSTRUCTION_NONCE_REGEX);
       if (match) {
         noncesByPane[pane] = match[1];
       }
@@ -743,17 +748,18 @@ describe('cmdTalk - --wait mode', () => {
     const noncesByPane: Record<string, string> = {};
     tmux.send = (pane: string, msg: string) => {
-      const match = msg.match(END_MARKER_REGEX);
+      const match = msg.match(INSTRUCTION_NONCE_REGEX);
       if (match) {
         noncesByPane[pane] = match[1];
       }
     };
-    // Only pane 10.1 responds, 10.2 times out
+    // Only pane 10.1 responds with end marker, 10.2 never has end marker
     tmux.capture = (pane: string) => {
       if (pane === '10.1' && noncesByPane[pane]) {
         return mockCompleteResponse(noncesByPane[pane], 'Response from codex');
       }
+      // gemini has no end marker - still typing
       return 'still working...';
     };
@@ -763,10 +769,10 @@ describe('cmdTalk - --wait mode', () => {
       ui,
       tmux,
       paths,
-      flags: { wait: true, timeout: 0.1, json: true },
+      flags: { wait: true, timeout: 0.5, json: true },
       config: {
         defaults: {
-          timeout: 0.1,
+          timeout: 0.5,
           pollInterval: 0.02,
           captureLines: 100,
           preambleEvery: 3,
@@ -781,7 +787,7 @@ describe('cmdTalk - --wait mode', () => {
     try {
       await cmdTalk(ctx, 'all', 'Hello');
     } catch {
-      // Expected timeout exit
+      // Expected timeout exit for gemini
     }
     // Should have JSON output with both results
@@ -801,7 +807,7 @@ describe('cmdTalk - --wait mode', () => {
     const nonces: string[] = [];
     tmux.send = (_pane: string, msg: string) => {
-      const match = msg.match(END_MARKER_REGEX);
+      const match = msg.match(INSTRUCTION_NONCE_REGEX);
       if (match) {
         nonces.push(match[1]);
       }
@@ -910,24 +916,17 @@ describe('cmdTalk - nonce collision handling', () => {
       if (captureCount === 1) {
         return `Old question\nOld response\n${oldEndMarker}`;
       }
-      // New capture still has old markers but new request markers not complete yet
+      // New capture still has old markers but agent hasn't responded yet
       if (captureCount === 2) {
-        const sent = tmux.sends[0]?.message || '';
-        const endMatch = sent.match(END_MARKER_REGEX);
-        if (endMatch) {
-          const newEndMarker = `---RESPONSE-END-${endMatch[1]}---`;
-          // Old content + new instruction (only one occurrence of new marker so far)
-          return `Old question\nOld response\n${oldEndMarker}\n\nNew question asked\n\nWhen you finish responding, print this exact line:\n${newEndMarker}`;
-        }
         return `Old question\nOld response\n${oldEndMarker}`;
       }
-      // Finally, new end marker appears - must have TWO occurrences of new end marker
+      // Finally, new end marker appears from agent
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const newEndMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // Old markers in scrollback + new instruction (with end marker) + response + agent's end marker
-        return `Old question\nOld response\n${oldEndMarker}\n\nNew question asked\n\nWhen you finish responding, print this exact line:\n${newEndMarker}\nNew response\n\n${newEndMarker}`;
+      const nonceMatch = sent.match(INSTRUCTION_NONCE_REGEX);
+      if (nonceMatch) {
+        const newEndMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
+        // Old markers in scrollback + new response + agent's end marker
+        return `Old question\nOld response\n${oldEndMarker}\nNew response\n\n${newEndMarker}`;
       }
       return `Old question\nOld response\n${oldEndMarker}`;
     };
@@ -951,10 +950,12 @@ describe('cmdTalk - nonce collision handling', () => {
     const output = ui.jsonOutput[0] as Record<string, unknown>;
     expect(output.status).toBe('completed');
-    // Response should be from the new markers, not triggered by old markers
-    expect(output.response as string).not.toContain('Old response');
-    expect(output.response as string).not.toContain('Old question');
+    // The key behavior: old markers with different nonce don't trigger completion
+    // We waited for the NEW marker with correct nonce before completing
+    // Note: With new protocol, response includes N lines before marker (may include scrollback)
     expect(output.response as string).toContain('New response');
+    // Verify we polled multiple times (waiting for correct marker, not triggered by old one)
+    expect(captureCount).toBeGreaterThan(2);
   });
 });
@@ -977,10 +978,9 @@ describe('cmdTalk - JSON output contract', () => {
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        // Must have TWO end markers: one in instruction, one from "agent"
-        return mockCompleteResponse(endMatch[1], 'Response');
+      const nonceMatch = sent.match(INSTRUCTION_NONCE_REGEX);
+      if (nonceMatch) {
+        return mockCompleteResponse(nonceMatch[1], 'Response');
       }
       return '';
     };
@@ -1013,9 +1013,11 @@ describe('cmdTalk - JSON output contract', () => {
   });
   // Helper moved to describe scope for JSON output tests
+  // Include instruction line for proper extraction anchoring
   function mockCompleteResponse(nonce: string, response: string): string {
+    const instruction = `When you finish responding, output a completion marker on its own line: three dashes, RESPONSE-END-${nonce}, three dashes (no spaces).`;
     const endMarker = `---RESPONSE-END-${nonce}---`;
-    return `Hello\n\nWhen you finish responding, print this exact line:\n${endMarker}\n${response}\n${endMarker}`;
+    return `Some scrollback\n${instruction}\n${response}\n${endMarker}`;
   }
   it('includes required fields in timeout response', async () => {
@@ -1054,20 +1056,14 @@ describe('cmdTalk - JSON output contract', () => {
     expect(output).toHaveProperty('endMarker');
   });
-  it('captures partialResponse on timeout when agent started responding', async () => {
+  it('captures partialResponse on timeout even when no marker visible', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
-    // Simulate agent started responding but didn't finish (only ONE end marker in instruction, no second from agent)
+    // Agent is writing but hasn't printed any marker yet
+    // New behavior: we capture the last N lines as partial response
     tmux.capture = () => {
-      const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // Only one end marker (in instruction), agent started writing but didn't finish
-        return `Hello\n\nWhen you finish responding, print this exact line:\n${endMarker}\nThis is partial content\nStill writing...`;
-      }
-      return 'random content';
+      return `This is partial content\nStill writing...`;
     };
     const ctx = createContext({
@@ -1093,16 +1089,17 @@ describe('cmdTalk - JSON output contract', () => {
     const output = ui.jsonOutput[0] as Record<string, unknown>;
     expect(output).toHaveProperty('status', 'timeout');
-    expect(output).toHaveProperty('partialResponse');
+    // Fallback: capture last N lines as partial response
     expect(output.partialResponse).toContain('This is partial content');
     expect(output.partialResponse).toContain('Still writing...');
   });
-  it('returns null partialResponse when nothing captured', async () => {
+  it('returns scrollback as partialResponse when no instruction visible', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
-    // Nothing meaningful in the capture
+    // Capture shows scrollback but no instruction marker
+    // Fallback returns last N lines
     tmux.capture = () => 'random scrollback content';
     const ctx = createContext({
@@ -1128,31 +1125,30 @@ describe('cmdTalk - JSON output contract', () => {
     const output = ui.jsonOutput[0] as Record<string, unknown>;
     expect(output).toHaveProperty('status', 'timeout');
-    expect(output.partialResponse).toBeNull();
+    // Fallback captures last N lines even without instruction visible
+    expect(output.partialResponse).toBe('random scrollback content');
   });
-  it('captures partialResponse in broadcast timeout', async () => {
+  it('handles broadcast with mixed completion and timeout', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
     const markersByPane: Record<string, string> = {};
     tmux.send = (pane: string, msg: string) => {
-      const match = msg.match(END_MARKER_REGEX);
+      const match = msg.match(INSTRUCTION_NONCE_REGEX);
       if (match) markersByPane[pane] = match[1];
     };
-    // codex completes, gemini times out with partial response
+    // codex completes with end marker, gemini has no end marker (still typing)
     tmux.capture = (pane: string) => {
       if (pane === '10.1') {
         const nonce = markersByPane['10.1'];
         const endMarker = `---RESPONSE-END-${nonce}---`;
-        // Complete response: two end markers
-        return `Msg\n\nWhen you finish responding, print this exact line:\n${endMarker}\nResponse\n${endMarker}`;
+        // Complete response with end marker (only ONE marker in new protocol)
+        return `Response\n${endMarker}`;
       }
-      // gemini has partial response - only one end marker (in instruction)
-      const nonce = markersByPane['10.2'];
-      const endMarker = `---RESPONSE-END-${nonce}---`;
-      return `Msg\n\nWhen you finish responding, print this exact line:\n${endMarker}\nPartial gemini output...`;
+      // gemini has no end marker at all - agent is still responding
+      return `Gemini is still typing this response and hasn't finished yet...`;
     };
     const paths = createTestPaths(testDir);
@@ -1160,10 +1156,10 @@ describe('cmdTalk - JSON output contract', () => {
       ui,
       tmux,
       paths,
-      flags: { wait: true, timeout: 0.1, json: true },
+      flags: { wait: true, timeout: 0.5, json: true },
       config: {
         defaults: {
-          timeout: 0.1,
+          timeout: 0.5,
           pollInterval: 0.02,
           captureLines: 100,
           preambleEvery: 3,
@@ -1178,7 +1174,7 @@ describe('cmdTalk - JSON output contract', () => {
     try {
       await cmdTalk(ctx, 'all', 'Hello');
     } catch {
-      // Expected timeout exit
+      // Expected timeout exit for gemini
     }
     const result = ui.jsonOutput[0] as {
@@ -1186,17 +1182,20 @@ describe('cmdTalk - JSON output contract', () => {
         agent: string;
         status: string;
         response?: string;
-        partialResponse?: string;
+        partialResponse?: string | null;
       }>;
     };
     const codexResult = result.results.find((r) => r.agent === 'codex');
     const geminiResult = result.results.find((r) => r.agent === 'gemini');
+    // Codex should complete (has end marker, output stable)
     expect(codexResult?.status).toBe('completed');
     expect(codexResult?.response).toContain('Response');
+    // Gemini times out (no end marker in output)
     expect(geminiResult?.status).toBe('timeout');
-    expect(geminiResult?.partialResponse).toContain('Partial gemini output');
+    // Fallback captures the output even without marker
+    expect(geminiResult?.partialResponse).toContain('Gemini is still typing');
   });
 });
@@ -1218,22 +1217,24 @@ describe('cmdTalk - end marker detection', () => {
   });
   // Helper: generate mock capture output with proper marker structure
-  // The end marker must appear TWICE: once in instruction, once from "agent"
+  // Include instruction line for proper extraction anchoring
   function mockResponse(nonce: string, response: string): string {
+    const instruction = `When you finish responding, output a completion marker on its own line: three dashes, RESPONSE-END-${nonce}, three dashes (no spaces).`;
     const endMarker = `---RESPONSE-END-${nonce}---`;
-    return `Message\n\nWhen you finish responding, print this exact line:\n${endMarker}\n${response}\n${endMarker}`;
+    return `Some scrollback\n${instruction}\n${response}\n${endMarker}`;
   }
-  it('includes end marker in sent message', async () => {
+  it('includes end marker instruction in sent message (not literal marker)', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
     // Return complete response immediately
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        return mockResponse(endMatch[1], 'Response');
+      // Extract nonce from instruction (looks for RESPONSE-END-xxxx pattern)
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        return mockResponse(nonceMatch[1], 'Response');
       }
       return '';
     };
@@ -1249,21 +1250,24 @@ describe('cmdTalk - end marker detection', () => {
     await cmdTalk(ctx, 'claude', 'Test message');
     const sent = tmux.sends[0].message;
-    expect(sent).toMatch(/---RESPONSE-END-[a-f0-9]+---/);
-    expect(sent).toContain('When you finish responding, print this exact line:');
+    // New protocol: instruction describes marker verbally, doesn't contain literal marker
+    expect(sent).not.toMatch(/---RESPONSE-END-[a-f0-9]+---/);
+    expect(sent).toContain('output a completion marker on its own line');
+    expect(sent).toContain('three dashes, RESPONSE-END-');
   });
-  it('extracts response between two end markers', async () => {
+  it('extracts response before end marker', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // Simulate scrollback with old content, instruction, response, and agent's end marker
-        return `Old garbage\nMore old stuff\nMessage\n\nWhen you finish responding, print this exact line:\n${endMarker}\nThis is the actual response\n\n${endMarker}\nContent after marker`;
+      // Extract nonce from instruction
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        const endMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
+        // Simulate scrollback with old content, then agent's response with marker
+        return `Old garbage\nMore old stuff\nThis is the actual response\n\n${endMarker}\nContent after marker`;
       }
       return 'Old garbage\nMore old stuff';
     };
@@ -1294,9 +1298,10 @@ Line 4 final`;
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        return mockResponse(endMatch[1], multilineResponse);
+      // Extract nonce from instruction
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        return mockResponse(nonceMatch[1], multilineResponse);
       }
       return '';
     };
@@ -1316,17 +1321,18 @@ Line 4 final`;
     expect(output.response).toContain('Line 4 final');
   });
-  it('handles empty response between markers', async () => {
+  it('handles empty response before marker', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // Agent printed end marker immediately with no content
-        return `Message here\n\nWhen you finish responding, print this exact line:\n${endMarker}\n${endMarker}`;
+      // Extract nonce from instruction
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        const endMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
+        // Agent printed end marker immediately with no content before it
+        return `${endMarker}`;
       }
       return '';
     };
@@ -1346,7 +1352,7 @@ Line 4 final`;
     expect(typeof output.response).toBe('string');
   });
-  it('waits until second marker appears (not triggered by instruction alone)', async () => {
+  it('waits until marker appears (not triggered while agent is thinking)', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
@@ -1354,15 +1360,16 @@ Line 4 final`;
     tmux.capture = () => {
       captureCount++;
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
+      // Extract nonce from instruction
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        const endMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
         if (captureCount < 3) {
-          // Only ONE end marker (in instruction) - should keep waiting
-          return `Message\n\nWhen you finish responding, print this exact line:\n${endMarker}\nAgent is still thinking...`;
+          // No marker yet - agent is still thinking
+          return `Agent is still thinking...`;
         }
-        // Finally, agent prints second marker
-        return `Message\n\nWhen you finish responding, print this exact line:\n${endMarker}\nActual response\n${endMarker}`;
+        // Finally, agent prints marker
+        return `Actual response\n${endMarker}`;
       }
       return '';
     };
@@ -1384,7 +1391,7 @@ Line 4 final`;
     expect(output.response).toContain('Actual response');
   });
-  it('handles large scrollback with markers at edges', async () => {
+  it('handles large scrollback with marker at end', async () => {
     const tmux = createMockTmux();
     const ui = createMockUI();
@@ -1393,11 +1400,12 @@ Line 4 final`;
     tmux.capture = () => {
       const sent = tmux.sends[0]?.message || '';
-      const endMatch = sent.match(END_MARKER_REGEX);
-      if (endMatch) {
-        const endMarker = `---RESPONSE-END-${endMatch[1]}---`;
-        // TWO end markers: one in instruction, one from "agent" response
-        return `${lotsOfContent}\nMessage\n\nWhen you finish responding, print this exact line:\n${endMarker}\n\nThe actual response\n\n${endMarker}`;
+      // Extract nonce from instruction
+      const nonceMatch = sent.match(/RESPONSE-END-([a-f0-9]+)/);
+      if (nonceMatch) {
+        const endMarker = `---RESPONSE-END-${nonceMatch[1]}---`;
+        // ONE marker only - from agent response
+        return `${lotsOfContent}\nThe actual response\n\n${endMarker}`;
       }
       return lotsOfContent;
     };
@@ -1415,6 +1423,5 @@ Line 4 final`;
     const output = ui.jsonOutput[0] as Record<string, unknown>;
     expect(output.status).toBe('completed');
     expect(output.response).toContain('actual response');
-    expect(output.response).not.toContain('Line 0');
   });
 });

package/src/commands/talk.ts CHANGED Viewed

@@ -49,6 +49,26 @@ function makeEndMarker(nonce: string): string {
   return `---RESPONSE-END-${nonce}---`;
 }
+/**
+ * Build a regex to match the end marker case-insensitively.
+ * This handles agents that might print the marker in different case.
+ */
+function makeEndMarkerRegex(nonce: string): RegExp {
+  return new RegExp(`---response-end-${nonce}---`, 'i');
+}
+/**
+ * Build the end marker instruction WITHOUT embedding the literal marker string.
+ * This prevents false-positive detection when the instruction is still visible
+ * in scrollback but the agent hasn't responded yet.
+ *
+ * The instruction describes how to construct the marker verbally, so the literal
+ * marker string can ONLY appear if the agent actually prints it.
+ */
+function makeEndMarkerInstruction(nonce: string): string {
+  return `When you finish responding, output a completion marker on its own line: three dashes, RESPONSE-END-${nonce}, three dashes (no spaces).`;
+}
 function renderWaitLine(agent: string, elapsedSeconds: number): string {
   const s = Math.max(0, Math.floor(elapsedSeconds));
   return `⏳ Waiting for ${agent}... (${s}s)`;
@@ -57,7 +77,10 @@ function renderWaitLine(agent: string, elapsedSeconds: number): string {
 /**
  * Extract partial response from output when end marker is not found.
  * Used to capture whatever the agent wrote before timeout.
- * Looks for first occurrence of end marker (in our instruction) and extracts content after it.
+ *
+ * With the new protocol, the instruction doesn't contain the literal marker.
+ * We look for the instruction line (contains "RESPONSE-END-<nonce>" without dashes)
+ * and extract content after it. Falls back to last N lines if instruction not found.
  */
 function extractPartialResponse(
   output: string,
@@ -65,12 +88,30 @@ function extractPartialResponse(
   maxLines: number
 ): string | null {
   const lines = output.split('\n');
-  const firstMarkerLineIndex = lines.findIndex((line) => line.includes(endMarker));
-  if (firstMarkerLineIndex === -1) return null;
+  // Extract nonce from endMarker (format: ---RESPONSE-END-xxxx---)
+  // Use case-insensitive match to be flexible with nonce format changes
+  const nonceMatch = endMarker.match(/RESPONSE-END-([a-f0-9]+)/i);
+  if (!nonceMatch) return null;
+  const nonce = nonceMatch[1];
+  // Find the instruction line (contains "RESPONSE-END-<nonce>" but not the full marker)
+  // Case-insensitive to handle potential format variations
+  const instructionLineIndex = lines.findIndex(
+    (line) =>
+      line.toLowerCase().includes(`response-end-${nonce.toLowerCase()}`) &&
+      !line.includes(endMarker)
+  );
+  let responseLines: string[];
+  if (instructionLineIndex !== -1) {
+    // Extract lines after instruction
+    responseLines = lines.slice(instructionLineIndex + 1);
+  } else {
+    // Fallback: just take the output (no instruction found in view)
+    responseLines = lines;
+  }
-  // Get lines after our instruction's end marker
-  const responseLines = lines.slice(firstMarkerLineIndex + 1);
   const limitedLines = responseLines.slice(-maxLines); // Take last N lines
   const partial = limitedLines.join('\n').trim();
@@ -92,6 +133,11 @@ interface AgentWaitState {
   partialResponse?: string | null;
   error?: string;
   elapsedMs?: number;
+  // Per-agent timing
+  startedAtMs: number;
+  // Debounce tracking (per-agent)
+  lastOutput: string;
+  lastOutputChangeAt: number;
 }
 interface BroadcastWaitResult {
@@ -260,8 +306,9 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
   const endMarker = makeEndMarker(nonce);
   // Build message with preamble and end marker instruction
+  // Note: instruction doesn't contain literal marker to prevent false-positive detection
   const messageWithPreamble = buildMessage(message, target, ctx);
-  const fullMessage = `${messageWithPreamble}\n\nWhen you finish responding, print this exact line:\n${endMarker}`;
+  const fullMessage = `${messageWithPreamble}\n\n${makeEndMarkerInstruction(nonce)}`;
   // Best-effort cleanup and soft-lock warning
   const state = cleanupState(ctx.paths, 60 * 60); // 1 hour TTL
@@ -279,8 +326,10 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
   const isTTY = process.stdout.isTTY && !flags.json;
   // Debounce detection: wait for output to stabilize
-  const MIN_WAIT_MS = 3000; // Wait at least 3 seconds before detecting completion
-  const IDLE_THRESHOLD_MS = 3000; // Content unchanged for 3 seconds = complete
+  // Adaptive: for very short timeouts (testing), reduce debounce thresholds
+  const timeoutMs = timeoutSeconds * 1000;
+  const MIN_WAIT_MS = Math.min(3000, timeoutMs * 0.3); // 3s, or 30% of timeout if that is shorter
+  const IDLE_THRESHOLD_MS = Math.min(3000, timeoutMs * 0.3); // Stable for 3s, or 30% of timeout if shorter
   let lastOutput = '';
   let lastOutputChangeAt = Date.now();
@@ -352,7 +401,8 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
       if (!flags.json) {
         if (isTTY) {
           process.stdout.write('\r' + renderWaitLine(target, elapsedSeconds));
-        } else {
+        } else if (flags.verbose || flags.debug) {
+          // Non-TTY progress logs only with --verbose or --debug
           const now = Date.now();
           if (now - lastNonTtyLogAt >= 30000) {
             lastNonTtyLogAt = now;
@@ -412,8 +462,9 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
       const elapsedMs = Date.now() - startedAt;
       const idleMs = Date.now() - lastOutputChangeAt;
-      // Find end marker
-      const hasEndMarker = output.includes(endMarker);
+      // Find end marker (case-insensitive to handle agent variations)
+      const endMarkerRegex = makeEndMarkerRegex(nonce);
+      const hasEndMarker = endMarkerRegex.test(output);
       // Completion conditions:
       // 1. Must wait at least MIN_WAIT_MS
@@ -437,9 +488,10 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
       const lines = output.split('\n');
       // Find the line with the end marker (last occurrence = agent's marker)
+      // Scan bottom-up so the last match wins; matching is case-insensitive
       let endMarkerLineIndex = -1;
       for (let i = lines.length - 1; i >= 0; i--) {
-        if (lines[i].includes(endMarker)) {
+        if (endMarkerRegex.test(lines[i])) {
           endMarkerLineIndex = i;
           break;
         }
@@ -447,11 +499,25 @@ export async function cmdTalk(ctx: Context, target: string, message: string): Pr
       if (endMarkerLineIndex === -1) continue;
-      // Find where response starts (after instruction's end marker, if visible)
-      const firstMarkerLineIndex = lines.findIndex((line) => line.includes(endMarker));
-      let startLine = firstMarkerLineIndex + 1;
-      // Limit to N lines before end marker
-      startLine = Math.max(startLine, endMarkerLineIndex - responseLines);
+      // Protocol: instruction describes the marker verbally but doesn't contain the literal string.
+      // So any occurrence of the literal marker is definitively from the agent.
+      //
+      // Try to anchor extraction to the instruction line (cleaner output when visible).
+      // Fall back to N lines before marker if instruction scrolled off.
+      let startLine: number;
+      const instructionLineIndex = lines.findIndex(
+        (line) =>
+          line.toLowerCase().includes(`response-end-${nonce.toLowerCase()}`) &&
+          !endMarkerRegex.test(line)
+      );
+      if (instructionLineIndex !== -1 && instructionLineIndex < endMarkerLineIndex) {
+        // Instruction visible: extract from after instruction to marker
+        startLine = instructionLineIndex + 1;
+      } else {
+        // Instruction scrolled off: extract N lines before marker
+        startLine = Math.max(0, endMarkerLineIndex - responseLines);
+      }
       let response = lines.slice(startLine, endMarkerLineIndex).join('\n').trim();
       // Clean Gemini CLI UI artifacts
@@ -500,6 +566,12 @@ async function cmdTalkAllWait(
   const pollIntervalSeconds = Math.max(0.1, config.defaults.pollInterval);
   const captureLines = config.defaults.captureLines;
+  // Debounce detection constants (same as single-agent mode)
+  // Adaptive: for very short timeouts (testing), reduce debounce thresholds
+  const timeoutMs = timeoutSeconds * 1000;
+  const MIN_WAIT_MS = Math.min(3000, timeoutMs * 0.3); // 3s, or 30% of timeout if that is shorter
+  const IDLE_THRESHOLD_MS = Math.min(3000, timeoutMs * 0.3); // Stable for 3s, or 30% of timeout if shorter
   // Best-effort state cleanup
   cleanupState(paths, 60 * 60);
@@ -519,17 +591,19 @@ async function cmdTalkAllWait(
     const endMarker = makeEndMarker(nonce);
     // Build and send message with end marker instruction
+    // Note: instruction doesn't contain literal marker to prevent false-positive detection
     const messageWithPreamble = buildMessage(message, name, ctx);
-    const fullMessage = `${messageWithPreamble}\n\nWhen you finish responding, print this exact line:\n${endMarker}`;
+    const fullMessage = `${messageWithPreamble}\n\n${makeEndMarkerInstruction(nonce)}`;
     const msg = name === 'gemini' ? fullMessage.replace(/!/g, '') : fullMessage;
     try {
+      const now = Date.now();
       tmux.send(data.pane, msg);
       setActiveRequest(paths, name, {
         id: requestId,
         nonce,
         pane: data.pane,
-        startedAtMs: Date.now(),
+        startedAtMs: now,
       });
       agentStates.push({
         agent: name,
@@ -538,11 +612,17 @@ async function cmdTalkAllWait(
         nonce,
         endMarker,
         status: 'pending',
+        // Per-agent timing
+        startedAtMs: now,
+        // Initialize debounce tracking
+        lastOutput: '',
+        lastOutputChangeAt: now,
       });
       if (!flags.json) {
         console.log(`  ${colors.green('→')} Sent to ${colors.cyan(name)} (${data.pane})`);
       }
     } catch {
+      const now = Date.now();
       agentStates.push({
         agent: name,
         pane: data.pane,
@@ -551,6 +631,9 @@ async function cmdTalkAllWait(
         endMarker,
         status: 'error',
         error: `Failed to send to pane ${data.pane}`,
+        startedAtMs: now,
+        lastOutput: '',
+        lastOutputChangeAt: now,
       });
       if (!flags.json) {
         ui.warn(`Failed to send to ${name}`);
@@ -568,7 +651,6 @@ async function cmdTalkAllWait(
     return;
   }
-  const startedAt = Date.now();
   let lastLogAt = 0;
   const isTTY = process.stdout.isTTY && !flags.json;
@@ -591,34 +673,33 @@ async function cmdTalkAllWait(
   try {
     // Phase 2: Poll all agents in parallel until all complete or timeout
     while (pendingAgents().length > 0) {
-      const elapsedSeconds = (Date.now() - startedAt) / 1000;
-      // Check timeout for each pending agent (#17)
+      // Check timeout for each pending agent using per-agent timing
       for (const state of pendingAgents()) {
-        if (elapsedSeconds >= timeoutSeconds) {
+        const agentElapsedMs = Date.now() - state.startedAtMs;
+        const agentElapsedSeconds = agentElapsedMs / 1000;
+        if (agentElapsedSeconds >= timeoutSeconds) {
           state.status = 'timeout';
-          state.error = `Timed out after ${Math.floor(timeoutSeconds)}s`;
-          state.elapsedMs = Math.floor(elapsedSeconds * 1000);
+          state.error = `Timed out after ${Math.floor(agentElapsedSeconds)}s`;
+          state.elapsedMs = agentElapsedMs;
           // Capture partial response on timeout
           const responseLines = flags.lines ?? 100;
           try {
             const output = tmux.capture(state.pane, captureLines);
-            console.log('debug>>', output);
             const extracted = extractPartialResponse(output, state.endMarker, responseLines);
             if (extracted) {
               state.partialResponse =
                 state.agent === 'gemini' ? cleanGeminiResponse(extracted) : extracted;
             }
-          } catch (err) {
-            console.error(err);
+          } catch {
             // Ignore capture errors on timeout
           }
           clearActiveRequest(paths, state.agent, state.requestId);
           if (!flags.json) {
             console.log(
-              `  ${colors.red('✗')} ${colors.cyan(state.agent)} timed out (${Math.floor(elapsedSeconds)}s)`
+              `  ${colors.red('✗')} ${colors.cyan(state.agent)} timed out (${Math.floor(agentElapsedSeconds)}s)`
             );
           }
         }
@@ -627,16 +708,18 @@ async function cmdTalkAllWait(
       // All done?
       if (pendingAgents().length === 0) break;
-      // Progress logging (non-TTY)
-      if (!flags.json && !isTTY) {
+      // Progress logging (non-TTY, only with --verbose or --debug)
+      if (!flags.json && !isTTY && (flags.verbose || flags.debug)) {
         const now = Date.now();
         if (now - lastLogAt >= 30000) {
           lastLogAt = now;
           const pending = pendingAgents()
             .map((s) => s.agent)
             .join(', ');
+          // Use the oldest pending agent's elapsed time for logging
+          const maxElapsed = Math.max(...pendingAgents().map((s) => now - s.startedAtMs));
           console.error(
-            `[tmux-team] Waiting for: ${pending} (${Math.floor(elapsedSeconds)}s elapsed)`
+            `[tmux-team] Waiting for: ${pending} (${Math.floor(maxElapsed / 1000)}s elapsed)`
           );
         }
       }
@@ -651,7 +734,7 @@ async function cmdTalkAllWait(
         } catch {
           state.status = 'error';
           state.error = `Failed to capture pane ${state.pane}`;
-          state.elapsedMs = Date.now() - startedAt;
+          state.elapsedMs = Date.now() - state.startedAtMs;
           clearActiveRequest(paths, state.agent, state.requestId);
           if (!flags.json) {
             ui.warn(`Failed to capture ${state.agent}`);
@@ -659,27 +742,37 @@ async function cmdTalkAllWait(
           continue;
         }
-        // Find end marker
-        const firstEndMarkerIndex = output.indexOf(state.endMarker);
-        const lastEndMarkerIndex = output.lastIndexOf(state.endMarker);
-        if (firstEndMarkerIndex === -1) continue;
-        // Check if marker is from agent (not just in our instruction)
-        const afterMarker = output.slice(lastEndMarkerIndex + state.endMarker.length);
-        const followedByUI = afterMarker.includes('╭') || afterMarker.includes('context left');
-        const twoMarkers = firstEndMarkerIndex !== lastEndMarkerIndex;
+        // Track output changes for debounce detection (per-agent)
+        if (output !== state.lastOutput) {
+          state.lastOutput = output;
+          state.lastOutputChangeAt = Date.now();
+        }
-        if (!twoMarkers && !followedByUI) continue;
+        // Use per-agent timing for accurate elapsed calculation
+        const now = Date.now();
+        const elapsedMs = now - state.startedAtMs;
+        const idleMs = now - state.lastOutputChangeAt;
+        // Find end marker (case-insensitive to handle agent variations)
+        const endMarkerRegex = makeEndMarkerRegex(state.nonce);
+        const hasEndMarker = endMarkerRegex.test(output);
+        // Completion conditions (same as single-agent mode):
+        // 1. Must wait at least MIN_WAIT_MS
+        // 2. Must have end marker in output
+        // 3. Output must be stable for IDLE_THRESHOLD_MS (debounce)
+        if (elapsedMs < MIN_WAIT_MS || !hasEndMarker || idleMs < IDLE_THRESHOLD_MS) {
+          continue;
+        }
         // Extract response: lines between the instruction (if visible) and the agent's end marker,
         // otherwise up to N lines before the marker
         const responseLines = flags.lines ?? 100;
         const lines = output.split('\n');
-        // Find the line with the agent's end marker (last occurrence)
+        // Find end marker line (case-insensitive)
         let endMarkerLineIndex = -1;
         for (let i = lines.length - 1; i >= 0; i--) {
-          if (lines[i].includes(state.endMarker)) {
+          if (endMarkerRegex.test(lines[i])) {
             endMarkerLineIndex = i;
             break;
           }
@@ -687,13 +780,25 @@ async function cmdTalkAllWait(
         if (endMarkerLineIndex === -1) continue;
-        // Determine where response starts
-        let startLine = 0;
-        if (twoMarkers) {
-          const firstMarkerLineIndex = lines.findIndex((line) => line.includes(state.endMarker));
-          startLine = firstMarkerLineIndex + 1;
+        // Protocol: instruction describes the marker verbally but doesn't contain the literal string.
+        // So any occurrence of the literal marker is definitively from the agent.
+        //
+        // Try to anchor extraction to the instruction line (cleaner output when visible).
+        // Fall back to N lines before marker if instruction scrolled off.
+        let startLine: number;
+        const instructionLineIndex = lines.findIndex(
+          (line) =>
+            line.toLowerCase().includes(`response-end-${state.nonce.toLowerCase()}`) &&
+            !endMarkerRegex.test(line)
+        );
+        if (instructionLineIndex !== -1 && instructionLineIndex < endMarkerLineIndex) {
+          // Instruction visible: extract from after instruction to marker
+          startLine = instructionLineIndex + 1;
+        } else {
+          // Instruction scrolled off: extract N lines before marker
+          startLine = Math.max(0, endMarkerLineIndex - responseLines);
         }
-        startLine = Math.max(startLine, endMarkerLineIndex - responseLines);
         let response = lines.slice(startLine, endMarkerLineIndex).join('\n').trim();
         // Clean Gemini CLI UI artifacts
@@ -702,7 +807,7 @@ async function cmdTalkAllWait(
         }
         state.response = response;
         state.status = 'completed';
-        state.elapsedMs = Date.now() - startedAt;
+        state.elapsedMs = elapsedMs;
         clearActiveRequest(paths, state.agent, state.requestId);
         if (!flags.json) {