@pjmendonca/devflow 1.20.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/.claude/hooks/session-startup.sh +20 -0
  2. package/.claude/settings.json +0 -4
  3. package/CHANGELOG.md +5 -0
  4. package/README.md +2 -2
  5. package/package.json +1 -1
  6. package/tooling/.automation/memory/knowledge/kg_integration-test.json +128 -1
  7. package/tooling/.automation/memory/knowledge/kg_test-story.json +428 -2
  8. package/tooling/.automation/memory/shared/shared_integration-test.json +37 -1
  9. package/tooling/.automation/memory/shared/shared_test-story.json +109 -1
  10. package/tooling/.automation/memory/shared/shared_test.json +157 -1
  11. package/tooling/.automation/memory/shared/shared_validation-check.json +40 -1
  12. package/tooling/.automation/validation/history/2026-01-18_val_108c18cf.json +32 -0
  13. package/tooling/.automation/validation/history/2026-01-18_val_35ee606f.json +32 -0
  14. package/tooling/.automation/validation/history/2026-01-18_val_3fc7268b.json +41 -0
  15. package/tooling/.automation/validation/history/2026-01-18_val_49f0bb17.json +32 -0
  16. package/tooling/.automation/validation/history/2026-01-18_val_53c928d2.json +59 -0
  17. package/tooling/.automation/validation/history/2026-01-18_val_55604791.json +32 -0
  18. package/tooling/.automation/validation/history/2026-01-18_val_67e695f0.json +41 -0
  19. package/tooling/.automation/validation/history/2026-01-18_val_82784713.json +41 -0
  20. package/tooling/.automation/validation/history/2026-01-18_val_94a8e584.json +32 -0
  21. package/tooling/.automation/validation/history/2026-01-18_val_95353af0.json +32 -0
  22. package/tooling/.automation/validation/history/2026-01-18_val_9a046f3a.json +32 -0
  23. package/tooling/.automation/validation/history/2026-01-18_val_b3443d2e.json +32 -0
  24. package/tooling/.automation/validation/history/2026-01-18_val_bfd298f4.json +32 -0
  25. package/tooling/.automation/validation/history/2026-01-18_val_cfc2a362.json +32 -0
  26. package/tooling/.automation/validation/history/2026-01-18_val_e581a3d2.json +41 -0
  27. package/tooling/scripts/lib/__init__.py +1 -3
  28. package/tooling/scripts/lib/agent_router.py +0 -4
  29. package/tooling/scripts/lib/swarm_orchestrator.py +1 -1
  30. package/tooling/scripts/run-collab.py +1 -45
  31. package/.claude/commands/pair.md +0 -23
  32. package/tooling/scripts/lib/pair_programming.py +0 -690
@@ -1,690 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Pair Programming Mode - DEV + REVIEWER Interleaved Collaboration
4
-
5
- Real-time collaboration between DEV and REVIEWER where:
6
- - DEV implements in small chunks
7
- - REVIEWER provides immediate feedback
8
- - DEV addresses feedback before continuing
9
- - Results in higher quality, fewer iterations
10
-
11
- Features:
12
- - Chunk-based development
13
- - Real-time feedback loops
14
- - Incremental refinement
15
- - Shared context maintenance
16
- - Automatic issue tracking
17
-
18
- Usage:
19
- from lib.pair_programming import PairSession, start_pair_session
20
-
21
- session = start_pair_session(story_key="3-5", task="Implement user login")
22
- result = session.run()
23
- """
24
-
25
- import re
26
- import subprocess
27
- from dataclasses import dataclass, field
28
- from datetime import datetime
29
- from enum import Enum
30
- from pathlib import Path
31
- from typing import Optional
32
-
33
- # Import dependencies
34
- try:
35
- from lib.platform import IS_WINDOWS
36
- from lib.shared_memory import get_knowledge_graph, get_shared_memory
37
- except ImportError:
38
- # Fallback for when running from lib directory
39
- import sys as _sys
40
-
41
- IS_WINDOWS = _sys.platform == "win32"
42
- from shared_memory import get_knowledge_graph, get_shared_memory
43
-
44
- # Try to import validation loop
45
- try:
46
- from validation_loop import (
47
- INTER_PHASE_GATES,
48
- LoopContext,
49
- ValidationLoop,
50
- )
51
-
52
- HAS_VALIDATION = True
53
- except ImportError:
54
- try:
55
- from lib.validation_loop import (
56
- INTER_PHASE_GATES,
57
- LoopContext,
58
- ValidationLoop,
59
- )
60
-
61
- HAS_VALIDATION = True
62
- except ImportError:
63
- HAS_VALIDATION = False
64
-
65
-
66
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
67
- CLAUDE_CLI = "claude.cmd" if IS_WINDOWS else "claude"
68
-
69
- # Security: Maximum prompt length to prevent resource exhaustion
70
- MAX_PROMPT_LENGTH = 500_000 # ~500KB
71
-
72
-
73
- def _sanitize_prompt(prompt: str) -> str:
74
- """Sanitize prompt for safe subprocess execution.
75
-
76
- - Removes null bytes and control characters (except newlines/tabs)
77
- - Truncates to maximum length
78
- - Ensures valid UTF-8
79
-
80
- Args:
81
- prompt: Raw prompt string
82
-
83
- Returns:
84
- Sanitized prompt safe for subprocess
85
- """
86
- if not prompt:
87
- return ""
88
-
89
- # Remove null bytes and control characters (keep newlines, tabs, and printable chars)
90
- # Keep: \n (10), \t (9), \r (13), and all chars >= 32 (printable ASCII + UTF-8)
91
- sanitized = "".join(char for char in prompt if char in "\n\t\r" or ord(char) >= 32)
92
-
93
- # Truncate if too long
94
- if len(sanitized) > MAX_PROMPT_LENGTH:
95
- sanitized = sanitized[:MAX_PROMPT_LENGTH] + "\n[TRUNCATED]"
96
-
97
- return sanitized
98
-
99
-
100
- class ChunkType(Enum):
101
- """Types of code chunks."""
102
-
103
- DESIGN = "design" # Architecture/design decision
104
- IMPLEMENTATION = "implementation" # Core code
105
- TEST = "test" # Test code
106
- REFACTOR = "refactor" # Refactoring
107
- FIX = "fix" # Bug fix
108
- DOCUMENTATION = "documentation" # Docs/comments
109
-
110
-
111
- class FeedbackType(Enum):
112
- """Types of reviewer feedback."""
113
-
114
- APPROVE = "approve" # Good to proceed
115
- MINOR = "minor" # Minor issues, can proceed
116
- MAJOR = "major" # Major issues, must fix
117
- BLOCKING = "blocking" # Cannot proceed until fixed
118
- QUESTION = "question" # Needs clarification
119
-
120
-
121
- @dataclass
122
- class CodeChunk:
123
- """A chunk of code being developed."""
124
-
125
- chunk_id: str
126
- chunk_type: ChunkType
127
- description: str
128
- content: str
129
- file_path: Optional[str] = None
130
- line_range: Optional[tuple[int, int]] = None
131
- timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
132
-
133
- def to_dict(self) -> dict:
134
- return {
135
- "chunk_id": self.chunk_id,
136
- "chunk_type": self.chunk_type.value,
137
- "description": self.description,
138
- "content": self.content[:500] + "..." if len(self.content) > 500 else self.content,
139
- "file_path": self.file_path,
140
- "line_range": self.line_range,
141
- "timestamp": self.timestamp,
142
- }
143
-
144
-
145
- @dataclass
146
- class ReviewFeedback:
147
- """Feedback from reviewer on a chunk."""
148
-
149
- chunk_id: str
150
- feedback_type: FeedbackType
151
- comments: list[str]
152
- suggestions: list[str] = field(default_factory=list)
153
- must_fix: list[str] = field(default_factory=list)
154
- nice_to_have: list[str] = field(default_factory=list)
155
- approved: bool = False
156
- timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
157
-
158
- def to_dict(self) -> dict:
159
- return {
160
- "chunk_id": self.chunk_id,
161
- "feedback_type": self.feedback_type.value,
162
- "comments": self.comments,
163
- "suggestions": self.suggestions,
164
- "must_fix": self.must_fix,
165
- "nice_to_have": self.nice_to_have,
166
- "approved": self.approved,
167
- "timestamp": self.timestamp,
168
- }
169
-
170
- def has_blocking_issues(self) -> bool:
171
- return self.feedback_type in [FeedbackType.MAJOR, FeedbackType.BLOCKING]
172
-
173
-
174
- @dataclass
175
- class PairExchange:
176
- """A single exchange in the pair programming session."""
177
-
178
- exchange_id: int
179
- chunk: CodeChunk
180
- feedback: Optional[ReviewFeedback] = None
181
- revision: Optional[CodeChunk] = None
182
- resolved: bool = False
183
-
184
- def to_dict(self) -> dict:
185
- return {
186
- "exchange_id": self.exchange_id,
187
- "chunk": self.chunk.to_dict(),
188
- "feedback": self.feedback.to_dict() if self.feedback else None,
189
- "revision": self.revision.to_dict() if self.revision else None,
190
- "resolved": self.resolved,
191
- }
192
-
193
-
194
- @dataclass
195
- class PairSessionResult:
196
- """Result of a pair programming session."""
197
-
198
- story_key: str
199
- task: str
200
- exchanges: list[PairExchange]
201
- final_code: str
202
- files_created: list[str]
203
- files_modified: list[str]
204
- total_chunks: int
205
- total_revisions: int
206
- approval_rate: float
207
- start_time: str
208
- end_time: str
209
-
210
- def to_dict(self) -> dict:
211
- return {
212
- "story_key": self.story_key,
213
- "task": self.task,
214
- "exchanges": [e.to_dict() for e in self.exchanges],
215
- "final_code": self.final_code[:1000]
216
- if len(self.final_code) > 1000
217
- else self.final_code,
218
- "files_created": self.files_created,
219
- "files_modified": self.files_modified,
220
- "total_chunks": self.total_chunks,
221
- "total_revisions": self.total_revisions,
222
- "approval_rate": self.approval_rate,
223
- "start_time": self.start_time,
224
- "end_time": self.end_time,
225
- }
226
-
227
- def to_summary(self) -> str:
228
- """Generate human-readable summary."""
229
- lines = [
230
- f"## Pair Programming Result: {self.story_key}",
231
- "",
232
- f"**Task**: {self.task}",
233
- f"**Exchanges**: {len(self.exchanges)}",
234
- f"**Chunks**: {self.total_chunks}",
235
- f"**Revisions**: {self.total_revisions}",
236
- f"**Approval Rate**: {self.approval_rate:.0%}",
237
- "",
238
- "### Files Created",
239
- ]
240
-
241
- for f in self.files_created:
242
- lines.append(f"- `{f}`")
243
-
244
- if self.files_modified:
245
- lines.append("")
246
- lines.append("### Files Modified")
247
- for f in self.files_modified:
248
- lines.append(f"- `{f}`")
249
-
250
- return "\n".join(lines)
251
-
252
-
253
- @dataclass
254
- class PairConfig:
255
- """Configuration for pair programming session."""
256
-
257
- max_revisions_per_chunk: int = 3
258
- timeout_seconds: int = 180
259
- verbose: bool = True
260
- auto_apply_fixes: bool = True
261
- chunk_size_hint: str = "medium" # small, medium, large
262
- reviewer_model: str = "opus"
263
- dev_model: str = "opus"
264
- validation_enabled: bool = True # Enable validation between revisions
265
-
266
- def to_dict(self) -> dict:
267
- return {
268
- "max_revisions_per_chunk": self.max_revisions_per_chunk,
269
- "timeout_seconds": self.timeout_seconds,
270
- "auto_apply_fixes": self.auto_apply_fixes,
271
- "validation_enabled": self.validation_enabled,
272
- "chunk_size_hint": self.chunk_size_hint,
273
- "reviewer_model": self.reviewer_model,
274
- "dev_model": self.dev_model,
275
- }
276
-
277
-
278
- class PairSession:
279
- """A pair programming session between DEV and REVIEWER."""
280
-
281
- def __init__(self, story_key: str, task: str, config: Optional[PairConfig] = None):
282
- self.story_key = story_key
283
- self.task = task
284
- self.config = config or PairConfig()
285
- self.project_root = PROJECT_ROOT
286
- self.shared_memory = get_shared_memory(story_key)
287
- self.knowledge_graph = get_knowledge_graph(story_key)
288
-
289
- self.exchanges: list[PairExchange] = []
290
- self.files_created: list[str] = []
291
- self.files_modified: list[str] = []
292
- self.chunk_counter = 0
293
- self.exchange_counter = 0
294
-
295
- # Initialize validation loop if available and enabled
296
- self.validation_loop = None
297
- self.validation_context = None
298
- if HAS_VALIDATION and self.config.validation_enabled:
299
- self.validation_loop = ValidationLoop(
300
- gates=INTER_PHASE_GATES,
301
- config={"auto_fix_enabled": self.config.auto_apply_fixes},
302
- story_key=story_key,
303
- )
304
- self.validation_context = LoopContext(
305
- story_key=story_key,
306
- max_iterations=self.config.max_revisions_per_chunk,
307
- )
308
-
309
- def _log(self, message: str, agent: str = "SYSTEM"):
310
- """Log a message."""
311
- if self.config.verbose:
312
- timestamp = datetime.now().strftime("%H:%M:%S")
313
- emoji = {"DEV": "", "REVIEWER": "", "SYSTEM": ""}.get(agent, "•")
314
- print(f"[{timestamp}] {emoji} [{agent}] {message}")
315
-
316
- def _run_revision_validation(self, chunk_id: str, revision_num: int) -> bool:
317
- """Run validation between DEV revisions.
318
-
319
- Args:
320
- chunk_id: ID of the current chunk
321
- revision_num: Current revision number
322
-
323
- Returns:
324
- True if validation passed
325
- """
326
- if not self.validation_loop or not self.validation_context:
327
- return True
328
-
329
- self.validation_context.iteration = revision_num
330
- self.validation_context.phase = f"chunk_{chunk_id}_revision_{revision_num}"
331
-
332
- report = self.validation_loop.run_gates(self.validation_context, tier=2)
333
-
334
- if report.passed:
335
- self._log(f"[VALIDATION] Revision {revision_num} passed validation")
336
- return True
337
- else:
338
- for failure in report.failures:
339
- self._log(f"[VALIDATION] {failure.gate_name}: {failure.message}", "SYSTEM")
340
- return True # Don't block, just inform
341
-
342
- def _invoke_agent(self, agent: str, prompt: str) -> str:
343
- """Invoke an agent with Claude CLI."""
344
- model = self.config.dev_model if agent == "DEV" else self.config.reviewer_model
345
- sanitized_prompt = _sanitize_prompt(prompt)
346
-
347
- try:
348
- result = subprocess.run(
349
- [CLAUDE_CLI, "--print", "--model", model, "-p", sanitized_prompt],
350
- capture_output=True,
351
- text=True,
352
- timeout=self.config.timeout_seconds,
353
- cwd=str(self.project_root),
354
- )
355
- return result.stdout + result.stderr
356
- except subprocess.TimeoutExpired:
357
- return "[TIMEOUT: Agent did not respond in time]"
358
- except Exception as e:
359
- return f"[ERROR: {str(e)}]"
360
-
361
- def _generate_chunk_id(self) -> str:
362
- """Generate unique chunk ID."""
363
- self.chunk_counter += 1
364
- return f"chunk_{self.chunk_counter:03d}"
365
-
366
- def _parse_dev_output(self, output: str) -> CodeChunk:
367
- """Parse DEV output into a CodeChunk."""
368
- # Try to extract file path
369
- file_match = re.search(r'(?:file|path):\s*[`"]?([^\s`"]+)[`"]?', output, re.IGNORECASE)
370
- file_path = file_match.group(1) if file_match else None
371
-
372
- # Try to extract code blocks
373
- code_blocks = re.findall(r"```[\w]*\n(.*?)```", output, re.DOTALL)
374
- content = "\n\n".join(code_blocks) if code_blocks else output
375
-
376
- # Determine chunk type
377
- chunk_type = ChunkType.IMPLEMENTATION
378
- output_lower = output.lower()
379
- if "test" in output_lower:
380
- chunk_type = ChunkType.TEST
381
- elif "refactor" in output_lower:
382
- chunk_type = ChunkType.REFACTOR
383
- elif "design" in output_lower or "architecture" in output_lower:
384
- chunk_type = ChunkType.DESIGN
385
- elif "fix" in output_lower or "bug" in output_lower:
386
- chunk_type = ChunkType.FIX
387
-
388
- # Extract description
389
- desc_match = re.search(r"^#+\s*(.+)$", output, re.MULTILINE)
390
- description = desc_match.group(1) if desc_match else "Code implementation"
391
-
392
- return CodeChunk(
393
- chunk_id=self._generate_chunk_id(),
394
- chunk_type=chunk_type,
395
- description=description[:100],
396
- content=content,
397
- file_path=file_path,
398
- )
399
-
400
- def _parse_reviewer_output(self, output: str, chunk_id: str) -> ReviewFeedback:
401
- """Parse REVIEWER output into ReviewFeedback."""
402
- output_lower = output.lower()
403
-
404
- # Determine feedback type
405
- if any(word in output_lower for word in ["blocking", "cannot proceed", "critical"]):
406
- feedback_type = FeedbackType.BLOCKING
407
- elif any(word in output_lower for word in ["major", "significant", "must fix"]):
408
- feedback_type = FeedbackType.MAJOR
409
- elif any(word in output_lower for word in ["minor", "small", "nitpick"]):
410
- feedback_type = FeedbackType.MINOR
411
- elif any(word in output_lower for word in ["question", "clarify", "unclear"]):
412
- feedback_type = FeedbackType.QUESTION
413
- else:
414
- feedback_type = FeedbackType.APPROVE
415
-
416
- # Extract comments
417
- comments = []
418
- comment_patterns = [
419
- r"[-•]\s*(.+)",
420
- r"\d+\.\s*(.+)",
421
- ]
422
- for pattern in comment_patterns:
423
- matches = re.findall(pattern, output)
424
- comments.extend(matches[:10])
425
-
426
- # Extract must-fix issues
427
- must_fix = []
428
- must_fix_section = re.search(
429
- r"(?:must fix|blocking|required).*?:(.*?)(?:\n\n|\Z)", output, re.IGNORECASE | re.DOTALL
430
- )
431
- if must_fix_section:
432
- issues = re.findall(r"[-•]\s*(.+)", must_fix_section.group(1))
433
- must_fix.extend(issues)
434
-
435
- # Extract suggestions
436
- suggestions = []
437
- suggestion_section = re.search(
438
- r"(?:suggest|consider|recommend).*?:(.*?)(?:\n\n|\Z)", output, re.IGNORECASE | re.DOTALL
439
- )
440
- if suggestion_section:
441
- sugs = re.findall(r"[-•]\s*(.+)", suggestion_section.group(1))
442
- suggestions.extend(sugs)
443
-
444
- # Check for approval
445
- approved = feedback_type == FeedbackType.APPROVE or any(
446
- word in output_lower for word in ["lgtm", "approved", "looks good", "ship it"]
447
- )
448
-
449
- return ReviewFeedback(
450
- chunk_id=chunk_id,
451
- feedback_type=feedback_type,
452
- comments=comments[:10],
453
- suggestions=suggestions[:5],
454
- must_fix=must_fix[:5],
455
- approved=approved,
456
- )
457
-
458
- def _build_dev_prompt(
459
- self, task_part: str, context: str, previous_feedback: Optional[ReviewFeedback] = None
460
- ) -> str:
461
- """Build prompt for DEV agent."""
462
-
463
- base_prompt = f"""You are in a PAIR PROGRAMMING session with a REVIEWER.
464
- Work in small, focused chunks. After each chunk, wait for reviewer feedback.
465
-
466
- ## Task
467
- {task_part}
468
-
469
- ## Context
470
- {context}
471
- """
472
-
473
- if previous_feedback:
474
- feedback_text = "\n".join(
475
- [
476
- "## Reviewer Feedback (address these)",
477
- "",
478
- "**Issues to Fix:**",
479
- *[f"- {issue}" for issue in previous_feedback.must_fix],
480
- "",
481
- "**Suggestions:**",
482
- *[f"- {sug}" for sug in previous_feedback.suggestions],
483
- ]
484
- )
485
- base_prompt += f"\n\n{feedback_text}\n"
486
-
487
- base_prompt += """
488
- ## Instructions
489
- 1. Implement ONE focused chunk of code
490
- 2. Show the file path and code clearly
491
- 3. Explain your approach briefly
492
- 4. Keep the chunk small enough for quick review
493
- """
494
-
495
- return base_prompt
496
-
497
- def _build_reviewer_prompt(self, chunk: CodeChunk, accumulated_code: str) -> str:
498
- """Build prompt for REVIEWER agent."""
499
-
500
- return f"""You are in a PAIR PROGRAMMING session reviewing DEV's work in real-time.
501
-
502
- ## Current Chunk to Review
503
- **Type**: {chunk.chunk_type.value}
504
- **Description**: {chunk.description}
505
- **File**: {chunk.file_path or "Not specified"}
506
-
507
- ```
508
- {chunk.content}
509
- ```
510
-
511
- ## Accumulated Code So Far
512
- ```
513
- {accumulated_code[-2000:] if len(accumulated_code) > 2000 else accumulated_code}
514
- ```
515
-
516
- ## Instructions
517
- 1. Review this chunk for:
518
- - Correctness
519
- - Code quality
520
- - Best practices
521
- - Potential bugs
522
- - Test coverage needs
523
-
524
- 2. Categorize issues as:
525
- - **BLOCKING**: Cannot proceed
526
- - **MUST FIX**: Required before merge
527
- - **SUGGESTION**: Nice to have
528
-
529
- 3. If the code is good, say "LGTM" or "Approved"
530
-
531
- 4. Be constructive and specific
532
- """
533
-
534
- def run(self) -> PairSessionResult:
535
- """Run the pair programming session."""
536
- start_time = datetime.now().isoformat()
537
-
538
- self._log(f"Starting pair session for: {self.task[:50]}...")
539
-
540
- # Break task into parts (for now, treat as single task)
541
- task_parts = [self.task]
542
-
543
- accumulated_code = ""
544
- total_revisions = 0
545
- approved_chunks = 0
546
-
547
- for i, task_part in enumerate(task_parts):
548
- self._log(f"Working on part {i + 1}/{len(task_parts)}")
549
-
550
- # Get initial context
551
- context = f"Story: {self.story_key}\n"
552
- context += self.shared_memory.to_context_string(5)
553
-
554
- # DEV creates initial chunk
555
- self._log("Creating initial implementation...", "DEV")
556
- dev_prompt = self._build_dev_prompt(task_part, context)
557
- dev_output = self._invoke_agent("DEV", dev_prompt)
558
-
559
- chunk = self._parse_dev_output(dev_output)
560
- self._log(f"Created chunk: {chunk.description}", "DEV")
561
-
562
- # Track file
563
- if chunk.file_path:
564
- if (
565
- chunk.file_path not in self.files_created
566
- and chunk.file_path not in self.files_modified
567
- ):
568
- self.files_created.append(chunk.file_path)
569
-
570
- accumulated_code += f"\n\n// {chunk.description}\n{chunk.content}"
571
-
572
- # REVIEWER reviews
573
- self._log("Reviewing chunk...", "REVIEWER")
574
- reviewer_prompt = self._build_reviewer_prompt(chunk, accumulated_code)
575
- reviewer_output = self._invoke_agent("REVIEWER", reviewer_prompt)
576
-
577
- feedback = self._parse_reviewer_output(reviewer_output, chunk.chunk_id)
578
- self._log(f"Feedback: {feedback.feedback_type.value}", "REVIEWER")
579
-
580
- exchange = PairExchange(
581
- exchange_id=self.exchange_counter,
582
- chunk=chunk,
583
- feedback=feedback,
584
- resolved=feedback.approved,
585
- )
586
- self.exchange_counter += 1
587
-
588
- # Revision loop if needed
589
- revision_count = 0
590
- while (
591
- feedback.has_blocking_issues()
592
- and revision_count < self.config.max_revisions_per_chunk
593
- ):
594
- revision_count += 1
595
- total_revisions += 1
596
- self._log(f"Revision {revision_count} needed", "SYSTEM")
597
-
598
- # DEV revises
599
- self._log("Addressing feedback...", "DEV")
600
- dev_prompt = self._build_dev_prompt(task_part, context, feedback)
601
- dev_output = self._invoke_agent("DEV", dev_prompt)
602
-
603
- revised_chunk = self._parse_dev_output(dev_output)
604
- exchange.revision = revised_chunk
605
-
606
- # Update accumulated code
607
- accumulated_code += (
608
- f"\n\n// Revision: {revised_chunk.description}\n{revised_chunk.content}"
609
- )
610
-
611
- # Run validation between revisions
612
- self._run_revision_validation(chunk.chunk_id, revision_count)
613
-
614
- # REVIEWER re-reviews
615
- self._log("Re-reviewing...", "REVIEWER")
616
- reviewer_prompt = self._build_reviewer_prompt(revised_chunk, accumulated_code)
617
- reviewer_output = self._invoke_agent("REVIEWER", reviewer_prompt)
618
-
619
- feedback = self._parse_reviewer_output(reviewer_output, revised_chunk.chunk_id)
620
- exchange.feedback = feedback
621
- exchange.resolved = feedback.approved
622
-
623
- self._log(f"Feedback: {feedback.feedback_type.value}", "REVIEWER")
624
-
625
- if exchange.resolved or feedback.approved:
626
- approved_chunks += 1
627
- self._log(" Chunk approved!", "SYSTEM")
628
- else:
629
- self._log(" Moving on with unresolved issues", "SYSTEM")
630
-
631
- self.exchanges.append(exchange)
632
-
633
- # Record in shared memory
634
- self.shared_memory.add(
635
- agent="PAIR",
636
- content=f"Completed chunk: {chunk.description} ({feedback.feedback_type.value})",
637
- tags=["pair-programming", "chunk"],
638
- )
639
-
640
- # Calculate approval rate
641
- total_chunks = len(self.exchanges)
642
- approval_rate = approved_chunks / total_chunks if total_chunks > 0 else 0
643
-
644
- self._log(f"Session complete. Approval rate: {approval_rate:.0%}", "SYSTEM")
645
-
646
- return PairSessionResult(
647
- story_key=self.story_key,
648
- task=self.task,
649
- exchanges=self.exchanges,
650
- final_code=accumulated_code,
651
- files_created=self.files_created,
652
- files_modified=self.files_modified,
653
- total_chunks=total_chunks,
654
- total_revisions=total_revisions,
655
- approval_rate=approval_rate,
656
- start_time=start_time,
657
- end_time=datetime.now().isoformat(),
658
- )
659
-
660
-
661
- # Convenience functions
662
- def start_pair_session(story_key: str, task: str, **config_kwargs) -> PairSession:
663
- """Start a new pair programming session."""
664
- config = PairConfig(**config_kwargs)
665
- return PairSession(story_key, task, config)
666
-
667
-
668
- def run_pair_session(story_key: str, task: str, **config_kwargs) -> PairSessionResult:
669
- """Run a complete pair programming session."""
670
- session = start_pair_session(story_key, task, **config_kwargs)
671
- return session.run()
672
-
673
-
674
- if __name__ == "__main__":
675
- print("=== Pair Programming Mode Demo ===\n")
676
- print("This module enables real-time DEV + REVIEWER collaboration.")
677
- print("\nExample usage:")
678
- print("""
679
- from lib.pair_programming import run_pair_session
680
-
681
- result = run_pair_session(
682
- story_key="3-5",
683
- task="Implement user authentication endpoint",
684
- max_revisions_per_chunk=2,
685
- verbose=True
686
- )
687
-
688
- print(result.to_summary())
689
- print(f"Approval rate: {result.approval_rate:.0%}")
690
- """)