npm - @pjmendonca/devflow - Versions diffs - 1.13.1 → 1.18.0 - Mend

@pjmendonca/devflow 1.13.1 → 1.18.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (228)

package/tooling/scripts/run-story.sh CHANGED

@@ -288,7 +288,7 @@ main() {
             swarm_args="$swarm_args --max-iterations $max_iterations"
             python3 "$SCRIPT_DIR/run-collab.py" $swarm_args
             local exit_code=$?
             if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
                 auto_commit_changes "$story_key"
             fi
@@ -298,7 +298,7 @@ main() {
             echo ""
             python3 "$SCRIPT_DIR/run-collab.py" "$story_key" --pair --max-revisions "$max_iterations"
             local exit_code=$?
             if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
                 auto_commit_changes "$story_key"
             fi
@@ -308,7 +308,7 @@ main() {
             echo ""
             python3 "$SCRIPT_DIR/run-collab.py" "$story_key" --auto
             local exit_code=$?
             if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
                 auto_commit_changes "$story_key"
             fi

package/tooling/scripts/setup-checkpoint-service.py CHANGED

@@ -22,16 +22,9 @@ import sys
 from pathlib import Path
 SCRIPT_DIR = Path(__file__).parent
+sys.path.insert(0, str(SCRIPT_DIR / "lib"))
-def get_platform():
-    """Detect the current platform."""
-    if sys.platform == "win32":
-        return "windows"
-    elif sys.platform == "darwin":
-        return "macos"
-    else:
-        return "linux"
+from platform import get_platform
 def run_windows(action):

package/tooling/scripts/tech-debt-tracker.py CHANGED

@@ -28,18 +28,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any
-# Colors for terminal output
-class Colors:
-    RED = "\033[0;31m"
-    GREEN = "\033[0;32m"
-    YELLOW = "\033[1;33m"
-    BLUE = "\033[0;34m"
-    CYAN = "\033[0;36m"
-    MAGENTA = "\033[0;35m"
-    BOLD = "\033[1m"
-    NC = "\033[0m"
+from lib.colors import Colors
 # Debt indicator patterns
 DEBT_PATTERNS = {

package/tooling/scripts/test_adversarial_swarm.py ADDED

@@ -0,0 +1,452 @@
+#!/usr/bin/env python3
+"""
+Adversarial Swarm Test Harness and Performance Tracker
+Tests the adversarial swarm system and tracks performance metrics
+to identify trends like diminishing returns.
+Usage:
+    python3 tooling/scripts/test_adversarial_swarm.py [--runs N] [--plot]
+Metrics tracked per round:
+- New arguments introduced
+- Challenges raised
+- Concessions made
+- Agreement score delta
+- Token usage
+- Unique issues identified
+Outputs:
+- JSON results in tooling/.automation/benchmarks/
+- Performance plots (if --plot flag used)
+"""
+import argparse
+import json
+import sys
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+# Add lib to path
+sys.path.insert(0, str(Path(__file__).parent / "lib"))
+try:
+    from personality_system import (
+        ConvergenceDetector,
+        PersonalitySelector,
+    )
+except ImportError:
+    print("[ERROR] Could not import personality_system. Run from project root.")
+    sys.exit(1)
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+BENCHMARK_DIR = PROJECT_ROOT / "tooling" / ".automation" / "benchmarks"
+@dataclass
+class RoundMetrics:
+    """Metrics for a single debate round."""
+    round_num: int
+    new_arguments: int = 0
+    challenges_raised: int = 0
+    concessions_made: int = 0
+    agreement_score: float = 0.0
+    agreement_delta: float = 0.0
+    unique_issues: int = 0
+    tokens_used: int = 0
+    cost_usd: float = 0.0
+    positions_changed: int = 0  # How many agents changed position
+@dataclass
+class SwarmBenchmarkResult:
+    """Complete benchmark result for a swarm run."""
+    test_id: str
+    task: str
+    agents: list[str]
+    personas_used: list[str]
+    total_rounds: int
+    termination_reason: str
+    final_agreement_score: float
+    total_tokens: int
+    total_cost_usd: float
+    rounds: list[RoundMetrics] = field(default_factory=list)
+    timestamp: str = ""
+    duration_seconds: float = 0.0
+    # Derived metrics
+    arguments_per_round: list[int] = field(default_factory=list)
+    agreement_progression: list[float] = field(default_factory=list)
+    marginal_value: list[float] = field(default_factory=list)  # Value gained per round
+    def to_dict(self) -> dict:
+        result = {
+            "test_id": self.test_id,
+            "task": self.task,
+            "agents": self.agents,
+            "personas_used": self.personas_used,
+            "total_rounds": self.total_rounds,
+            "termination_reason": self.termination_reason,
+            "final_agreement_score": self.final_agreement_score,
+            "total_tokens": self.total_tokens,
+            "total_cost_usd": self.total_cost_usd,
+            "timestamp": self.timestamp,
+            "duration_seconds": self.duration_seconds,
+            "rounds": [asdict(r) for r in self.rounds],
+            "arguments_per_round": self.arguments_per_round,
+            "agreement_progression": self.agreement_progression,
+            "marginal_value": self.marginal_value,
+        }
+        return result
+class AdversarialSwarmTester:
+    """Tests the adversarial swarm and collects metrics."""
+    def __init__(self, output_dir: Optional[Path] = None):
+        self.output_dir = output_dir or BENCHMARK_DIR
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.results: list[SwarmBenchmarkResult] = []
+    def run_simulated_test(
+        self,
+        task: str,
+        agents: list[str],
+        max_rounds: int = 3,
+    ) -> SwarmBenchmarkResult:
+        """Run a simulated test without actual LLM calls.
+        This tests the personality selection and convergence detection
+        without consuming tokens.
+        """
+        import random
+        import time
+        test_id = f"sim_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        start_time = time.time()
+        print(f"\n[TEST] Starting simulated adversarial swarm: {test_id}")
+        print(f"  Task: {task[:60]}...")
+        print(f"  Agents: {', '.join(agents)}")
+        # Select personas
+        selector = PersonalitySelector()
+        personas = selector.select_adversarial_personas(task, len(agents), agents)
+        print("  Selected personas:")
+        for p in personas:
+            stance = p.adversarial_stance.primary_concern if p.adversarial_stance else "general"
+            print(f"    - {p.name} ({p.agent_type}) [Focus: {stance}]")
+        # Simulate debate rounds
+        _detector = ConvergenceDetector(similarity_threshold=0.8, stability_rounds=2)
+        rounds: list[RoundMetrics] = []
+        prev_agreement = 0.0
+        cumulative_arguments = set()
+        for round_num in range(max_rounds):
+            # Simulate round metrics (decreasing novelty over rounds)
+            decay_factor = 0.7**round_num
+            new_args = int(random.randint(3, 8) * decay_factor) + 1
+            challenges = int(random.randint(2, 5) * decay_factor)
+            concessions = int(random.randint(0, 2) * (1 - decay_factor) + round_num * 0.5)
+            # Simulate agreement increasing over rounds
+            agreement_increase = random.uniform(0.1, 0.25) * decay_factor
+            agreement = min(1.0, prev_agreement + agreement_increase)
+            # Add arguments to cumulative set
+            for i in range(new_args):
+                cumulative_arguments.add(f"arg_{round_num}_{i}")
+            # Calculate marginal value (new unique insights / tokens)
+            tokens = random.randint(500, 1500)
+            cost = tokens * 0.00001  # Rough estimate
+            round_metrics = RoundMetrics(
+                round_num=round_num,
+                new_arguments=new_args,
+                challenges_raised=challenges,
+                concessions_made=concessions,
+                agreement_score=agreement,
+                agreement_delta=agreement - prev_agreement,
+                unique_issues=max(0, int((8 - round_num) * decay_factor)),
+                tokens_used=tokens,
+                cost_usd=cost,
+                positions_changed=max(0, int(len(agents) * decay_factor * 0.5)),
+            )
+            rounds.append(round_metrics)
+            print(
+                f"  Round {round_num + 1}: Agreement={agreement:.0%}, "
+                f"NewArgs={new_args}, Challenges={challenges}, Concessions={concessions}"
+            )
+            prev_agreement = agreement
+            # Check for simulated convergence
+            if agreement > 0.85 and round_num >= 1:
+                print("  [CONVERGED] High agreement reached")
+                break
+        # Build result
+        duration = time.time() - start_time
+        result = SwarmBenchmarkResult(
+            test_id=test_id,
+            task=task,
+            agents=agents,
+            personas_used=[p.name for p in personas],
+            total_rounds=len(rounds),
+            termination_reason="convergence" if prev_agreement > 0.85 else "max_rounds",
+            final_agreement_score=prev_agreement,
+            total_tokens=sum(r.tokens_used for r in rounds),
+            total_cost_usd=sum(r.cost_usd for r in rounds),
+            rounds=rounds,
+            timestamp=datetime.now().isoformat(),
+            duration_seconds=duration,
+            arguments_per_round=[r.new_arguments for r in rounds],
+            agreement_progression=[r.agreement_score for r in rounds],
+            marginal_value=self._calculate_marginal_value(rounds),
+        )
+        self.results.append(result)
+        return result
+    def _calculate_marginal_value(self, rounds: list[RoundMetrics]) -> list[float]:
+        """Calculate marginal value (insight gained per token spent) per round."""
+        marginal = []
+        for _i, r in enumerate(rounds):
+            if r.tokens_used == 0:
+                marginal.append(0.0)
+            else:
+                # Value = (new arguments + challenges + agreement delta * 10) / tokens
+                value = r.new_arguments + r.challenges_raised + r.agreement_delta * 10
+                marginal.append(value / (r.tokens_used / 1000))  # Per 1K tokens
+        return marginal
+    def run_batch_tests(self, num_runs: int = 5) -> list[SwarmBenchmarkResult]:
+        """Run a batch of simulated tests with different tasks."""
+        test_tasks = [
+            "Design a secure authentication system with OAuth2 and JWT",
+            "Implement a caching layer for the API with Redis",
+            "Refactor the monolith into microservices",
+            "Add rate limiting to protect against DDoS",
+            "Design a real-time notification system",
+            "Implement a data pipeline for analytics",
+            "Create a plugin architecture for extensibility",
+            "Design a multi-tenant database schema",
+            "Implement end-to-end encryption for messages",
+            "Build a recommendation engine using collaborative filtering",
+        ]
+        agent_combos = [
+            ["ARCHITECT", "DEV", "REVIEWER"],
+            ["DEV", "REVIEWER", "SECURITY"],
+            ["ARCHITECT", "DEV", "MAINTAINER"],
+        ]
+        results = []
+        for i in range(num_runs):
+            task = test_tasks[i % len(test_tasks)]
+            agents = agent_combos[i % len(agent_combos)]
+            result = self.run_simulated_test(task, agents)
+            results.append(result)
+        return results
+    def save_results(self, filename: Optional[str] = None):
+        """Save benchmark results to JSON."""
+        if not filename:
+            filename = f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+        filepath = self.output_dir / filename
+        with open(filepath, "w") as f:
+            json.dump([r.to_dict() for r in self.results], f, indent=2)
+        print(f"\n[OK] Results saved to {filepath}")
+        return filepath
+    def generate_summary(self) -> dict:
+        """Generate summary statistics from all runs."""
+        if not self.results:
+            return {}
+        total_runs = len(self.results)
+        avg_rounds = sum(r.total_rounds for r in self.results) / total_runs
+        avg_agreement = sum(r.final_agreement_score for r in self.results) / total_runs
+        avg_cost = sum(r.total_cost_usd for r in self.results) / total_runs
+        # Calculate average marginal value per round across all runs
+        max_rounds = max(r.total_rounds for r in self.results)
+        avg_marginal_by_round = []
+        for round_idx in range(max_rounds):
+            values = []
+            for result in self.results:
+                if round_idx < len(result.marginal_value):
+                    values.append(result.marginal_value[round_idx])
+            if values:
+                avg_marginal_by_round.append(sum(values) / len(values))
+        # Identify diminishing returns point
+        diminishing_point = None
+        for i in range(1, len(avg_marginal_by_round)):
+            if avg_marginal_by_round[i] < avg_marginal_by_round[i - 1] * 0.5:
+                diminishing_point = i + 1
+                break
+        return {
+            "total_runs": total_runs,
+            "avg_rounds": avg_rounds,
+            "avg_agreement_score": avg_agreement,
+            "avg_cost_usd": avg_cost,
+            "avg_marginal_value_by_round": avg_marginal_by_round,
+            "diminishing_returns_round": diminishing_point,
+            "convergence_rate": sum(
+                1 for r in self.results if r.termination_reason == "convergence"
+            )
+            / total_runs,
+        }
+def plot_results(results: list[SwarmBenchmarkResult], output_path: Optional[Path] = None):
+    """Generate performance plots from benchmark results."""
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        print("[WARNING] matplotlib not installed. Run: pip install matplotlib")
+        return
+    if not results:
+        print("[WARNING] No results to plot")
+        return
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    fig.suptitle("Adversarial Swarm Performance Analysis", fontsize=14, fontweight="bold")
+    # Plot 1: Agreement progression over rounds
+    ax1 = axes[0, 0]
+    for result in results:
+        ax1.plot(
+            range(1, len(result.agreement_progression) + 1),
+            result.agreement_progression,
+            marker="o",
+            alpha=0.7,
+            label=result.test_id[:12],
+        )
+    ax1.set_xlabel("Round")
+    ax1.set_ylabel("Agreement Score")
+    ax1.set_title("Agreement Progression Over Rounds")
+    ax1.set_ylim(0, 1.1)
+    ax1.grid(True, alpha=0.3)
+    # Plot 2: New arguments per round (diminishing returns)
+    ax2 = axes[0, 1]
+    for result in results:
+        ax2.plot(
+            range(1, len(result.arguments_per_round) + 1),
+            result.arguments_per_round,
+            marker="s",
+            alpha=0.7,
+        )
+    ax2.set_xlabel("Round")
+    ax2.set_ylabel("New Arguments")
+    ax2.set_title("New Arguments Per Round (Diminishing Returns)")
+    ax2.grid(True, alpha=0.3)
+    # Plot 3: Marginal value per round
+    ax3 = axes[1, 0]
+    for result in results:
+        ax3.plot(
+            range(1, len(result.marginal_value) + 1),
+            result.marginal_value,
+            marker="^",
+            alpha=0.7,
+        )
+    ax3.set_xlabel("Round")
+    ax3.set_ylabel("Marginal Value (per 1K tokens)")
+    ax3.set_title("Marginal Value Per Round")
+    ax3.grid(True, alpha=0.3)
+    # Plot 4: Cost vs Agreement scatter
+    ax4 = axes[1, 1]
+    costs = [r.total_cost_usd for r in results]
+    agreements = [r.final_agreement_score for r in results]
+    rounds = [r.total_rounds for r in results]
+    scatter = ax4.scatter(costs, agreements, c=rounds, cmap="viridis", s=100, alpha=0.7)
+    ax4.set_xlabel("Total Cost (USD)")
+    ax4.set_ylabel("Final Agreement Score")
+    ax4.set_title("Cost vs Agreement (color = rounds)")
+    ax4.grid(True, alpha=0.3)
+    plt.colorbar(scatter, ax=ax4, label="Rounds")
+    plt.tight_layout()
+    if output_path:
+        plt.savefig(output_path, dpi=150, bbox_inches="tight")
+        print(f"[OK] Plot saved to {output_path}")
+    else:
+        plt.show()
+def main():
+    parser = argparse.ArgumentParser(description="Test adversarial swarm performance")
+    parser.add_argument("--runs", type=int, default=5, help="Number of test runs")
+    parser.add_argument("--plot", action="store_true", help="Generate performance plots")
+    parser.add_argument("--output", type=str, help="Output filename for results")
+    args = parser.parse_args()
+    print("=" * 60)
+    print("  ADVERSARIAL SWARM TEST HARNESS")
+    print("=" * 60)
+    tester = AdversarialSwarmTester()
+    # Run batch tests
+    print(f"\n[INFO] Running {args.runs} simulated tests...")
+    results = tester.run_batch_tests(args.runs)
+    # Save results
+    tester.save_results(args.output)
+    # Generate summary
+    summary = tester.generate_summary()
+    print("\n" + "=" * 60)
+    print("  SUMMARY")
+    print("=" * 60)
+    print(f"  Total runs: {summary['total_runs']}")
+    print(f"  Average rounds: {summary['avg_rounds']:.1f}")
+    print(f"  Average agreement: {summary['avg_agreement_score']:.0%}")
+    print(f"  Average cost: ${summary['avg_cost_usd']:.4f}")
+    print(f"  Convergence rate: {summary['convergence_rate']:.0%}")
+    if summary.get("diminishing_returns_round"):
+        print(
+            f"\n  [INSIGHT] Diminishing returns detected at round {summary['diminishing_returns_round']}"
+        )
+        print("            Consider limiting debates to this many rounds for efficiency.")
+    print("\n  Marginal value by round:")
+    for i, val in enumerate(summary.get("avg_marginal_value_by_round", [])):
+        bar = "[" + "=" * int(val * 2) + " " * (20 - int(val * 2)) + "]"
+        print(f"    Round {i + 1}: {bar} {val:.2f}")
+    # Generate plots
+    if args.plot:
+        plot_path = (
+            BENCHMARK_DIR / f"performance_plot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
+        )
+        plot_results(results, plot_path)
+    print("\n[OK] Test harness complete")
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/tooling/scripts/update_version.py CHANGED

@@ -7,6 +7,7 @@ all version references throughout the project. Run this after updating
 the changelog, or use the pre-commit hook to automate it.
 Files updated:
+    - package.json (version field) - FIRST, to include in commit
     - README.md (version block)
     - pyproject.toml (version field)
     - tooling/scripts/lib/__init__.py (__version__ variable)
@@ -18,6 +19,7 @@ Usage:
 """
 import argparse
+import json
 import re
 import sys
 from datetime import date
@@ -81,6 +83,41 @@ def get_init_version(init_path: Path) -> str | None:
     return None
+def get_package_json_version(package_path: Path) -> str | None:
+    """Extract the current version from package.json."""
+    if not package_path.exists():
+        return None
+    try:
+        data = json.loads(package_path.read_text())
+        return data.get("version")
+    except json.JSONDecodeError:
+        return None
+def update_package_json_version(package_path: Path, version: str) -> bool:
+    """Update the version in package.json."""
+    if not package_path.exists():
+        print(f"Error: package.json not found at {package_path}")
+        return False
+    try:
+        content = package_path.read_text()
+        data = json.loads(content)
+        old_version = data.get("version")
+        if old_version == version:
+            print(f"package.json already at version {version}")
+            return True
+        data["version"] = version
+        package_path.write_text(json.dumps(data, indent=2) + "\n")
+        return True
+    except json.JSONDecodeError as e:
+        print(f"Error: Invalid JSON in package.json: {e}")
+        return False
 def update_readme_version(readme_path: Path, version: str) -> bool:
     """Update the version in README.md."""
     if not readme_path.exists():
@@ -173,12 +210,14 @@ def main():
     root = get_project_root()
     changelog_path = root / "CHANGELOG.md"
+    package_path = root / "package.json"
     readme_path = root / "README.md"
     pyproject_path = root / "pyproject.toml"
     init_path = root / "tooling" / "scripts" / "lib" / "__init__.py"
     # Get versions from all sources
     changelog_version = get_changelog_version(changelog_path)
+    package_version = get_package_json_version(package_path)
     readme_version = get_readme_version(readme_path)
     pyproject_version = get_pyproject_version(pyproject_path)
     init_version = get_init_version(init_path)
@@ -192,6 +231,7 @@ def main():
         sys.exit(1)
     print(f"CHANGELOG version:   {changelog_version or 'not found'}")
+    print(f"package.json:        {package_version or 'not found'}")
     print(f"README version:      {readme_version or 'not found'}")
     print(f"pyproject version:   {pyproject_version or 'not found'}")
     print(f"__init__ version:    {init_version or 'not found'}")
@@ -201,7 +241,7 @@ def main():
     if args.check:
         all_match = all(
             v == target_version
-            for v in [readme_version, pyproject_version, init_version]
+            for v in [package_version, readme_version, pyproject_version, init_version]
             if v is not None
         )
         if all_match:
@@ -211,9 +251,15 @@ def main():
             print("[X] Versions are out of sync")
             sys.exit(1)
-    # Update all files
+    # Update all files (package.json first so it's included in commit)
     success = True
+    if update_package_json_version(package_path, target_version):
+        print(f"[OK] Updated package.json to version {target_version}")
+    else:
+        print("[X] Failed to update package.json")
+        success = False
     if update_readme_version(readme_path, target_version):
         print(f"[OK] Updated README.md to version {target_version}")
     else:

package/tooling/scripts/validate-overrides.py CHANGED

@@ -28,16 +28,7 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
-# Colors for terminal output
-class Colors:
-    RED = "\033[0;31m"
-    GREEN = "\033[0;32m"
-    YELLOW = "\033[1;33m"
-    BLUE = "\033[0;34m"
-    CYAN = "\033[0;36m"
-    NC = "\033[0m"  # No Color
+from lib.colors import Colors
 # Valid values
 VALID_MODELS = ["sonnet", "opus", "haiku"]