@pjmendonca/devflow 1.13.2 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/.claude/commands/agent.md +1 -1
  2. package/.claude/commands/brainstorm.md +28 -0
  3. package/.claude/commands/bugfix.md +21 -0
  4. package/.claude/commands/checkpoint.md +0 -1
  5. package/.claude/commands/collab.md +0 -1
  6. package/.claude/commands/costs.md +88 -18
  7. package/.claude/commands/devflow.md +26 -0
  8. package/.claude/commands/handoff.md +0 -1
  9. package/.claude/commands/init.md +383 -0
  10. package/.claude/commands/memory.md +0 -1
  11. package/.claude/commands/pair.md +0 -1
  12. package/.claude/commands/review.md +27 -0
  13. package/.claude/commands/route.md +0 -1
  14. package/.claude/commands/swarm.md +0 -1
  15. package/.claude/commands/validate.md +55 -0
  16. package/.claude/hooks/session-notification.sh +44 -0
  17. package/.claude/hooks/session-startup.sh +427 -0
  18. package/.claude/hooks/session-stop.sh +38 -0
  19. package/.claude/hooks/session_tracker.py +272 -0
  20. package/.claude/settings.json +38 -0
  21. package/.claude/skills/brainstorm/SKILL.md +531 -0
  22. package/.claude/skills/costs/SKILL.md +156 -0
  23. package/.claude/skills/validate/SKILL.md +101 -0
  24. package/CHANGELOG.md +284 -0
  25. package/README.md +207 -10
  26. package/bin/devflow-install.js +2 -1
  27. package/bin/devflow.js +4 -0
  28. package/lib/constants.js +0 -1
  29. package/lib/exec-python.js +1 -1
  30. package/package.json +1 -1
  31. package/tooling/.automation/.checkpoint_lock +1 -0
  32. package/tooling/.automation/agents/architect.md +19 -0
  33. package/tooling/.automation/agents/ba.md +19 -0
  34. package/tooling/.automation/agents/maintainer.md +19 -0
  35. package/tooling/.automation/agents/pm.md +19 -0
  36. package/tooling/.automation/agents/reviewer.md +1 -1
  37. package/tooling/.automation/agents/writer.md +19 -0
  38. package/tooling/.automation/benchmarks/benchmark_20251230_100119.json +314 -0
  39. package/tooling/.automation/benchmarks/benchmark_20251230_100216.json +314 -0
  40. package/tooling/.automation/costs/config.json +31 -0
  41. package/tooling/.automation/costs/sessions/2025-12-29_20251229_164128.json +22 -0
  42. package/tooling/.automation/memory/knowledge/kg_integration-test.json +738 -1
  43. package/tooling/.automation/memory/knowledge/kg_test-story.json +3381 -2
  44. package/tooling/.automation/memory/shared/shared_integration-test.json +193 -1
  45. package/tooling/.automation/memory/shared/shared_test-story.json +757 -1
  46. package/tooling/.automation/memory/shared/shared_test.json +1332 -0
  47. package/tooling/.automation/memory/shared/shared_validation-check.json +240 -0
  48. package/tooling/.automation/overrides/templates/architect/cloud-native.yaml +5 -5
  49. package/tooling/.automation/overrides/templates/architect/enterprise-architect.yaml +23 -5
  50. package/tooling/.automation/overrides/templates/architect/pragmatic-minimalist.yaml +24 -6
  51. package/tooling/.automation/overrides/templates/ba/agile-storyteller.yaml +4 -4
  52. package/tooling/.automation/overrides/templates/ba/domain-expert.yaml +4 -4
  53. package/tooling/.automation/overrides/templates/ba/requirements-engineer.yaml +4 -4
  54. package/tooling/.automation/overrides/templates/dev/performance-engineer.yaml +18 -0
  55. package/tooling/.automation/overrides/templates/dev/rapid-prototyper.yaml +19 -1
  56. package/tooling/.automation/overrides/templates/dev/security-focused.yaml +18 -0
  57. package/tooling/.automation/overrides/templates/dev/user-advocate.yaml +54 -0
  58. package/tooling/.automation/overrides/templates/maintainer/devops-maintainer.yaml +4 -4
  59. package/tooling/.automation/overrides/templates/maintainer/legacy-steward.yaml +4 -4
  60. package/tooling/.automation/overrides/templates/maintainer/oss-maintainer.yaml +4 -4
  61. package/tooling/.automation/overrides/templates/maintainer/reliability-engineer.yaml +55 -0
  62. package/tooling/.automation/overrides/templates/pm/agile-pm.yaml +4 -4
  63. package/tooling/.automation/overrides/templates/pm/hybrid-delivery.yaml +3 -3
  64. package/tooling/.automation/overrides/templates/pm/traditional-pm.yaml +4 -4
  65. package/tooling/.automation/overrides/templates/reviewer/quick-sanity.yaml +18 -0
  66. package/tooling/.automation/overrides/templates/reviewer/thorough-critic.yaml +18 -0
  67. package/tooling/.automation/overrides/templates/sm/agile-coach.yaml +2 -2
  68. package/tooling/.automation/overrides/templates/sm/startup-pm.yaml +3 -3
  69. package/tooling/.automation/overrides/templates/writer/api-documentarian.yaml +5 -5
  70. package/tooling/.automation/overrides/templates/writer/docs-as-code.yaml +4 -4
  71. package/tooling/.automation/overrides/templates/writer/user-guide-author.yaml +5 -5
  72. package/tooling/.automation/validation/history/2025-12-29_val_002a28c1.json +32 -0
  73. package/tooling/.automation/validation/history/2025-12-29_val_01273bb1.json +32 -0
  74. package/tooling/.automation/validation/history/2025-12-29_val_03369914.json +41 -0
  75. package/tooling/.automation/validation/history/2025-12-29_val_07a449ba.json +32 -0
  76. package/tooling/.automation/validation/history/2025-12-29_val_0df1f0a2.json +41 -0
  77. package/tooling/.automation/validation/history/2025-12-29_val_10ff3d34.json +41 -0
  78. package/tooling/.automation/validation/history/2025-12-29_val_110771d7.json +32 -0
  79. package/tooling/.automation/validation/history/2025-12-29_val_13f3a7f9.json +32 -0
  80. package/tooling/.automation/validation/history/2025-12-29_val_17ba9d21.json +41 -0
  81. package/tooling/.automation/validation/history/2025-12-29_val_22247089.json +32 -0
  82. package/tooling/.automation/validation/history/2025-12-29_val_227ea6a4.json +32 -0
  83. package/tooling/.automation/validation/history/2025-12-29_val_2335d5ae.json +32 -0
  84. package/tooling/.automation/validation/history/2025-12-29_val_246824bb.json +41 -0
  85. package/tooling/.automation/validation/history/2025-12-29_val_28b4b9cd.json +32 -0
  86. package/tooling/.automation/validation/history/2025-12-29_val_2abd12cc.json +32 -0
  87. package/tooling/.automation/validation/history/2025-12-29_val_2c801b2f.json +59 -0
  88. package/tooling/.automation/validation/history/2025-12-29_val_2c8cfa8e.json +32 -0
  89. package/tooling/.automation/validation/history/2025-12-29_val_2ce76eb0.json +32 -0
  90. package/tooling/.automation/validation/history/2025-12-29_val_30351948.json +41 -0
  91. package/tooling/.automation/validation/history/2025-12-29_val_30eb7229.json +41 -0
  92. package/tooling/.automation/validation/history/2025-12-29_val_34df0e77.json +41 -0
  93. package/tooling/.automation/validation/history/2025-12-29_val_376e4d6a.json +32 -0
  94. package/tooling/.automation/validation/history/2025-12-29_val_3a4e8a1a.json +59 -0
  95. package/tooling/.automation/validation/history/2025-12-29_val_3b77a628.json +32 -0
  96. package/tooling/.automation/validation/history/2025-12-29_val_3ea4e1cf.json +59 -0
  97. package/tooling/.automation/validation/history/2025-12-29_val_44aacdb4.json +59 -0
  98. package/tooling/.automation/validation/history/2025-12-29_val_457ddfa8.json +32 -0
  99. package/tooling/.automation/validation/history/2025-12-29_val_45af6238.json +41 -0
  100. package/tooling/.automation/validation/history/2025-12-29_val_4735dba1.json +41 -0
  101. package/tooling/.automation/validation/history/2025-12-29_val_486b203c.json +41 -0
  102. package/tooling/.automation/validation/history/2025-12-29_val_49dc56cd.json +59 -0
  103. package/tooling/.automation/validation/history/2025-12-29_val_4d863d6d.json +32 -0
  104. package/tooling/.automation/validation/history/2025-12-29_val_5149a808.json +59 -0
  105. package/tooling/.automation/validation/history/2025-12-29_val_52e0bb43.json +32 -0
  106. package/tooling/.automation/validation/history/2025-12-29_val_585d6319.json +59 -0
  107. package/tooling/.automation/validation/history/2025-12-29_val_5b2d859a.json +32 -0
  108. package/tooling/.automation/validation/history/2025-12-29_val_635a7081.json +41 -0
  109. package/tooling/.automation/validation/history/2025-12-29_val_64df4905.json +32 -0
  110. package/tooling/.automation/validation/history/2025-12-29_val_70634cee.json +41 -0
  111. package/tooling/.automation/validation/history/2025-12-29_val_714553f9.json +32 -0
  112. package/tooling/.automation/validation/history/2025-12-29_val_7f7bfdbf.json +41 -0
  113. package/tooling/.automation/validation/history/2025-12-29_val_7faad91d.json +32 -0
  114. package/tooling/.automation/validation/history/2025-12-29_val_81821f8f.json +41 -0
  115. package/tooling/.automation/validation/history/2025-12-29_val_8249f3c9.json +32 -0
  116. package/tooling/.automation/validation/history/2025-12-29_val_8422b50f.json +41 -0
  117. package/tooling/.automation/validation/history/2025-12-29_val_8446c134.json +32 -0
  118. package/tooling/.automation/validation/history/2025-12-29_val_879f4e26.json +59 -0
  119. package/tooling/.automation/validation/history/2025-12-29_val_8b6d5bd7.json +32 -0
  120. package/tooling/.automation/validation/history/2025-12-29_val_8c5cd787.json +32 -0
  121. package/tooling/.automation/validation/history/2025-12-29_val_91d20bc7.json +32 -0
  122. package/tooling/.automation/validation/history/2025-12-29_val_958a12b7.json +41 -0
  123. package/tooling/.automation/validation/history/2025-12-29_val_95d91108.json +41 -0
  124. package/tooling/.automation/validation/history/2025-12-29_val_980dbb74.json +32 -0
  125. package/tooling/.automation/validation/history/2025-12-29_val_9e40c79b.json +32 -0
  126. package/tooling/.automation/validation/history/2025-12-29_val_9f499b7c.json +32 -0
  127. package/tooling/.automation/validation/history/2025-12-29_val_9f7c3b57.json +32 -0
  128. package/tooling/.automation/validation/history/2025-12-29_val_a30d5bd4.json +32 -0
  129. package/tooling/.automation/validation/history/2025-12-29_val_a6eb09c7.json +32 -0
  130. package/tooling/.automation/validation/history/2025-12-29_val_a86f7b83.json +41 -0
  131. package/tooling/.automation/validation/history/2025-12-29_val_ad5347e1.json +41 -0
  132. package/tooling/.automation/validation/history/2025-12-29_val_b0a5a993.json +32 -0
  133. package/tooling/.automation/validation/history/2025-12-29_val_bcb0192e.json +32 -0
  134. package/tooling/.automation/validation/history/2025-12-29_val_bf3c9aaa.json +32 -0
  135. package/tooling/.automation/validation/history/2025-12-29_val_c461ff88.json +32 -0
  136. package/tooling/.automation/validation/history/2025-12-29_val_c4f4e258.json +41 -0
  137. package/tooling/.automation/validation/history/2025-12-29_val_c7f0fa6d.json +41 -0
  138. package/tooling/.automation/validation/history/2025-12-29_val_c911b0e6.json +32 -0
  139. package/tooling/.automation/validation/history/2025-12-29_val_cc581964.json +32 -0
  140. package/tooling/.automation/validation/history/2025-12-29_val_cdd5a33b.json +32 -0
  141. package/tooling/.automation/validation/history/2025-12-29_val_cfd42495.json +32 -0
  142. package/tooling/.automation/validation/history/2025-12-29_val_d1c7a4ee.json +41 -0
  143. package/tooling/.automation/validation/history/2025-12-29_val_d2280d0e.json +32 -0
  144. package/tooling/.automation/validation/history/2025-12-29_val_d2a6ff69.json +32 -0
  145. package/tooling/.automation/validation/history/2025-12-29_val_d8c53ab2.json +59 -0
  146. package/tooling/.automation/validation/history/2025-12-29_val_d9c1247a.json +41 -0
  147. package/tooling/.automation/validation/history/2025-12-29_val_d9d58569.json +32 -0
  148. package/tooling/.automation/validation/history/2025-12-29_val_dabb4fd9.json +32 -0
  149. package/tooling/.automation/validation/history/2025-12-29_val_dd8fe359.json +32 -0
  150. package/tooling/.automation/validation/history/2025-12-29_val_decdffc9.json +32 -0
  151. package/tooling/.automation/validation/history/2025-12-29_val_e3a95476.json +59 -0
  152. package/tooling/.automation/validation/history/2025-12-29_val_e776dfca.json +32 -0
  153. package/tooling/.automation/validation/history/2025-12-29_val_ea70969f.json +59 -0
  154. package/tooling/.automation/validation/history/2025-12-29_val_ef41ea95.json +32 -0
  155. package/tooling/.automation/validation/history/2025-12-29_val_f384f9b1.json +32 -0
  156. package/tooling/.automation/validation/history/2025-12-29_val_f8adc38c.json +41 -0
  157. package/tooling/.automation/validation/history/2025-12-29_val_fa40b69e.json +32 -0
  158. package/tooling/.automation/validation/history/2025-12-29_val_fc538d54.json +41 -0
  159. package/tooling/.automation/validation/history/2025-12-29_val_fe814665.json +32 -0
  160. package/tooling/.automation/validation/history/2025-12-29_val_ffea4b12.json +32 -0
  161. package/tooling/.automation/validation/history/2025-12-30_val_02d001e5.json +59 -0
  162. package/tooling/.automation/validation/history/2025-12-30_val_0b8966dc.json +32 -0
  163. package/tooling/.automation/validation/history/2025-12-30_val_15455fbf.json +59 -0
  164. package/tooling/.automation/validation/history/2025-12-30_val_157e34b9.json +32 -0
  165. package/tooling/.automation/validation/history/2025-12-30_val_28d1d933.json +32 -0
  166. package/tooling/.automation/validation/history/2025-12-30_val_3442a52c.json +32 -0
  167. package/tooling/.automation/validation/history/2025-12-30_val_37f1ce1e.json +32 -0
  168. package/tooling/.automation/validation/history/2025-12-30_val_4f1d8a93.json +32 -0
  169. package/tooling/.automation/validation/history/2025-12-30_val_56ff1de3.json +32 -0
  170. package/tooling/.automation/validation/history/2025-12-30_val_664fd4e2.json +41 -0
  171. package/tooling/.automation/validation/history/2025-12-30_val_66afb0a7.json +32 -0
  172. package/tooling/.automation/validation/history/2025-12-30_val_7634663c.json +41 -0
  173. package/tooling/.automation/validation/history/2025-12-30_val_8ea830c3.json +41 -0
  174. package/tooling/.automation/validation/history/2025-12-30_val_998957c2.json +32 -0
  175. package/tooling/.automation/validation/history/2025-12-30_val_a52177db.json +32 -0
  176. package/tooling/.automation/validation/history/2025-12-30_val_a5b65a63.json +32 -0
  177. package/tooling/.automation/validation/history/2025-12-30_val_ae391d0e.json +32 -0
  178. package/tooling/.automation/validation/history/2025-12-30_val_c7895339.json +41 -0
  179. package/tooling/.automation/validation/history/2025-12-30_val_ca416593.json +41 -0
  180. package/tooling/.automation/validation/history/2025-12-30_val_cee19422.json +32 -0
  181. package/tooling/.automation/validation/history/2025-12-30_val_ddd4f4e6.json +32 -0
  182. package/tooling/.automation/validation/history/2025-12-30_val_f2e1394b.json +32 -0
  183. package/tooling/.automation/validation/history/2025-12-30_val_f4a7fa06.json +41 -0
  184. package/tooling/.automation/validation/history/2025-12-30_val_ffea3369.json +32 -0
  185. package/tooling/.automation/validation/history/2026-01-03_val_1287a74c.json +41 -0
  186. package/tooling/.automation/validation/history/2026-01-03_val_3b24071f.json +32 -0
  187. package/tooling/.automation/validation/history/2026-01-03_val_44d77573.json +32 -0
  188. package/tooling/.automation/validation/history/2026-01-03_val_5b31dc51.json +32 -0
  189. package/tooling/.automation/validation/history/2026-01-03_val_74267244.json +32 -0
  190. package/tooling/.automation/validation/history/2026-01-03_val_8b2d95c7.json +59 -0
  191. package/tooling/.automation/validation/history/2026-01-03_val_d875b297.json +41 -0
  192. package/tooling/.automation/validation-config.yaml +103 -0
  193. package/tooling/completions/DevflowCompletion.ps1 +21 -21
  194. package/tooling/completions/_run-story +3 -3
  195. package/tooling/completions/run-story-completion.bash +8 -8
  196. package/tooling/docs/DOC-STANDARD.md +14 -14
  197. package/tooling/docs/stories/.gitkeep +0 -0
  198. package/tooling/docs/templates/brainstorm-guide.md +314 -0
  199. package/tooling/docs/templates/migration-spec.md +4 -4
  200. package/tooling/docs/templates/story.md +66 -0
  201. package/tooling/scripts/context_checkpoint.py +5 -15
  202. package/tooling/scripts/cost_dashboard.py +610 -13
  203. package/tooling/scripts/create-persona.py +1 -12
  204. package/tooling/scripts/create-persona.sh +44 -44
  205. package/tooling/scripts/lib/__init__.py +12 -1
  206. package/tooling/scripts/lib/agent_handoff.py +11 -2
  207. package/tooling/scripts/lib/agent_router.py +31 -10
  208. package/tooling/scripts/lib/colors.py +106 -0
  209. package/tooling/scripts/lib/context_monitor.py +766 -0
  210. package/tooling/scripts/lib/cost_config.py +229 -10
  211. package/tooling/scripts/lib/cost_display.py +20 -45
  212. package/tooling/scripts/lib/cost_tracker.py +462 -15
  213. package/tooling/scripts/lib/currency_converter.py +28 -5
  214. package/tooling/scripts/lib/pair_programming.py +102 -3
  215. package/tooling/scripts/lib/personality_system.py +949 -0
  216. package/tooling/scripts/lib/platform.py +55 -0
  217. package/tooling/scripts/lib/shared_memory.py +9 -3
  218. package/tooling/scripts/lib/swarm_orchestrator.py +514 -75
  219. package/tooling/scripts/lib/validation_loop.py +1014 -0
  220. package/tooling/scripts/memory_summarize.py +9 -2
  221. package/tooling/scripts/new-doc.py +2 -9
  222. package/tooling/scripts/personalize_agent.py +1 -12
  223. package/tooling/scripts/rollback-migration.sh +60 -60
  224. package/tooling/scripts/run-collab.ps1 +16 -16
  225. package/tooling/scripts/run-collab.py +88 -53
  226. package/tooling/scripts/run-collab.sh +4 -4
  227. package/tooling/scripts/run-story.py +278 -20
  228. package/tooling/scripts/run-story.sh +3 -3
  229. package/tooling/scripts/setup-checkpoint-service.py +2 -9
  230. package/tooling/scripts/tech-debt-tracker.py +1 -12
  231. package/tooling/scripts/test_adversarial_swarm.py +452 -0
  232. package/tooling/scripts/validate-overrides.py +1 -10
  233. package/tooling/scripts/validate-overrides.sh +40 -40
  234. package/tooling/scripts/validate_loop.py +162 -0
  235. package/tooling/scripts/validate_setup.py +2 -30
  236. package/.claude/skills/init/SKILL.md +0 -496
@@ -288,7 +288,7 @@ main() {
288
288
  swarm_args="$swarm_args --max-iterations $max_iterations"
289
289
  python3 "$SCRIPT_DIR/run-collab.py" $swarm_args
290
290
  local exit_code=$?
291
-
291
+
292
292
  if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
293
293
  auto_commit_changes "$story_key"
294
294
  fi
@@ -298,7 +298,7 @@ main() {
298
298
  echo ""
299
299
  python3 "$SCRIPT_DIR/run-collab.py" "$story_key" --pair --max-revisions "$max_iterations"
300
300
  local exit_code=$?
301
-
301
+
302
302
  if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
303
303
  auto_commit_changes "$story_key"
304
304
  fi
@@ -308,7 +308,7 @@ main() {
308
308
  echo ""
309
309
  python3 "$SCRIPT_DIR/run-collab.py" "$story_key" --auto
310
310
  local exit_code=$?
311
-
311
+
312
312
  if [[ $exit_code -eq 0 && "$AUTO_COMMIT" == "true" ]]; then
313
313
  auto_commit_changes "$story_key"
314
314
  fi
@@ -22,16 +22,9 @@ import sys
22
22
  from pathlib import Path
23
23
 
24
24
  SCRIPT_DIR = Path(__file__).parent
25
+ sys.path.insert(0, str(SCRIPT_DIR / "lib"))
25
26
 
26
-
27
- def get_platform():
28
- """Detect the current platform."""
29
- if sys.platform == "win32":
30
- return "windows"
31
- elif sys.platform == "darwin":
32
- return "macos"
33
- else:
34
- return "linux"
27
+ from platform import get_platform
35
28
 
36
29
 
37
30
  def run_windows(action):
@@ -28,18 +28,7 @@ from datetime import datetime
28
28
  from pathlib import Path
29
29
  from typing import Any
30
30
 
31
-
32
- # Colors for terminal output
33
- class Colors:
34
- RED = "\033[0;31m"
35
- GREEN = "\033[0;32m"
36
- YELLOW = "\033[1;33m"
37
- BLUE = "\033[0;34m"
38
- CYAN = "\033[0;36m"
39
- MAGENTA = "\033[0;35m"
40
- BOLD = "\033[1m"
41
- NC = "\033[0m"
42
-
31
+ from lib.colors import Colors
43
32
 
44
33
  # Debt indicator patterns
45
34
  DEBT_PATTERNS = {
@@ -0,0 +1,452 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Adversarial Swarm Test Harness and Performance Tracker
4
+
5
+ Tests the adversarial swarm system and tracks performance metrics
6
+ to identify trends like diminishing returns.
7
+
8
+ Usage:
9
+ python3 tooling/scripts/test_adversarial_swarm.py [--runs N] [--plot]
10
+
11
+ Metrics tracked per round:
12
+ - New arguments introduced
13
+ - Challenges raised
14
+ - Concessions made
15
+ - Agreement score delta
16
+ - Token usage
17
+ - Unique issues identified
18
+
19
+ Outputs:
20
+ - JSON results in tooling/.automation/benchmarks/
21
+ - Performance plots (if --plot flag used)
22
+ """
23
+
24
+ import argparse
25
+ import json
26
+ import sys
27
+ from dataclasses import asdict, dataclass, field
28
+ from datetime import datetime
29
+ from pathlib import Path
30
+ from typing import Optional
31
+
32
+ # Add lib to path
33
+ sys.path.insert(0, str(Path(__file__).parent / "lib"))
34
+
35
+ try:
36
+ from personality_system import (
37
+ ConvergenceDetector,
38
+ PersonalitySelector,
39
+ )
40
+ except ImportError:
41
+ print("[ERROR] Could not import personality_system. Run from project root.")
42
+ sys.exit(1)
43
+
44
+
45
# Project root, found by walking up from this script's directory
# (the usage text above places the script at tooling/scripts/).
PROJECT_ROOT = Path(__file__).parent.parent.parent
# Default directory that benchmark JSON results are written to.
BENCHMARK_DIR = PROJECT_ROOT.joinpath("tooling", ".automation", "benchmarks")
47
+
48
+
49
@dataclass
class RoundMetrics:
    """Measurements recorded for one round of a swarm debate.

    Only ``round_num`` is required; every other field defaults to zero.
    """

    # Index of the round this record describes.
    round_num: int
    # Number of new arguments introduced in the round.
    new_arguments: int = 0
    # Number of challenges raised in the round.
    challenges_raised: int = 0
    # Number of concessions made in the round.
    concessions_made: int = 0
    # Agreement score at the end of the round.
    agreement_score: float = 0.0
    # Change in agreement score relative to the previous round.
    agreement_delta: float = 0.0
    # Number of unique issues identified in the round.
    unique_issues: int = 0
    # Tokens consumed by the round.
    tokens_used: int = 0
    # Cost of the round in US dollars.
    cost_usd: float = 0.0
    # How many agents changed position during the round.
    positions_changed: int = 0
63
+
64
+
65
@dataclass
class SwarmBenchmarkResult:
    """Everything recorded about one swarm benchmark run.

    Holds the run's identity and configuration, its totals, the per-round
    metrics, and a few series derived from those rounds.
    """

    # Identity and configuration of the run.
    test_id: str
    task: str
    agents: list[str]
    personas_used: list[str]

    # Outcome and totals.
    total_rounds: int
    termination_reason: str
    final_agreement_score: float
    total_tokens: int
    total_cost_usd: float

    # Per-round detail and timing.
    rounds: list[RoundMetrics] = field(default_factory=list)
    timestamp: str = ""
    duration_seconds: float = 0.0

    # Series derived from ``rounds``.
    arguments_per_round: list[int] = field(default_factory=list)
    agreement_progression: list[float] = field(default_factory=list)
    # Value gained per round.
    marginal_value: list[float] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the result as a plain dict suitable for ``json.dump``.

        Each entry of ``rounds`` is converted with ``dataclasses.asdict``;
        all other values are passed through unchanged (list values are the
        same objects held by this instance, not copies).
        """
        leading_keys = (
            "test_id",
            "task",
            "agents",
            "personas_used",
            "total_rounds",
            "termination_reason",
            "final_agreement_score",
            "total_tokens",
            "total_cost_usd",
            "timestamp",
            "duration_seconds",
        )
        trailing_keys = (
            "arguments_per_round",
            "agreement_progression",
            "marginal_value",
        )

        payload = {key: getattr(self, key) for key in leading_keys}
        # "rounds" sits between the scalar fields and the derived series.
        payload["rounds"] = [asdict(entry) for entry in self.rounds]
        payload.update((key, getattr(self, key)) for key in trailing_keys)
        return payload
106
+
107
+
108
class AdversarialSwarmTester:
    """Run simulated adversarial swarm debates and aggregate their metrics.

    Every completed run is appended to ``self.results`` so the collection
    can be written out with ``save_results`` or summarised with
    ``generate_summary``.
    """

    def __init__(self, output_dir: Optional[Path] = None):
        """Set up the output directory and an empty result list.

        Args:
            output_dir: Directory for benchmark JSON files. ``BENCHMARK_DIR``
                is used when this is omitted. The directory, including any
                missing parents, is created immediately.
        """
        self.output_dir = output_dir or BENCHMARK_DIR
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.results: list[SwarmBenchmarkResult] = []

    def run_simulated_test(
        self,
        task: str,
        agents: list[str],
        max_rounds: int = 3,
    ) -> SwarmBenchmarkResult:
        """Run a simulated test without actual LLM calls.

        Persona selection uses the real ``PersonalitySelector``; the debate
        rounds themselves are generated with ``random`` so no tokens are
        consumed.

        Args:
            task: Description of the task being debated.
            agents: Agent type names taking part in the debate.
            max_rounds: Upper bound on the number of simulated rounds.

        Returns:
            The benchmark result, which is also appended to ``self.results``.
        """
        import random
        import time

        # The timestamp only has one-second resolution, so simulated runs in
        # the same batch would otherwise share an id; the per-tester run
        # counter keeps ids unique.
        run_index = len(self.results) + 1
        test_id = f"sim_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{run_index:03d}"
        start_time = time.time()

        print(f"\n[TEST] Starting simulated adversarial swarm: {test_id}")
        print(f" Task: {task[:60]}...")
        print(f" Agents: {', '.join(agents)}")

        # Select personas
        selector = PersonalitySelector()
        personas = selector.select_adversarial_personas(task, len(agents), agents)

        print(" Selected personas:")
        for p in personas:
            stance = p.adversarial_stance.primary_concern if p.adversarial_stance else "general"
            print(f" - {p.name} ({p.agent_type}) [Focus: {stance}]")

        # The detector is only constructed here (a smoke check of its
        # constructor); the simulated rounds below never consult it.
        _detector = ConvergenceDetector(similarity_threshold=0.8, stability_rounds=2)

        rounds: list[RoundMetrics] = []
        prev_agreement = 0.0
        converged = False

        for round_num in range(max_rounds):
            # Novelty decays geometrically from one round to the next.
            decay_factor = 0.7**round_num
            new_args = int(random.randint(3, 8) * decay_factor) + 1
            challenges = int(random.randint(2, 5) * decay_factor)
            concessions = int(random.randint(0, 2) * (1 - decay_factor) + round_num * 0.5)

            # Agreement grows each round, by a shrinking amount, capped at 1.0.
            agreement_increase = random.uniform(0.1, 0.25) * decay_factor
            agreement = min(1.0, prev_agreement + agreement_increase)

            tokens = random.randint(500, 1500)
            cost = tokens * 0.00001  # Rough estimate

            rounds.append(
                RoundMetrics(
                    round_num=round_num,
                    new_arguments=new_args,
                    challenges_raised=challenges,
                    concessions_made=concessions,
                    agreement_score=agreement,
                    agreement_delta=agreement - prev_agreement,
                    unique_issues=max(0, int((8 - round_num) * decay_factor)),
                    tokens_used=tokens,
                    cost_usd=cost,
                    positions_changed=max(0, int(len(agents) * decay_factor * 0.5)),
                )
            )

            print(
                f" Round {round_num + 1}: Agreement={agreement:.0%}, "
                f"NewArgs={new_args}, Challenges={challenges}, Concessions={concessions}"
            )

            prev_agreement = agreement

            # NOTE(review): each round adds at most 0.25 * 0.7**round_num, so
            # cumulative agreement is bounded by 0.25 / 0.3 (about 0.83) and
            # this threshold looks unreachable with the current constants -
            # confirm whether that is intended.
            if agreement > 0.85 and round_num >= 1:
                print(" [CONVERGED] High agreement reached")
                converged = True
                break

        duration = time.time() - start_time
        result = SwarmBenchmarkResult(
            test_id=test_id,
            task=task,
            agents=agents,
            personas_used=[p.name for p in personas],
            total_rounds=len(rounds),
            # Report convergence only when the convergence check above
            # actually ended the loop.
            termination_reason="convergence" if converged else "max_rounds",
            final_agreement_score=prev_agreement,
            total_tokens=sum(r.tokens_used for r in rounds),
            total_cost_usd=sum(r.cost_usd for r in rounds),
            rounds=rounds,
            timestamp=datetime.now().isoformat(),
            duration_seconds=duration,
            arguments_per_round=[r.new_arguments for r in rounds],
            agreement_progression=[r.agreement_score for r in rounds],
            marginal_value=self._calculate_marginal_value(rounds),
        )

        self.results.append(result)
        return result

    def _calculate_marginal_value(self, rounds: list[RoundMetrics]) -> list[float]:
        """Return the value gained per 1K tokens for each round.

        A round's value is ``new_arguments + challenges_raised +
        agreement_delta * 10``. Rounds that used no tokens score ``0.0`` so
        the division is never attempted for them.
        """
        marginal = []
        for entry in rounds:
            if entry.tokens_used == 0:
                marginal.append(0.0)
                continue
            value = entry.new_arguments + entry.challenges_raised + entry.agreement_delta * 10
            marginal.append(value / (entry.tokens_used / 1000))  # Per 1K tokens
        return marginal

    def run_batch_tests(self, num_runs: int = 5) -> list[SwarmBenchmarkResult]:
        """Run ``num_runs`` simulated tests, cycling through built-in tasks.

        Task and agent combination are both chosen by run index modulo the
        length of the respective list.

        Returns:
            The results of this batch only (``self.results`` also receives
            them via ``run_simulated_test``).
        """
        test_tasks = [
            "Design a secure authentication system with OAuth2 and JWT",
            "Implement a caching layer for the API with Redis",
            "Refactor the monolith into microservices",
            "Add rate limiting to protect against DDoS",
            "Design a real-time notification system",
            "Implement a data pipeline for analytics",
            "Create a plugin architecture for extensibility",
            "Design a multi-tenant database schema",
            "Implement end-to-end encryption for messages",
            "Build a recommendation engine using collaborative filtering",
        ]

        agent_combos = [
            ["ARCHITECT", "DEV", "REVIEWER"],
            ["DEV", "REVIEWER", "SECURITY"],
            ["ARCHITECT", "DEV", "MAINTAINER"],
        ]

        return [
            self.run_simulated_test(
                test_tasks[i % len(test_tasks)],
                agent_combos[i % len(agent_combos)],
            )
            for i in range(num_runs)
        ]

    def save_results(self, filename: Optional[str] = None):
        """Write all collected results to a JSON file in ``self.output_dir``.

        Args:
            filename: File name to use. Defaults to
                ``benchmark_<YYYYmmdd_HHMMSS>.json``.

        Returns:
            Path of the file that was written.
        """
        if not filename:
            filename = f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        filepath = self.output_dir / filename
        # Explicit encoding so the file does not depend on the platform's
        # default locale encoding.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump([r.to_dict() for r in self.results], f, indent=2)

        print(f"\n[OK] Results saved to {filepath}")
        return filepath

    def generate_summary(self) -> dict:
        """Generate summary statistics from all runs.

        Returns:
            An empty dict when no results exist; otherwise a dict with the
            run count, per-run averages (rounds, agreement, cost), the mean
            marginal value for each round position, the first round (1-based)
            whose mean marginal value is less than half of the previous
            round's (``None`` if there is none), and the fraction of runs
            that terminated with ``"convergence"``.
        """
        if not self.results:
            return {}

        total_runs = len(self.results)
        avg_rounds = sum(r.total_rounds for r in self.results) / total_runs
        avg_agreement = sum(r.final_agreement_score for r in self.results) / total_runs
        avg_cost = sum(r.total_cost_usd for r in self.results) / total_runs

        # Average marginal value per round position; runs that ended earlier
        # simply do not contribute to later positions.
        max_rounds = max(r.total_rounds for r in self.results)
        avg_marginal_by_round = []
        for round_idx in range(max_rounds):
            values = [
                result.marginal_value[round_idx]
                for result in self.results
                if round_idx < len(result.marginal_value)
            ]
            if values:
                avg_marginal_by_round.append(sum(values) / len(values))

        # Identify diminishing returns point
        diminishing_point = None
        for i in range(1, len(avg_marginal_by_round)):
            if avg_marginal_by_round[i] < avg_marginal_by_round[i - 1] * 0.5:
                diminishing_point = i + 1
                break

        converged_runs = sum(1 for r in self.results if r.termination_reason == "convergence")

        return {
            "total_runs": total_runs,
            "avg_rounds": avg_rounds,
            "avg_agreement_score": avg_agreement,
            "avg_cost_usd": avg_cost,
            "avg_marginal_value_by_round": avg_marginal_by_round,
            "diminishing_returns_round": diminishing_point,
            "convergence_rate": converged_runs / total_runs,
        }
315
+
316
+
317
def plot_results(results: list[SwarmBenchmarkResult], output_path: Optional[Path] = None):
    """Generate a 2x2 grid of performance plots from benchmark results.

    The four panels are: agreement progression per round, new arguments per
    round, marginal value per round, and a cost-versus-agreement scatter
    coloured by round count.

    Args:
        results: Benchmark results to plot. Nothing is drawn when empty.
        output_path: When given, the figure is saved there (and then closed);
            otherwise it is shown interactively.

    Returns:
        None. A warning is printed and the function returns early when
        matplotlib is not installed or ``results`` is empty.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print("[WARNING] matplotlib not installed. Run: pip install matplotlib")
        return

    if not results:
        print("[WARNING] No results to plot")
        return

    # Beyond this many runs a legend would crowd out the chart itself.
    max_legend_entries = 10

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle("Adversarial Swarm Performance Analysis", fontsize=14, fontweight="bold")

    # Plot 1: Agreement progression over rounds
    ax1 = axes[0, 0]
    for run_number, result in enumerate(results, start=1):
        ax1.plot(
            range(1, len(result.agreement_progression) + 1),
            result.agreement_progression,
            marker="o",
            alpha=0.7,
            # The run number keeps labels distinct even if ids coincide.
            label=f"{run_number}: {result.test_id}",
        )
    ax1.set_xlabel("Round")
    ax1.set_ylabel("Agreement Score")
    ax1.set_title("Agreement Progression Over Rounds")
    ax1.set_ylim(0, 1.1)
    ax1.grid(True, alpha=0.3)
    # Line labels are only displayed once a legend is requested.
    if len(results) <= max_legend_entries:
        ax1.legend(fontsize="small")

    # Plot 2: New arguments per round (diminishing returns)
    ax2 = axes[0, 1]
    for result in results:
        ax2.plot(
            range(1, len(result.arguments_per_round) + 1),
            result.arguments_per_round,
            marker="s",
            alpha=0.7,
        )
    ax2.set_xlabel("Round")
    ax2.set_ylabel("New Arguments")
    ax2.set_title("New Arguments Per Round (Diminishing Returns)")
    ax2.grid(True, alpha=0.3)

    # Plot 3: Marginal value per round
    ax3 = axes[1, 0]
    for result in results:
        ax3.plot(
            range(1, len(result.marginal_value) + 1),
            result.marginal_value,
            marker="^",
            alpha=0.7,
        )
    ax3.set_xlabel("Round")
    ax3.set_ylabel("Marginal Value (per 1K tokens)")
    ax3.set_title("Marginal Value Per Round")
    ax3.grid(True, alpha=0.3)

    # Plot 4: Cost vs Agreement scatter
    ax4 = axes[1, 1]
    costs = [r.total_cost_usd for r in results]
    agreements = [r.final_agreement_score for r in results]
    round_counts = [r.total_rounds for r in results]

    scatter = ax4.scatter(costs, agreements, c=round_counts, cmap="viridis", s=100, alpha=0.7)
    ax4.set_xlabel("Total Cost (USD)")
    ax4.set_ylabel("Final Agreement Score")
    ax4.set_title("Cost vs Agreement (color = rounds)")
    ax4.grid(True, alpha=0.3)
    plt.colorbar(scatter, ax=ax4, label="Rounds")

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
        print(f"[OK] Plot saved to {output_path}")
    else:
        plt.show()
396
+
397
+
398
def main():
    """Run the adversarial swarm test harness from the command line.

    Parses ``--runs``, ``--plot`` and ``--output``, runs the batch of
    simulated tests, hands ``--output`` to ``save_results``, prints the
    summary and, when ``--plot`` is given, writes a timestamped plot under
    ``BENCHMARK_DIR``.

    Returns:
        0 on completion, intended as the process exit status. Invalid
        arguments terminate via ``parser.error`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description="Test adversarial swarm performance")
    parser.add_argument("--runs", type=int, default=5, help="Number of test runs")
    parser.add_argument("--plot", action="store_true", help="Generate performance plots")
    parser.add_argument("--output", type=str, help="Output filename for results")
    args = parser.parse_args()

    # Zero or negative runs would leave nothing to average in the summary.
    if args.runs < 1:
        parser.error("--runs must be a positive integer")

    print("=" * 60)
    print(" ADVERSARIAL SWARM TEST HARNESS")
    print("=" * 60)

    tester = AdversarialSwarmTester()

    # Run batch tests
    print(f"\n[INFO] Running {args.runs} simulated tests...")
    results = tester.run_batch_tests(args.runs)

    # Save results
    tester.save_results(args.output)

    # Generate summary
    summary = tester.generate_summary()
    print("\n" + "=" * 60)
    print(" SUMMARY")
    print("=" * 60)
    print(f" Total runs: {summary['total_runs']}")
    print(f" Average rounds: {summary['avg_rounds']:.1f}")
    print(f" Average agreement: {summary['avg_agreement_score']:.0%}")
    print(f" Average cost: ${summary['avg_cost_usd']:.4f}")
    print(f" Convergence rate: {summary['convergence_rate']:.0%}")

    if summary.get("diminishing_returns_round"):
        print(
            f"\n [INSIGHT] Diminishing returns detected at round {summary['diminishing_returns_round']}"
        )
        print(" Consider limiting debates to this many rounds for efficiency.")

    print("\n Marginal value by round:")
    bar_width = 20
    for round_no, val in enumerate(summary.get("avg_marginal_value_by_round", []), start=1):
        # Clamp so values outside 0..10 cannot push the bar past its frame.
        filled = max(0, min(bar_width, int(val * 2)))
        bar = "[" + "=" * filled + " " * (bar_width - filled) + "]"
        print(f" Round {round_no}: {bar} {val:.2f}")

    # Generate plots
    if args.plot:
        plot_path = (
            BENCHMARK_DIR / f"performance_plot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        )
        # savefig does not create missing directories.
        plot_path.parent.mkdir(parents=True, exist_ok=True)
        plot_results(results, plot_path)

    print("\n[OK] Test harness complete")
    return 0
449
+
450
+
451
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -28,16 +28,7 @@ from dataclasses import dataclass, field
28
28
  from pathlib import Path
29
29
  from typing import Any
30
30
 
31
-
32
- # Colors for terminal output
33
- class Colors:
34
- RED = "\033[0;31m"
35
- GREEN = "\033[0;32m"
36
- YELLOW = "\033[1;33m"
37
- BLUE = "\033[0;34m"
38
- CYAN = "\033[0;36m"
39
- NC = "\033[0m" # No Color
40
-
31
+ from lib.colors import Colors
41
32
 
42
33
  # Valid values
43
34
  VALID_MODELS = ["sonnet", "opus", "haiku"]