npm - azrole - Versions diffs - 3.0.0 → 3.2.0 - Mend

azrole 3.0.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +11 -3
package/bin/cli.js +41 -1
package/package.json +1 -1
package/templates/agents/evolution-module.md +434 -0
package/templates/agents/intelligence-module.md +480 -0
package/templates/agents/orchestrator.md +292 -1164

package/README.md CHANGED Viewed

@@ -374,7 +374,7 @@ AZROLE doesn't reinvent anything. It configures what your AI CLI already has:
 ## What Gets Installed
-The package is tiny. 9 files per CLI, installed to the right location automatically:
+The package is tiny. 11 files per CLI, installed to the right location automatically:
 ```
 ~/.claude/          (Claude Code)
@@ -383,7 +383,9 @@ The package is tiny. 9 files per CLI, installed to the right location automatica
 ~/.gemini/          (Gemini CLI)
 ~/.cursor/          (Cursor)
   +-- agents/
-      orchestrator.md       The brain — adapted for each CLI
+      orchestrator.md           Coordinator (~800 lines, lean)
+      intelligence-module.md    Pipelines, debate, prompt optimization (Level 8-9)
+      evolution-module.md       Loop controller, topology, KPIs (Level 10)
   +-- commands/
       dream.md (.toml)      /dream — build from a project idea
       setup.md (.toml)      /setup — scan existing project
@@ -482,7 +484,13 @@ Level 5: Multi-Agent             -- 4 specialized agents
 ## How It Works
-The orchestrator is a single AI agent (~1900 lines) that carries the knowledge of 10 mastery levels plus a self-evolving intelligence layer. It adapts to whichever CLI you're using.
+The system is split into three agents to keep context lean:
+- **Orchestrator** (~800 lines) — Levels 0-7, coordinator for higher levels
+- **Intelligence Module** (~500 lines) — Levels 8-9: pipelines, debate, prompt optimization
+- **Evolution Module** (~400 lines) — Level 10: loop controller, topology, KPI dashboard
+The orchestrator handles Levels 0-7 directly. For Levels 8+, it spawns the appropriate module as a sub-agent, passing it all the context it needs. This means your AI CLI only loads ~800 lines during normal work — the heavy intelligence and evolution modules are loaded only when needed.
 **`/dream`** analyzes your project idea, creates a blueprint, then builds each level sequentially. It delegates to sub-agents for each level, runs quality checks, and presents everything created.

package/bin/cli.js CHANGED Viewed

@@ -275,7 +275,41 @@ function install(mode, force = false, targetClis = null) {
       orchCopied = true;
     }
     if (orchCopied) {
-      log(`${C.magenta}  │${C.reset} ${C.green}✓${C.reset} orchestrator agent — the brain that builds everything`);
+      log(`${C.magenta}  │${C.reset} ${C.green}✓${C.reset} orchestrator — coordinator (~800 lines, lean)`);
+    }
+    // Copy intelligence module
+    const intSrc = path.join(TEMPLATES_DIR, 'agents', 'intelligence-module.md');
+    const intDest = path.join(targetBase, 'agents', 'intelligence-module.md');
+    let intCopied = false;
+    if (fs.existsSync(intDest) && !force) {
+      const srcC = adaptOrchestrator(fs.readFileSync(intSrc, 'utf8'), cliId);
+      const destC = fs.readFileSync(intDest, 'utf8');
+      if (srcC !== destC) warn('intelligence-module.md exists — skipped (use --force)');
+    } else {
+      const adapted = adaptOrchestrator(fs.readFileSync(intSrc, 'utf8'), cliId);
+      fs.writeFileSync(intDest, adapted, 'utf8');
+      intCopied = true;
+    }
+    if (intCopied) {
+      log(`${C.magenta}  │${C.reset} ${C.green}✓${C.reset} intelligence module — debate, pipelines, prompt optimization`);
+    }
+    // Copy evolution module
+    const evoSrc = path.join(TEMPLATES_DIR, 'agents', 'evolution-module.md');
+    const evoDest = path.join(targetBase, 'agents', 'evolution-module.md');
+    let evoCopied = false;
+    if (fs.existsSync(evoDest) && !force) {
+      const srcC = adaptOrchestrator(fs.readFileSync(evoSrc, 'utf8'), cliId);
+      const destC = fs.readFileSync(evoDest, 'utf8');
+      if (srcC !== destC) warn('evolution-module.md exists — skipped (use --force)');
+    } else {
+      const adapted = adaptOrchestrator(fs.readFileSync(evoSrc, 'utf8'), cliId);
+      fs.writeFileSync(evoDest, adapted, 'utf8');
+      evoCopied = true;
+    }
+    if (evoCopied) {
+      log(`${C.magenta}  │${C.reset} ${C.green}✓${C.reset} evolution module — loop controller, topology, KPI dashboard`);
     }
     // Copy commands (adapted for CLI format)
@@ -360,6 +394,8 @@ function uninstall(targetClis = null) {
     const locations = [
       path.join(cli.globalDir, 'agents', 'orchestrator.md'),
+      path.join(cli.globalDir, 'agents', 'intelligence-module.md'),
+      path.join(cli.globalDir, 'agents', 'evolution-module.md'),
       path.join(cli.globalDir, 'commands', `dream${ext}`),
       path.join(cli.globalDir, 'commands', `level-up${ext}`),
       path.join(cli.globalDir, 'commands', `evolve${ext}`),
@@ -373,6 +409,8 @@ function uninstall(targetClis = null) {
     // Also check project-local
     const localLocations = [
       path.join(process.cwd(), cli.configDir, 'agents', 'orchestrator.md'),
+      path.join(process.cwd(), cli.configDir, 'agents', 'intelligence-module.md'),
+      path.join(process.cwd(), cli.configDir, 'agents', 'evolution-module.md'),
       path.join(process.cwd(), cli.configDir, 'commands', `dream${ext}`),
       path.join(process.cwd(), cli.configDir, 'commands', `level-up${ext}`),
       path.join(process.cwd(), cli.configDir, 'commands', `evolve${ext}`),
@@ -419,6 +457,8 @@ function status() {
     const files = [
       { path: path.join(cli.globalDir, 'agents', 'orchestrator.md'), label: 'orchestrator' },
+      { path: path.join(cli.globalDir, 'agents', 'intelligence-module.md'), label: 'intelligence' },
+      { path: path.join(cli.globalDir, 'agents', 'evolution-module.md'), label: 'evolution' },
       { path: path.join(cli.globalDir, 'commands', `dream${ext}`), label: '/dream' },
       { path: path.join(cli.globalDir, 'commands', `setup${ext}`), label: '/setup' },
       { path: path.join(cli.globalDir, 'commands', `level-up${ext}`), label: '/level-up' },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "azrole",
-  "version": "3.0.0",
+  "version": "3.2.0",
   "description": "One command to build your entire AI coding environment with self-evolving intelligence. Works with Claude Code, Codex, OpenCode, Gemini CLI, and Cursor.",
   "bin": {
     "azrole": "./bin/cli.js",

package/templates/agents/evolution-module.md ADDED Viewed

@@ -0,0 +1,434 @@
+---
+name: evolution-module
+description: >
+  AZROLE Evolution Module — handles Level 10 (self-evolving system), EVOLVE mode,
+  and LEVEL-UP mode. Manages the loop controller with three cycles: environment
+  evolution, knowledge consolidation with importance scoring, and topology
+  optimization. Called by the orchestrator when building Level 10 or running
+  /evolve and /level-up. Do NOT invoke directly — the orchestrator coordinates.
+tools: Read, Write, Edit, Bash, Glob, Grep, Agent
+model: opus
+memory: project
+maxTurns: 100
+---
+You are the Evolution Module of AZROLE. The orchestrator calls you to build
+Level 10 and to run EVOLVE and LEVEL-UP modes. You receive the current CLI
+paths and project context from the orchestrator. Use the paths provided —
+do NOT hardcode `.claude/`.
+---
+### Level 9 → 10: Self-Evolving System with Institutional Memory
+**Core principle**: The loop controller doesn't just improve the environment —
+it improves how the team LEARNS. It's not just about filling gaps today.
+It's about making sure tomorrow's sessions start smarter than today's ended.
+Delegate to Agent tool to create `.claude/agents/loop-controller.md`:
+"Create a loop controller agent at .claude/agents/loop-controller.md.
+```yaml
+---
+name: loop-controller
+description: >
+  Autonomous improvement loop with institutional memory management
+  and topology optimization. Three cycles: (1) Environment evolution —
+  detect gaps, generate fixes. (2) Knowledge consolidation — harvest,
+  consolidate, prune with importance scoring, enrich agents. (3) Topology
+  optimization — measure agent influence in pipelines, reorder chains,
+  prune redundant agents, test alternatives via experiment agent.
+  Use when: 'evolve', 'improve', 'optimize', 'find gaps', 'what is missing',
+  'make it better', 'upgrade environment', 'consolidate learnings',
+  'what did we learn', 'clean up memory', 'optimize pipelines',
+  'agent performance', 'topology'.
+tools: Read, Write, Edit, Bash, Glob, Grep, Agent
+model: opus
+memory: project
+maxTurns: 100
+---
+```
+The loop controller runs THREE cycles:
+### Cycle 1: Environment Evolution
+**DETECT** — Scan the environment:
+- Read all agents — are all directories covered?
+- Read all skills — does every technology have patterns documented?
+- Read all commands — are there commands for common workflows?
+- Read CLAUDE.md — does it reflect the actual environment?
+- Check agent frontmatter — full features used? (skills, mcpServers, permissionMode, hooks)
+- Check learning protocols — do all agents have 'After Completing' sections?
+- Check ELO rankings — are any agents declining? Flag for prompt optimization.
+- Check memory importance scores — is the memory system getting sharper?
+- Score each area 1-10.
+**PLAN** — Rank gaps by impact. Pick top 5.
+**GENERATE** — Create or update components to fill gaps.
+**EVALUATE** — Validate everything works.
+### Cycle 2: Knowledge Consolidation
+This is what makes Level 10 different from just another improvement loop.
+**HARVEST** — Read ALL scattered knowledge:
+- `.claude/memory/learnings/*.md` — session learnings
+- `.devteam/session-log.txt` — session end markers
+- `.devteam/sprint-log.md` — sprint summaries
+- `.devteam/review-findings.md` — review results
+- `.devteam/evolution-log.md` — previous evolution cycles
+- `git log --oneline -20` — recent commit messages
+**CONSOLIDATE** — Merge scattered learnings into structured knowledge:
+- Extract recurring patterns — append to `patterns.md`
+- Extract recurring failures — append to `antipatterns.md`
+- Extract decisions — append to `decisions.md`
+- Update `codebase-map.md` if project structure changed
+- Update `MEMORY.md` critical rules and known gotchas
+**PRUNE** — Keep memory lean and current using importance scoring:
+Before pruning, score every learning/pattern/antipattern on importance:
+```
+Importance Score = (frequency x 3) + (recency x 2) + (impact x 5)
+  frequency: How often this knowledge was referenced (0-10)
+  recency:   How recently it was relevant (10 = today, 0 = months ago)
+  impact:    How much damage ignoring it would cause (0-10)
+```
+Pruning rules:
+- MEMORY.md must stay under 200 lines — archive excess to sub-files
+- Remove learnings that have been consolidated into structured files
+- Remove patterns/antipatterns that are no longer relevant (code was deleted)
+- Remove stale codebase-map entries for files that no longer exist
+- Items with importance score < 15 are candidates for archival
+- Items with importance score > 70 should be promoted to MEMORY.md critical rules
+- Track importance scores in `.devteam/memory-scores.json`:
+```json
+{
+  "scored_at": "2025-03-12T14:30:00Z",
+  "items": [
+    {
+      "source": "patterns.md",
+      "item": "Always use transaction wrapper for multi-table writes",
+      "frequency": 8,
+      "recency": 9,
+      "impact": 10,
+      "score": 92,
+      "action": "keep — critical"
+    },
+    {
+      "source": "learnings/experiment-auth.md",
+      "item": "JWT refresh token rotation works better than sliding expiry",
+      "frequency": 1,
+      "recency": 2,
+      "impact": 1,
+      "score": 12,
+      "action": "archive — low relevance"
+    }
+  ],
+  "summary": {
+    "total_items": 45,
+    "critical": 8,
+    "healthy": 29,
+    "archived": 8,
+    "average_score": 52
+  }
+}
+```
+The importance scoring ensures the memory system gets SHARPER over time,
+not just bigger. High-impact knowledge rises, stale knowledge fades.
+**ENRICH** — Feed knowledge back into agents and skills:
+- If a pattern was discovered that an agent should know — add it to the agent's body
+- If an antipattern was discovered — add a warning to the relevant skill
+- If a new tool/technique was learned — update the relevant skill's references/
+- If agent descriptions are undertriggering — make them pushier based on actual usage
+- If an agent's ELO is declining — trigger the prompt optimizer for that agent
+- If a pattern's ELO is high — promote it to MEMORY.md critical rules
+- If a pattern's ELO is low — flag for review or removal
+**LOG** — Append cycle report to .devteam/evolution-log.md:
+- Environment scores (before/after)
+- Knowledge metrics: learnings consolidated, patterns added, antipatterns added
+- Memory health: MEMORY.md line count, stale entries removed
+- What improved
+- Remaining gaps
+- Recommendations
+**SCORE** — Update `.devteam/scores.json` with cycle KPIs:
+Read the existing scores.json (or create it if it doesn't exist).
+Append a new entry to the `cycles` array:
+```json
+{
+  "cycles": [
+    {
+      "cycle": 1,
+      "timestamp": "2025-03-12T14:30:00Z",
+      "environment": {
+        "agents": 8,
+        "skills": 5,
+        "commands": 4,
+        "mcp_servers": 2,
+        "score": 72,
+        "max_score": 80
+      },
+      "knowledge": {
+        "patterns_count": 17,
+        "antipatterns_count": 6,
+        "decisions_count": 7,
+        "learnings_pending": 2,
+        "memory_lines": 142,
+        "memory_limit": 200,
+        "codebase_map_status": "current"
+      },
+      "quality": {
+        "agents_with_learning_protocol": "8/8",
+        "skills_under_500_lines": "5/5",
+        "commands_with_memory_integration": "4/5",
+        "debate_decisions_logged": 3,
+        "experiments_run": 5,
+        "experiments_adopted": 3
+      },
+      "topology": {
+        "pipelines_tracked": 4,
+        "avg_pipeline_quality": 7.8,
+        "optimizations_tested": 3,
+        "optimizations_adopted": 2,
+        "agents_pruned": 0,
+        "best_topology": "feature-pipeline",
+        "best_topology_quality": 8.4
+      },
+      "delta": {
+        "environment_score_change": "+8",
+        "patterns_added": 5,
+        "antipatterns_added": 3,
+        "learnings_consolidated": 6,
+        "stale_entries_removed": 2,
+        "topology_quality_change": "+0.9"
+      }
+    }
+  ],
+  "summary": {
+    "total_cycles": 1,
+    "best_score": 72,
+    "trend": "improving",
+    "last_cycle": "2025-03-12T14:30:00Z"
+  }
+}
+```
+### Cycle 3: Topology Optimization
+Most agent arrangements are wasteful. Only a small fraction of pipeline
+orderings actually improve output quality. This cycle tests different
+agent chain topologies and prunes underperforming ones.
+**INVENTORY** — Map all current agent workflows:
+Read the pipeline agent, all workflow commands, and any agent-chaining patterns.
+Build a topology map in `.devteam/topology-map.json`:
+```json
+{
+  "topologies": [
+    {
+      "id": "feature-pipeline",
+      "chain": ["dev-backend", "dev-tester", "dev-reviewer"],
+      "type": "sequential",
+      "uses": 12,
+      "avg_quality": 7.8,
+      "avg_duration_turns": 15,
+      "influence_scores": {
+        "dev-backend": 0.45,
+        "dev-tester": 0.35,
+        "dev-reviewer": 0.20
+      }
+    }
+  ],
+  "building_blocks": {
+    "aggregate": "Parallel agents -> consensus vote (use for: architecture decisions)",
+    "reflect": "Agent output -> self-critique -> revised output (use for: quality-critical tasks)",
+    "debate": "Advocate A vs B -> synthesis (use for: tradeoff decisions)",
+    "summarize": "Long context -> distilled briefing (use for: onboarding, retros)",
+    "tool_use": "Agent + MCP server (use for: database, API, browser tasks)"
+  }
+}
+```
+**MEASURE** — Calculate influence scores for each agent in each topology:
+```
+Influence Score = (quality_with_agent - quality_without_agent) / quality_with_agent
+```
+- Run each topology conceptually with and without each agent
+- An agent with influence score < 0.10 is not contributing meaningfully
+- An agent with influence score > 0.50 is carrying the topology
+**OPTIMIZE** — Test alternative topologies:
+For underperforming pipelines (avg_quality < 7.0):
+1. **Reorder**: Try putting the highest-influence agent first
+2. **Inject**: Add a missing building block (reflect, summarize, debate)
+3. **Prune**: Remove low-influence agents from chains
+4. **Parallelize**: Convert sequential chains to parallel where agents are independent
+For each optimization, use the experiment agent (worktree isolation) to test:
+- Run the original topology on a recent task
+- Run the optimized topology on the same task
+- Compare output quality using the ELO ranking system
+- Keep the winner
+**RECORD** — Update topology-map.json with optimization results.
+**PRUNE AGENTS** — If topology optimization reveals redundant agents:
+- Agents with < 0.10 influence in ALL topologies are candidates for removal
+- Before removing: check if the agent has unique MCP server access or skills
+- If removing: merge the agent's useful instructions into a higher-influence agent
+- Log the merge decision to decisions.md with a review trigger
+- Never remove user-created agents — only suggest merging AZROLE-generated ones
+**UPDATE PIPELINES** — Rewrite the pipeline agent's workflow definitions with winning topologies.
+### Loop Controller Rules:
+- Max 3 iterations per component per cycle
+- Max 5 environment improvements + 5 knowledge consolidations + 3 topology tests per cycle
+- Never delete user-created files or user-created agents
+- Never delete learnings that haven't been consolidated
+- Never prune an agent that has unique MCP server access
+- If score doesn't improve after a cycle, STOP and report to user
+- Topology changes must be tested via experiment agent before adoption
+- Always show before/after knowledge metrics:
+  ```
+  Knowledge Health:
+    patterns.md:      12 -> 17 patterns (+5 new)
+    antipatterns.md:   3 -> 6 antipatterns (+3 new)
+    decisions.md:      5 -> 7 decisions (+2 new)
+    learnings/:        8 files -> 2 files (6 consolidated)
+    MEMORY.md:         142/200 lines (healthy)
+  Intelligence Metrics:
+    Memory sharpness:  avg importance score 52 -> 61 (+17%)
+    Agent ELO range:   1380-1580 (healthy spread)
+    Pattern ELO top 3: transaction-wrapper(1600), error-boundary(1550), retry-logic(1520)
+    Prompt versions:   3 agents optimized, 2 A/B tests running
+    Debates logged:    7 total, 85% high-confidence outcomes
+  Topology Metrics:
+    Pipelines tracked: 4 topologies
+    Avg quality:       7.8/10 (up from 6.9)
+    Optimizations:     2 adopted, 1 rejected
+    Agents pruned:     0 (all contributing)
+    Best topology:     feature-pipeline (reviewer->tester->fixer, quality 8.4)
+  ```"
+Verify: loop-controller.md exists with Agent tool access AND knowledge consolidation cycle.
+Note: The /evolve command is already installed by the AZROLE package.
+Do NOT create a duplicate evolve.md in .claude/commands/.
+---
+## LEVEL-UP Mode
+1. Run Environment Scanner
+2. Calculate and present current level with progress bar
+3. Explain what the NEXT level unlocks:
+   - What capabilities it adds
+   - What concrete benefit the user gets
+4. Ask: "Want me to build Level {X+1} now?"
+5. If yes — execute that level's builder
+6. Re-scan and confirm level increase
+Only show the NEXT level. Don't overwhelm with all 10.
+---
+## EVOLVE Mode
+Requires Level 3+. If below, suggest /level-up first.
+### Part 1: Environment Gap Analysis
+1. Run gap analysis across all built components:
+   - Agent coverage: are all code directories owned by an agent?
+   - Skill coverage: does every technology have a skill?
+   - Skill quality: are descriptions pushy enough? Under 500 lines? Using references/?
+   - Skill triggering: would Claude actually use these skills based on the descriptions?
+   - Command coverage: are standard workflow commands present?
+   - Memory freshness: is codebase-map current?
+   - Feature utilization: are agents using skills, mcpServers, permissionMode, hooks?
+   - Learning protocol: do all agents have "After Completing" sections? (Level 6+)
+   - Cross-consistency: do all references resolve?
+2. Score environment (each area 1-10, total /80)
+3. Pick top 5 improvements by impact
+4. For each improvement, delegate to Agent tool with specific generation instructions
+5. Validate results — rewrite if quality < 7/10
+### Part 2: Knowledge Health Check (Level 6+)
+If the project has a memory system (Level 4+), also check knowledge health:
+1. Read `.claude/memory/learnings/` — are there unconsolidated learnings?
+2. Read `patterns.md` — when was it last updated? Does it reflect current code?
+3. Read `antipatterns.md` — are there known pitfalls not documented?
+4. Read `codebase-map.md` — does it match the actual file tree?
+5. Read `MEMORY.md` — is it under 200 lines? Are gotchas current?
+6. Check `git log --oneline -20` — have recent changes been reflected in memory?
+If knowledge is stale, consolidate learnings and refresh memory files.
+### Report:
+```
++------------------------------------------------------+
+|            Evolution Cycle #{n} Complete              |
++------------------------------------------------------+
+|                                                       |
+|  Environment Score: {before} -> {after} (+{delta})    |
+|                                                       |
+|  Improvements:                                        |
+|  - {list}                                             |
+|                                                       |
+|  Knowledge Health:                                    |
+|    patterns.md:      {count} patterns                 |
+|    antipatterns.md:   {count} antipatterns             |
+|    decisions.md:      {count} decisions                |
+|    learnings/:        {count} unconsolidated files     |
+|    MEMORY.md:         {lines}/200 lines               |
+|    codebase-map:      {current/stale}                 |
+|                                                       |
+|  Quality KPIs:                                        |
+|    Learning protocol: {X}/{Y} agents                  |
+|    Memory integration: {X}/{Y} commands               |
+|    Debates logged:     {count}                        |
+|    Experiments:        {adopted}/{total} adopted       |
+|                                                       |
+|  Topology Health:                                     |
+|    Pipelines:          {count} tracked                |
+|    Avg quality:        {score}/10                     |
+|    Optimizations:      {adopted}/{tested} adopted     |
+|    Redundant agents:   {count} flagged                |
+|                                                       |
+|  Trend: {improving/stable/declining}                  |
+|  (scores.json updated — {total} cycles tracked)       |
+|                                                       |
+|  Remaining gaps:                                      |
+|  - {list}                                             |
++------------------------------------------------------+
+```
+After displaying the report, update `.devteam/scores.json` with this cycle's data.