mindforge-cc 10.0.2 → 10.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mindforge/config.json +50 -2
- package/.mindforge/engine/autonomous/cross-iteration-bridge.md +96 -0
- package/.mindforge/engine/cost-tracking/budget-enforcer.md +68 -0
- package/.mindforge/engine/cost-tracking/router.md +58 -0
- package/.mindforge/engine/cost-tracking/token-ledger.md +77 -0
- package/.mindforge/engine/council/council-protocol.md +96 -0
- package/.mindforge/engine/council/council-templates.md +85 -0
- package/.mindforge/engine/council/synthesis-engine.md +71 -0
- package/.mindforge/engine/instincts/capture-engine.md +63 -0
- package/.mindforge/engine/instincts/instinct-schema.md +76 -0
- package/.mindforge/engine/instincts/promotion-engine.md +77 -0
- package/.mindforge/engine/skills/composition.md +83 -0
- package/.mindforge/engine/skills/loader.md +16 -0
- package/.mindforge/personas/cost-optimizer.md +71 -0
- package/.mindforge/personas/council-architect.md +66 -0
- package/.mindforge/personas/council-critic.md +67 -0
- package/.mindforge/personas/council-pragmatist.md +71 -0
- package/.mindforge/personas/council-skeptic.md +73 -0
- package/.mindforge/personas/doc-auditor.md +84 -0
- package/.mindforge/personas/instinct-curator.md +83 -0
- package/.mindforge/personas/multi-model-bridge.md +86 -0
- package/.mindforge/personas/swarm-templates.json +28 -1
- package/.mindforge/personas/threat-modeler.md +82 -0
- package/.mindforge/skills/agent-introspection-debugging/SKILL.md +88 -0
- package/.mindforge/skills/agent-loops/SKILL.md +84 -0
- package/.mindforge/skills/autonomous-loops/SKILL.md +105 -0
- package/.mindforge/skills/continuous-learning/SKILL.md +84 -0
- package/.mindforge/skills/cost-aware-routing/SKILL.md +83 -0
- package/.mindforge/skills/council/SKILL.md +68 -0
- package/.mindforge/skills/doc-health-audit/SKILL.md +102 -0
- package/.mindforge/skills/multi-llm-consult/SKILL.md +75 -0
- package/.mindforge/skills/threat-modeling/SKILL.md +109 -0
- package/.mindforge/skills/verification-loop/SKILL.md +85 -0
- package/CHANGELOG.md +19 -0
- package/MINDFORGE.md +4 -4
- package/README.md +2 -2
- package/RELEASENOTES.md +66 -0
- package/bin/installer-core.js +1 -1
- package/bin/wizard/theme.js +2 -2
- package/docs/commands-reference.md +18 -1
- package/package.json +1 -1
package/.mindforge/config.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "10.0.
|
|
2
|
+
"version": "10.0.3",
|
|
3
3
|
"environment": "development",
|
|
4
4
|
"governance": {
|
|
5
5
|
"drift_threshold": 0.75,
|
|
6
6
|
"critical_drift_threshold": 0.5,
|
|
7
7
|
"res_threshold": 0.8,
|
|
8
|
-
"active_did": "did:mindforge:
|
|
8
|
+
"active_did": "did:mindforge:8aed528b-5d41-4655-9724-19152c943644"
|
|
9
9
|
},
|
|
10
10
|
"revops": {
|
|
11
11
|
"market_registry": {
|
|
@@ -44,6 +44,12 @@
|
|
|
44
44
|
"cost_output": 0.0004,
|
|
45
45
|
"benchmark": 82,
|
|
46
46
|
"provider": "Anthropic"
|
|
47
|
+
},
|
|
48
|
+
"claude-opus-4-7": {
|
|
49
|
+
"cost_input": 0.015,
|
|
50
|
+
"cost_output": 0.075,
|
|
51
|
+
"benchmark": 100,
|
|
52
|
+
"provider": "Anthropic"
|
|
47
53
|
}
|
|
48
54
|
},
|
|
49
55
|
"default_baseline_model": "claude-sonnet-4-6",
|
|
@@ -62,5 +68,47 @@
|
|
|
62
68
|
"min_success_count": 3,
|
|
63
69
|
"max_drift_threshold": 0.1,
|
|
64
70
|
"auto_verify": false
|
|
71
|
+
},
|
|
72
|
+
"instincts": {
|
|
73
|
+
"mode": "auto-capture",
|
|
74
|
+
"max_active_per_project": 100,
|
|
75
|
+
"promotion_confidence_threshold": 0.85,
|
|
76
|
+
"promotion_min_applications": 5,
|
|
77
|
+
"prune_below_confidence": 0.2,
|
|
78
|
+
"prune_after_days_inactive": 30,
|
|
79
|
+
"max_capture_per_session": 5,
|
|
80
|
+
"store_path": ".mindforge/engine/instincts/instinct-store.jsonl"
|
|
81
|
+
},
|
|
82
|
+
"council": {
|
|
83
|
+
"voices": [
|
|
84
|
+
"architect",
|
|
85
|
+
"skeptic",
|
|
86
|
+
"pragmatist",
|
|
87
|
+
"critic"
|
|
88
|
+
],
|
|
89
|
+
"max_rounds": 2,
|
|
90
|
+
"consensus_threshold": 0.75,
|
|
91
|
+
"word_limit_per_voice": 200,
|
|
92
|
+
"output_path": ".planning/decisions/"
|
|
93
|
+
},
|
|
94
|
+
"cost_routing": {
|
|
95
|
+
"enabled": true,
|
|
96
|
+
"simple_threshold": 3,
|
|
97
|
+
"standard_threshold": 6,
|
|
98
|
+
"complex_threshold": 8,
|
|
99
|
+
"model_tiers": {
|
|
100
|
+
"simple": "claude-haiku-4-5",
|
|
101
|
+
"standard": "claude-sonnet-4-6",
|
|
102
|
+
"complex": "claude-opus-4-7",
|
|
103
|
+
"research": "gemini-2.5-pro",
|
|
104
|
+
"consult": "gpt-4o"
|
|
105
|
+
},
|
|
106
|
+
"budget": {
|
|
107
|
+
"session_warn_usd": 5,
|
|
108
|
+
"session_hard_limit_usd": 20,
|
|
109
|
+
"project_weekly_warn_usd": 50,
|
|
110
|
+
"project_weekly_hard_limit_usd": 200
|
|
111
|
+
},
|
|
112
|
+
"ledger_path": ".mindforge/metrics/token-ledger.jsonl"
|
|
65
113
|
}
|
|
66
114
|
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Autonomous Engine — Cross-Iteration Bridge
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Provides semantic context bridging across autonomous mode iterations via
|
|
5
|
+
a shared notes file. Complements the structural `HANDOFF.json` (which tracks
|
|
6
|
+
task state) with reasoning context (WHY decisions were made).
|
|
7
|
+
|
|
8
|
+
## The Problem
|
|
9
|
+
In autonomous mode, each task gets a fresh context window. While HANDOFF.json
|
|
10
|
+
preserves structural state (what tasks remain, what was completed), it loses:
|
|
11
|
+
- Why a particular approach was chosen over alternatives
|
|
12
|
+
- Observations that inform subsequent tasks
|
|
13
|
+
- Warnings discovered during earlier tasks
|
|
14
|
+
- Cross-task patterns that only emerge over multiple iterations
|
|
15
|
+
|
|
16
|
+
## Solution: SHARED_TASK_NOTES.md
|
|
17
|
+
|
|
18
|
+
### Location
|
|
19
|
+
`.planning/SHARED_TASK_NOTES.md` — single file, managed by the autonomous engine.
|
|
20
|
+
|
|
21
|
+
### Format
|
|
22
|
+
```markdown
|
|
23
|
+
# Shared Task Notes (Auto-Managed)
|
|
24
|
+
<!-- Entries are append-only during autonomous execution. Pruned after phase completion. -->
|
|
25
|
+
|
|
26
|
+
## [2026-05-25T10:30:00Z] Task: implement-auth-middleware
|
|
27
|
+
**Observation:** The existing session store uses Redis, not in-memory. All new auth code must use the Redis client at `src/lib/redis.ts`.
|
|
28
|
+
**Decision:** Chose JWT + Redis session store over pure stateless JWT because existing code already depends on session lookups.
|
|
29
|
+
**Warning:** The Redis connection pool is limited to 10 — avoid opening new connections per request.
|
|
30
|
+
|
|
31
|
+
## [2026-05-25T10:45:00Z] Task: add-rate-limiting
|
|
32
|
+
**Observation:** Rate limiting should use the same Redis instance (discovered in previous task).
|
|
33
|
+
**Decision:** Implemented sliding window counter in Redis rather than in-memory Map.
|
|
34
|
+
**Cross-ref:** Auth middleware (previous task) sets `req.userId` which rate limiter needs.
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Entry Schema
|
|
38
|
+
Each entry contains:
|
|
39
|
+
- **Timestamp** — When the note was written
|
|
40
|
+
- **Task** — Which task produced this note
|
|
41
|
+
- **Observation** — Facts discovered (not opinions)
|
|
42
|
+
- **Decision** — Choices made and WHY (the reasoning)
|
|
43
|
+
- **Warning** (optional) — Hazards for future tasks
|
|
44
|
+
- **Cross-ref** (optional) — Dependencies on other tasks
|
|
45
|
+
|
|
46
|
+
## Lifecycle Rules
|
|
47
|
+
|
|
48
|
+
### Writing
|
|
49
|
+
- Auto-executor appends an entry after a task completes, but only when the task meets the criteria below
|
|
50
|
+
- Only write if the task produced non-obvious insights
|
|
51
|
+
- Keep entries under 100 words each (concise, not verbose)
|
|
52
|
+
- Never write secrets, credentials, or full code blocks
|
|
53
|
+
|
|
54
|
+
### Reading
|
|
55
|
+
- Auto-executor reads SHARED_TASK_NOTES.md at the START of each new task
|
|
56
|
+
- Only the last 20 entries are loaded (FIFO window)
|
|
57
|
+
- Notes inform task execution but do NOT override task specifications
|
|
58
|
+
|
|
59
|
+
### Pruning
|
|
60
|
+
- After a phase completes: review all notes
|
|
61
|
+
- Notes with lasting value → migrate to knowledge base (`.mindforge/memory/`)
|
|
62
|
+
- Remaining notes → archive to `.planning/history/shared-notes-[phase]-[date].md`
|
|
63
|
+
- Clear SHARED_TASK_NOTES.md for next phase
|
|
64
|
+
|
|
65
|
+
### Size Limits
|
|
66
|
+
- Maximum entries: 50
|
|
67
|
+
- Maximum total size: 10K tokens
|
|
68
|
+
- When limit is reached: evict oldest entries (FIFO)
|
|
69
|
+
- Evicted entries are NOT archived (they served their purpose)
|
|
70
|
+
|
|
71
|
+
## Integration with HANDOFF.json
|
|
72
|
+
|
|
73
|
+
HANDOFF.json tracks WHAT (task state, completion, queue).
|
|
74
|
+
SHARED_TASK_NOTES tracks WHY (reasoning, context, warnings).
|
|
75
|
+
|
|
76
|
+
Both are read at task start:
|
|
77
|
+
1. Read HANDOFF.json → know what to do next
|
|
78
|
+
2. Read SHARED_TASK_NOTES.md → know what context to carry forward
|
|
79
|
+
3. Execute task with both structural and semantic context
|
|
80
|
+
4. Write HANDOFF.json → update task state
|
|
81
|
+
5. Write SHARED_TASK_NOTES.md → preserve reasoning for next iteration
|
|
82
|
+
|
|
83
|
+
## When NOT to Write Notes
|
|
84
|
+
|
|
85
|
+
- Trivial tasks (single-line changes, formatting)
|
|
86
|
+
- Tasks with no cross-task implications
|
|
87
|
+
- Duplicate information already in HANDOFF.json
|
|
88
|
+
- Information derivable from git history
|
|
89
|
+
|
|
90
|
+
## Failure Recovery
|
|
91
|
+
|
|
92
|
+
If SHARED_TASK_NOTES.md becomes corrupted or too large:
|
|
93
|
+
1. Archive current file as-is
|
|
94
|
+
2. Create fresh empty file
|
|
95
|
+
3. Log incident in AUDIT
|
|
96
|
+
4. Continue — notes are advisory, not required for execution
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Cost Tracking — Budget Enforcer
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Monitor and enforce token budgets across sessions, preventing runaway costs
|
|
5
|
+
while maintaining quality standards.
|
|
6
|
+
|
|
7
|
+
## Budget Hierarchy
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Organization budget (monthly)
|
|
11
|
+
└── Project budget (weekly)
|
|
12
|
+
└── Session budget (per-session)
|
|
13
|
+
└── Task budget (per-task)
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Default Budgets (configurable in config.json)
|
|
17
|
+
|
|
18
|
+
| Scope | Warning Threshold | Hard Limit |
|
|
19
|
+
|-------|---------|-----------|
|
|
20
|
+
| Session | $5.00 | $20.00 |
|
|
21
|
+
| Task | $1.00 | $5.00 |
|
|
22
|
+
| Project (weekly) | $50.00 | $200.00 |
|
|
23
|
+
|
|
24
|
+
## Enforcement Rules
|
|
25
|
+
|
|
26
|
+
### Pre-Task Check
|
|
27
|
+
Before each task starts:
|
|
28
|
+
1. Estimate tokens required (based on task type + file count)
|
|
29
|
+
2. Compare estimate against remaining budget at all levels
|
|
30
|
+
3. If estimate > remaining at ANY level: warn user with breakdown
|
|
31
|
+
4. If hard limit reached at ANY level: block execution, require override
|
|
32
|
+
|
|
33
|
+
### During-Task Monitoring
|
|
34
|
+
- Track actual tokens consumed per model call
|
|
35
|
+
- Running total visible via `/mindforge:cost-report`
|
|
36
|
+
- If a task's actual usage reaches more than 2x its estimate: pause and report
|
|
37
|
+
|
|
38
|
+
### Post-Task Accounting
|
|
39
|
+
After each task completes:
|
|
40
|
+
1. Record actual tokens: input, output, cached, by model
|
|
41
|
+
2. Update all budget levels (session, project)
|
|
42
|
+
3. Compare actual vs estimated (for future estimation accuracy)
|
|
43
|
+
|
|
44
|
+
## Token Counting
|
|
45
|
+
|
|
46
|
+
For each model interaction, record:
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"timestamp": "ISO-8601",
|
|
50
|
+
"task_id": "task-uuid",
|
|
51
|
+
"model": "claude-sonnet-4-6",
|
|
52
|
+
"tokens_input": 12500,
|
|
53
|
+
"tokens_output": 3200,
|
|
54
|
+
"tokens_cached": 8000,
|
|
55
|
+
"estimated_cost_usd": 0.085,
|
|
56
|
+
"routing_tier": "standard",
|
|
57
|
+
"routing_reason": "difficulty_score_5"
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Optimization Recommendations
|
|
62
|
+
|
|
63
|
+
The enforcer generates recommendations when patterns indicate waste:
|
|
64
|
+
- "Task X used opus but produced a simple edit → suggest standard tier"
|
|
65
|
+
- "80% of tokens were re-sent context → suggest compaction"
|
|
66
|
+
- "3 consecutive tasks hit the same files → suggest batching"
|
|
67
|
+
|
|
68
|
+
These appear in `/mindforge:cost-report` output.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Cost Tracking — Model Router
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Select the optimal model for each task based on complexity scoring, token budget,
|
|
5
|
+
and cost-performance tradeoffs. Routes tasks to the cheapest model that can handle them.
|
|
6
|
+
|
|
7
|
+
## Model Tiers
|
|
8
|
+
|
|
9
|
+
| Tier | Model | Cost/1M tokens (est.) | Use When |
|
|
10
|
+
|------|-------|----------------------|----------|
|
|
11
|
+
| simple | claude-haiku-4-5 | $0.25/$1.25 | Single-file edits, formatting, simple lookups, file reads |
|
|
12
|
+
| standard | claude-sonnet-4-6 | $3/$15 | Multi-file implementation, code review, 90% of daily tasks |
|
|
13
|
+
| complex | claude-opus-4-7 | $15/$75 | Architecture decisions, security audits, debugging hard problems |
|
|
14
|
+
| research | gemini-2.5-pro | ~$1.25/$10 | External research, web search synthesis, long-context analysis |
|
|
15
|
+
| consult | gpt-4o | ~$2.50/$10 | Second opinions, alternative perspectives, validation |
|
|
16
|
+
|
|
17
|
+
## Routing Decision Matrix
|
|
18
|
+
|
|
19
|
+
The router uses the difficulty score from `.mindforge/intelligence/difficulty-scorer.md`:
|
|
20
|
+
|
|
21
|
+
| Difficulty Score | Files Touched | Recommended Tier |
|
|
22
|
+
|-----------------|---------------|-----------------|
|
|
23
|
+
| 1-3 | 1 file | simple |
|
|
24
|
+
| 3-6 | 1-3 files | standard |
|
|
25
|
+
| 6-8 | 3-7 files | complex |
|
|
26
|
+
| 8-10 | 7+ files OR architectural | complex |
|
|
27
|
+
| Any (research needed) | N/A | research |
|
|
28
|
+
|
|
29
|
+
## Override Rules
|
|
30
|
+
|
|
31
|
+
These always override the matrix:
|
|
32
|
+
- Security-sensitive tasks (auth/payment/PII): minimum **standard** tier
|
|
33
|
+
- Architectural decisions (ADRs, new systems): minimum **complex** tier
|
|
34
|
+
- Simple file reads/exploration: always **simple** tier regardless of score
|
|
35
|
+
- Multi-LLM consult requests: use **consult** + **research** tiers
|
|
36
|
+
|
|
37
|
+
## Routing Protocol
|
|
38
|
+
|
|
39
|
+
1. Receive task description and file list
|
|
40
|
+
2. Run difficulty scorer → get score 1-10
|
|
41
|
+
3. Map score to tier via decision matrix
|
|
42
|
+
4. Apply override rules
|
|
43
|
+
5. Check budget: if remaining budget < estimated cost, downgrade one tier with WARNING
|
|
44
|
+
6. Log routing decision to token-ledger.jsonl
|
|
45
|
+
7. Return selected model ID
|
|
46
|
+
|
|
47
|
+
## Budget Guard
|
|
48
|
+
|
|
49
|
+
If the session's cumulative cost exceeds thresholds from MINDFORGE.md:
|
|
50
|
+
- `[COST_WARN_USD]` → Log warning, continue
|
|
51
|
+
- `[COST_HARD_LIMIT_USD]` → HALT, require user confirmation to continue
|
|
52
|
+
|
|
53
|
+
## Downgrade Protocol
|
|
54
|
+
|
|
55
|
+
When budget is tight but task is critical:
|
|
56
|
+
1. Attempt at lower tier first (standard instead of complex)
|
|
57
|
+
2. If result quality is insufficient (verification fails): escalate to correct tier
|
|
58
|
+
3. Log the escalation with reason in AUDIT
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Cost Tracking — Token Ledger Specification
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Append-only ledger recording all token usage for analytics, budgeting, and optimization.
|
|
5
|
+
|
|
6
|
+
## Storage
|
|
7
|
+
|
|
8
|
+
- Location: `.mindforge/metrics/token-ledger.jsonl`
|
|
9
|
+
- Format: JSON Lines (one entry per model interaction)
|
|
10
|
+
- Rotation: Archive entries older than 30 days to `.mindforge/metrics/archive/`
|
|
11
|
+
- Retention: Archives kept for 90 days, then deleted
|
|
12
|
+
|
|
13
|
+
## Entry Format
|
|
14
|
+
|
|
15
|
+
Each line in the ledger is a complete JSON object:
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"id": "txn-[uuid]",
|
|
20
|
+
"timestamp": "2026-05-25T10:30:00Z",
|
|
21
|
+
"session_id": "session-abc123",
|
|
22
|
+
"task_id": "task-def456",
|
|
23
|
+
"phase": "execute",
|
|
24
|
+
"model": "claude-sonnet-4-6",
|
|
25
|
+
"tier": "standard",
|
|
26
|
+
"routing_reason": "difficulty_score_5_multi_file",
|
|
27
|
+
"tokens": {
|
|
28
|
+
"input": 12500,
|
|
29
|
+
"output": 3200,
|
|
30
|
+
"cached_input": 8000,
|
|
31
|
+
"total": 15700
|
|
32
|
+
},
|
|
33
|
+
"cost_usd": 0.085,
|
|
34
|
+
"budget_remaining": {
|
|
35
|
+
"session": 4.915,
|
|
36
|
+
"project_weekly": 49.915
|
|
37
|
+
},
|
|
38
|
+
"task_type": "implementation",
|
|
39
|
+
"files_touched": 3,
|
|
40
|
+
"skills_loaded": ["code-quality", "testing-standards"],
|
|
41
|
+
"outcome": "success"
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Reporting Queries
|
|
46
|
+
|
|
47
|
+
The `/mindforge:cost-report` command reads this ledger to produce:
|
|
48
|
+
|
|
49
|
+
### By Model
|
|
50
|
+
```
|
|
51
|
+
Model | Calls | Tokens | Cost | % of Total
|
|
52
|
+
----------------|-------|-----------|---------|----------
|
|
53
|
+
claude-sonnet | 45 | 580,000 | $4.50 | 33%
|
|
54
|
+
claude-opus | 8 | 210,000 | $8.25 | 61%
|
|
55
|
+
claude-haiku | 120 | 350,000 | $0.75 | 6%
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### By Task Type
|
|
59
|
+
```
|
|
60
|
+
Type | Avg Cost | Avg Tokens | Count
|
|
61
|
+
----------------|----------|------------|------
|
|
62
|
+
implementation | $0.42 | 18,500 | 23
|
|
63
|
+
code-review | $0.15 | 8,200 | 15
|
|
64
|
+
debugging | $0.85 | 32,000 | 8
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Efficiency Metrics
|
|
68
|
+
- Cache hit rate: % of input tokens served from cache
|
|
69
|
+
- Routing accuracy: % of tasks where tier matched actual complexity
|
|
70
|
+
- Over-spend rate: % of tasks that exceeded their estimated budget
|
|
71
|
+
|
|
72
|
+
## Integration
|
|
73
|
+
|
|
74
|
+
- Written to by the budget-enforcer after every model interaction
|
|
75
|
+
- Read by `/mindforge:cost-report` command
|
|
76
|
+
- Summarized weekly into `.mindforge/metrics/weekly-cost-summary.json`
|
|
77
|
+
- Referenced by AgRevOps dashboard for ROI tracking
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Council Framework — Decision Protocol
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
A structured multi-voice decision harness for resolving ambiguous architectural
|
|
5
|
+
decisions. Four specialist voices debate from different perspectives, producing
|
|
6
|
+
a verdict with confidence scoring and documented dissent.
|
|
7
|
+
|
|
8
|
+
## When to Invoke Council
|
|
9
|
+
|
|
10
|
+
Council is appropriate when:
|
|
11
|
+
- Multiple valid approaches exist with non-obvious tradeoffs
|
|
12
|
+
- The decision affects system architecture (new systems, breaking changes)
|
|
13
|
+
- Team members (or prior sessions) have expressed conflicting preferences
|
|
14
|
+
- The Adversarial Decision Loop (ADS) produces a split verdict
|
|
15
|
+
- Tier 3 security escalations require multi-perspective review
|
|
16
|
+
|
|
17
|
+
Council is NOT appropriate for:
|
|
18
|
+
- Simple implementation choices with one obvious answer
|
|
19
|
+
- Bug fixes with clear root causes
|
|
20
|
+
- Tasks where the user has already made the decision
|
|
21
|
+
|
|
22
|
+
## The Four Voices
|
|
23
|
+
|
|
24
|
+
| Voice | Persona | Perspective | Bias (intentional) |
|
|
25
|
+
|-------|---------|-------------|-------------------|
|
|
26
|
+
| Architect | council-architect | System design, scalability, long-term | Favors elegant, extensible solutions |
|
|
27
|
+
| Skeptic | council-skeptic | Adversarial, edge cases, failure modes | Favors caution, surfaces hidden risks |
|
|
28
|
+
| Pragmatist | council-pragmatist | Delivery, time-to-value, incremental | Favors shipping, "good enough" |
|
|
29
|
+
| Critic | council-critic | Quality, craftsmanship, standards | Favors excellence, refuses shortcuts |
|
|
30
|
+
|
|
31
|
+
## Protocol Steps
|
|
32
|
+
|
|
33
|
+
### Step 1 — Frame the Decision
|
|
34
|
+
Present the decision to all four voices with:
|
|
35
|
+
- Context: what prompted this decision
|
|
36
|
+
- Options: the 2-4 approaches being considered
|
|
37
|
+
- Constraints: deadlines, budget, team capacity, existing tech debt
|
|
38
|
+
- Stakes: what happens if we get this wrong
|
|
39
|
+
|
|
40
|
+
### Step 2 — Individual Positions (parallel)
|
|
41
|
+
Each voice independently analyzes and states:
|
|
42
|
+
- Their recommended option
|
|
43
|
+
- Their top 3 reasons (from their perspective)
|
|
44
|
+
- Their biggest concern with the other options
|
|
45
|
+
- Confidence in their position (0.0-1.0)
|
|
46
|
+
|
|
47
|
+
### Step 3 — Challenge Round
|
|
48
|
+
Each voice responds to the strongest counterargument against their position:
|
|
49
|
+
- Acknowledge valid concerns
|
|
50
|
+
- Rebut where possible
|
|
51
|
+
- Adjust confidence if swayed
|
|
52
|
+
|
|
53
|
+
### Step 4 — Synthesis
|
|
54
|
+
The synthesis engine (see `synthesis-engine.md`) produces:
|
|
55
|
+
- Final verdict (the recommended option)
|
|
56
|
+
- Consensus score (0.0-1.0)
|
|
57
|
+
- Key factors that decided it
|
|
58
|
+
- Documented dissent (any voice that disagrees with confidence > 0.6)
|
|
59
|
+
- Risk register (concerns raised by Skeptic that remain unmitigated)
|
|
60
|
+
|
|
61
|
+
### Step 5 — Output
|
|
62
|
+
Write to `.planning/decisions/COUNCIL-[timestamp].md`:
|
|
63
|
+
```markdown
|
|
64
|
+
# Council Decision: [Title]
|
|
65
|
+
Date: [timestamp]
|
|
66
|
+
Consensus: [score]
|
|
67
|
+
|
|
68
|
+
## Verdict
|
|
69
|
+
[The recommended approach in 2-3 sentences]
|
|
70
|
+
|
|
71
|
+
## Positions
|
|
72
|
+
| Voice | Recommendation | Confidence | Aligned with Verdict? |
|
|
73
|
+
...
|
|
74
|
+
|
|
75
|
+
## Key Deciding Factors
|
|
76
|
+
1. ...
|
|
77
|
+
2. ...
|
|
78
|
+
3. ...
|
|
79
|
+
|
|
80
|
+
## Dissent
|
|
81
|
+
[Any voice that disagreed, with their reasoning]
|
|
82
|
+
|
|
83
|
+
## Risk Register
|
|
84
|
+
[Unmitigated concerns from the Skeptic]
|
|
85
|
+
|
|
86
|
+
## Action Items
|
|
87
|
+
[Concrete next steps based on the verdict]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Guardrails
|
|
91
|
+
|
|
92
|
+
- Council is advisory — the user ALWAYS has final say (User Sovereignty principle)
|
|
93
|
+
- If consensus < 0.5: report "No consensus reached" and present all positions equally
|
|
94
|
+
- Council never auto-executes decisions; it only recommends
|
|
95
|
+
- Maximum 2 rounds (initial + challenge); no infinite debates
|
|
96
|
+
- Each voice is limited to 200 words per round
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Council Framework — Reusable Templates
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Pre-configured council invocations for common decision types. Each template
|
|
5
|
+
adjusts the framing and weight of voices for the specific domain.
|
|
6
|
+
|
|
7
|
+
## Template: Architecture Decision
|
|
8
|
+
|
|
9
|
+
**Trigger:** New system design, major refactor, technology choice
|
|
10
|
+
**Voice weights:** Architect (1.2x), Skeptic (1.0x), Pragmatist (0.9x), Critic (0.9x)
|
|
11
|
+
|
|
12
|
+
**Framing prompt:**
|
|
13
|
+
> We need to decide on [DECISION]. This affects the system's [scalability/maintainability/performance].
|
|
14
|
+
> Current constraints: [CONSTRAINTS].
|
|
15
|
+
> Options under consideration: [OPTIONS].
|
|
16
|
+
> The decision is hard to reverse because: [IRREVERSIBILITY FACTORS].
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Template: Breaking Change
|
|
21
|
+
|
|
22
|
+
**Trigger:** API change, schema migration, dependency upgrade
|
|
23
|
+
**Voice weights:** Skeptic (1.3x), Pragmatist (1.1x), Architect (0.8x), Critic (0.8x)
|
|
24
|
+
|
|
25
|
+
**Framing prompt:**
|
|
26
|
+
> We're considering a breaking change: [CHANGE].
|
|
27
|
+
> Affected consumers: [WHO].
|
|
28
|
+
> Migration path: [HOW].
|
|
29
|
+
> Timeline pressure: [WHEN].
|
|
30
|
+
> What could go wrong and is it worth the risk?
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Template: Security Escalation
|
|
35
|
+
|
|
36
|
+
**Trigger:** Tier 3 security finding, compliance concern
|
|
37
|
+
**Voice weights:** Skeptic (1.4x), Critic (1.2x), Architect (0.8x), Pragmatist (0.6x)
|
|
38
|
+
|
|
39
|
+
**Framing prompt:**
|
|
40
|
+
> Security concern identified: [FINDING].
|
|
41
|
+
> Severity: [LEVEL]. Exploitability: [EASE].
|
|
42
|
+
> Current mitigation: [IF ANY].
|
|
43
|
+
> Options: [FIX OPTIONS WITH EFFORT ESTIMATES].
|
|
44
|
+
> User data at risk: [YES/NO + SCOPE].
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Template: Ship-or-Wait
|
|
49
|
+
|
|
50
|
+
**Trigger:** Feature complete but uncertain quality, deadline pressure
|
|
51
|
+
**Voice weights:** Pragmatist (1.3x), Critic (1.2x), Skeptic (1.0x), Architect (0.5x)
|
|
52
|
+
|
|
53
|
+
**Framing prompt:**
|
|
54
|
+
> Feature [NAME] is at [COMPLETION%]. Deadline is [DATE].
|
|
55
|
+
> Known issues: [ISSUES].
|
|
56
|
+
> Test coverage: [COVERAGE%].
|
|
57
|
+
> User impact of delay: [IMPACT].
|
|
58
|
+
> Should we ship now with known issues, or delay for quality?
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Template: Build-vs-Buy
|
|
63
|
+
|
|
64
|
+
**Trigger:** Evaluating third-party dependencies, SaaS vs self-hosted
|
|
65
|
+
**Voice weights:** Pragmatist (1.2x), Architect (1.1x), Skeptic (1.0x), Critic (0.7x)
|
|
66
|
+
|
|
67
|
+
**Framing prompt:**
|
|
68
|
+
> We need [CAPABILITY]. Options:
|
|
69
|
+
> Build: [EFFORT ESTIMATE, MAINTENANCE COST]
|
|
70
|
+
> Buy: [COST, VENDOR LOCK-IN RISK, FEATURE GAPS]
|
|
71
|
+
> Current team capacity: [AVAILABLE BANDWIDTH]
|
|
72
|
+
> Strategic importance of owning this: [LOW/MEDIUM/HIGH]
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Using Templates
|
|
77
|
+
|
|
78
|
+
Templates are invoked by the `/mindforge:council` command:
|
|
79
|
+
```
|
|
80
|
+
/mindforge:council --template architecture "Should we use PostgreSQL or DynamoDB?"
|
|
81
|
+
/mindforge:council --template breaking-change "Removing the v1 API"
|
|
82
|
+
/mindforge:council --template security "SQL injection in user search"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
If no template is specified, the command uses the generic protocol from `council-protocol.md`.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Council Framework — Synthesis Engine
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Merge four independent council voices into a single coherent verdict with
|
|
5
|
+
confidence scoring, documented dissent, and actionable output.
|
|
6
|
+
|
|
7
|
+
## Synthesis Algorithm
|
|
8
|
+
|
|
9
|
+
### Input
|
|
10
|
+
Four voice outputs, each containing:
|
|
11
|
+
- recommended_option: string
|
|
12
|
+
- reasons: string[3]
|
|
13
|
+
- concerns_with_others: string[]
|
|
14
|
+
- confidence: float (0.0-1.0)
|
|
15
|
+
- post_challenge_confidence: float (after challenge round)
|
|
16
|
+
|
|
17
|
+
### Step 1 — Vote Counting
|
|
18
|
+
```
|
|
19
|
+
For each option:
|
|
20
|
+
weighted_votes = sum(voice.confidence for voice in voices if voice.recommended_option == option)
|
|
21
|
+
|
|
22
|
+
winner = option with highest weighted_votes
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Step 2 — Consensus Scoring
|
|
26
|
+
```
|
|
27
|
+
consensus = weighted_votes_for_winner / sum(all_confidences)
|
|
28
|
+
|
|
29
|
+
Interpretation:
|
|
30
|
+
>= 0.85: Strong consensus (proceed with confidence)
|
|
31
|
+
0.65 to <0.85: Moderate consensus (proceed but monitor risks)
|
|
32
|
+
0.50 to <0.65: Weak consensus (consider user input)
|
|
33
|
+
< 0.50: No consensus (present all options, defer to user)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Step 3 — Dissent Identification
|
|
37
|
+
A voice is marked as dissenting if:
|
|
38
|
+
- They did NOT vote for the winning option AND
|
|
39
|
+
- Their post-challenge confidence remains > 0.6
|
|
40
|
+
|
|
41
|
+
Dissenting voices have their full reasoning preserved in the output.
|
|
42
|
+
|
|
43
|
+
### Step 4 — Risk Extraction
|
|
44
|
+
From the Skeptic voice (regardless of their vote):
|
|
45
|
+
- Extract all concerns rated "high" or "critical"
|
|
46
|
+
- These become the Risk Register
|
|
47
|
+
- Each risk must have a proposed mitigation or be flagged as unmitigated
|
|
48
|
+
|
|
49
|
+
### Step 5 — Factor Identification
|
|
50
|
+
Identify the top 3 deciding factors by:
|
|
51
|
+
1. Finding reasons that appear in 2+ voices' reasoning
|
|
52
|
+
2. Finding reasons from the winning voices with highest confidence
|
|
53
|
+
3. Prioritizing reasons that address the Skeptic's concerns
|
|
54
|
+
|
|
55
|
+
## Edge Cases
|
|
56
|
+
|
|
57
|
+
### Tie (2v2 split)
|
|
58
|
+
- If the four voices split evenly between two options (2 vs 2): report "Split verdict"
|
|
59
|
+
- Present both options with their respective advocates
|
|
60
|
+
- Recommend the option favored by Architect (as tiebreaker for technical decisions)
|
|
61
|
+
- Flag for user decision
|
|
62
|
+
|
|
63
|
+
### Single Dissenter with Very High Confidence
|
|
64
|
+
- If one voice has confidence > 0.9 while disagreeing with the majority:
|
|
65
|
+
- Include a "Strong Dissent Warning" in the output
|
|
66
|
+
- The dissenter's concerns get elevated visibility
|
|
67
|
+
|
|
68
|
+
### All Agree (4-0)
|
|
69
|
+
- Consensus = 1.0
|
|
70
|
+
- Still include the Skeptic's concerns (they may have low-confidence risks)
|
|
71
|
+
- Fast-path: skip challenge round if all initial confidences > 0.8
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Instinct Engine — Auto-Capture Protocol
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Defines how the instinct engine observes sessions and automatically creates
|
|
5
|
+
instinct entries. This runs in AUTO-CAPTURE mode (always observing).
|
|
6
|
+
|
|
7
|
+
## Observation Triggers
|
|
8
|
+
|
|
9
|
+
The capture engine watches for these signals during every session except autonomous mode sessions (see Auto-Capture Rate Limits):
|
|
10
|
+
|
|
11
|
+
### 1. User Corrections
|
|
12
|
+
When the user corrects agent behavior ("no, don't do that", "always do X instead"):
|
|
13
|
+
- Extract the correction as a behavior rule
|
|
14
|
+
- Create instinct with initial confidence 0.6 (user-stated is higher than observed)
|
|
15
|
+
|
|
16
|
+
### 2. Repeated Patterns (3+ occurrences)
|
|
17
|
+
When the agent performs the same action pattern 3+ times in a session:
|
|
18
|
+
- Extract the pattern as a potential instinct
|
|
19
|
+
- Create with initial confidence 0.4 (observed but not confirmed)
|
|
20
|
+
|
|
21
|
+
### 3. Successful Outcomes After Specific Actions
|
|
22
|
+
When a verify/test pass follows a specific non-obvious action:
|
|
23
|
+
- Extract the action-outcome pair
|
|
24
|
+
- Create with initial confidence 0.5
|
|
25
|
+
|
|
26
|
+
### 4. Manual Capture
|
|
27
|
+
When user invokes `/mindforge:learn-instinct`:
|
|
28
|
+
- User provides observation + behavior directly
|
|
29
|
+
- Create with user-specified confidence (default 0.7)
|
|
30
|
+
|
|
31
|
+
## Deduplication
|
|
32
|
+
|
|
33
|
+
Before creating a new instinct:
|
|
34
|
+
1. Compare `observation` field against all active instincts (same project)
|
|
35
|
+
2. Similarity threshold: >80% word overlap → treat as duplicate
|
|
36
|
+
3. If duplicate found: increment `times_applied` on existing instinct instead
|
|
37
|
+
4. If near-duplicate (60-80% overlap): create new but link via tags
|
|
38
|
+
|
|
39
|
+
## Auto-Capture Rate Limits
|
|
40
|
+
|
|
41
|
+
To prevent noise:
|
|
42
|
+
- Maximum 5 new instincts per session
|
|
43
|
+
- Maximum 100 active instincts per project
|
|
44
|
+
- If at 100: prune lowest-confidence instinct before adding new one
|
|
45
|
+
- Never auto-capture from autonomous mode sessions (too noisy)
|
|
46
|
+
|
|
47
|
+
## Session-End Summary
|
|
48
|
+
|
|
49
|
+
At the end of each session where instincts were captured:
|
|
50
|
+
```
|
|
51
|
+
📝 Instincts captured this session:
|
|
52
|
+
- [NEW] "observation text" (confidence: 0.5)
|
|
53
|
+
- [REINFORCED] "existing instinct" (confidence: 0.5 → 0.6)
|
|
54
|
+
|
|
55
|
+
Active instincts: 47/100 | Ready for promotion: 3
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Integration Points
|
|
59
|
+
|
|
60
|
+
- Hooks into existing memory capture at `.mindforge/memory/engine/capture-protocol.md`
|
|
61
|
+
- Instinct observations flow through the same memory pipeline
|
|
62
|
+
- Instincts are SEPARATE from memories (memories are facts, instincts are behaviors)
|
|
63
|
+
- Both share the project-scoping mechanism
|