mindforge-cc 10.0.2 → 10.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mindforge/config.json +73 -2
- package/.mindforge/engine/autonomous/cross-iteration-bridge.md +96 -0
- package/.mindforge/engine/cost-tracking/budget-enforcer.md +68 -0
- package/.mindforge/engine/cost-tracking/router.md +58 -0
- package/.mindforge/engine/cost-tracking/token-ledger.md +77 -0
- package/.mindforge/engine/council/council-protocol.md +96 -0
- package/.mindforge/engine/council/council-templates.md +85 -0
- package/.mindforge/engine/council/synthesis-engine.md +71 -0
- package/.mindforge/engine/cross-model-eval.md +74 -0
- package/.mindforge/engine/instincts/capture-engine.md +63 -0
- package/.mindforge/engine/instincts/instinct-schema.md +76 -0
- package/.mindforge/engine/instincts/promotion-engine.md +77 -0
- package/.mindforge/engine/proactive/signal-detector.md +60 -0
- package/.mindforge/engine/proactive/suggestion-engine.md +100 -0
- package/.mindforge/engine/skills/composition.md +83 -0
- package/.mindforge/engine/skills/loader.md +16 -0
- package/.mindforge/personas/agent-architect.md +57 -0
- package/.mindforge/personas/agent-evaluator.md +162 -0
- package/.mindforge/personas/agent-memory-designer.md +157 -0
- package/.mindforge/personas/agent-ops-engineer.md +120 -0
- package/.mindforge/personas/agent-orchestrator.md +112 -0
- package/.mindforge/personas/ai-economist.md +57 -0
- package/.mindforge/personas/ai-safety-engineer.md +57 -0
- package/.mindforge/personas/analytics-engineer.md +57 -0
- package/.mindforge/personas/anti-pattern-hunter.md +61 -0
- package/.mindforge/personas/api-gateway-designer.md +132 -0
- package/.mindforge/personas/auth-engineer.md +112 -0
- package/.mindforge/personas/build-engineer.md +57 -0
- package/.mindforge/personas/business-analyst.md +56 -0
- package/.mindforge/personas/cache-architect.md +100 -0
- package/.mindforge/personas/causal-scientist.md +57 -0
- package/.mindforge/personas/cdn-architect.md +118 -0
- package/.mindforge/personas/change-agent.md +104 -0
- package/.mindforge/personas/code-narrator.md +52 -0
- package/.mindforge/personas/codegen-specialist.md +68 -0
- package/.mindforge/personas/communication-architect.md +102 -0
- package/.mindforge/personas/compliance-engineer.md +96 -0
- package/.mindforge/personas/consensus-engineer.md +116 -0
- package/.mindforge/personas/contract-tester.md +60 -192
- package/.mindforge/personas/cost-optimizer.md +71 -0
- package/.mindforge/personas/council-architect.md +66 -0
- package/.mindforge/personas/council-critic.md +67 -0
- package/.mindforge/personas/council-pragmatist.md +71 -0
- package/.mindforge/personas/council-skeptic.md +73 -0
- package/.mindforge/personas/data-architect.md +108 -0
- package/.mindforge/personas/data-mesh-architect.md +57 -0
- package/.mindforge/personas/data-pipeline-architect.md +120 -0
- package/.mindforge/personas/de-sloppifier.md +60 -0
- package/.mindforge/personas/debt-manager.md +66 -0
- package/.mindforge/personas/decision-architect.md +82 -51
- package/.mindforge/personas/deployment-captain.md +74 -0
- package/.mindforge/personas/design-system-lead.md +112 -0
- package/.mindforge/personas/dmux-orchestrator.md +75 -0
- package/.mindforge/personas/doc-auditor.md +84 -0
- package/.mindforge/personas/dx-engineer.md +96 -0
- package/.mindforge/personas/ecommerce-engineer.md +57 -0
- package/.mindforge/personas/edge-engineer.md +94 -0
- package/.mindforge/personas/edtech-architect.md +106 -0
- package/.mindforge/personas/embedding-architect.md +57 -0
- package/.mindforge/personas/environment-engineer.md +57 -0
- package/.mindforge/personas/eval-judge.md +55 -0
- package/.mindforge/personas/event-architect.md +102 -0
- package/.mindforge/personas/experiment-designer.md +138 -0
- package/.mindforge/personas/feature-store-engineer.md +57 -0
- package/.mindforge/personas/finops-analyst.md +66 -0
- package/.mindforge/personas/fintech-architect.md +57 -0
- package/.mindforge/personas/flutter-engineer.md +104 -0
- package/.mindforge/personas/gaming-engineer.md +57 -0
- package/.mindforge/personas/graphql-designer.md +73 -0
- package/.mindforge/personas/healthcare-engineer.md +57 -0
- package/.mindforge/personas/hiring-strategist.md +105 -0
- package/.mindforge/personas/hitl-architect.md +165 -0
- package/.mindforge/personas/i18n-architect.md +69 -0
- package/.mindforge/personas/instinct-curator.md +83 -0
- package/.mindforge/personas/iot-architect.md +105 -0
- package/.mindforge/personas/knowledge-curator.md +139 -0
- package/.mindforge/personas/knowledge-engineer.md +57 -0
- package/.mindforge/personas/lakehouse-architect.md +57 -0
- package/.mindforge/personas/llm-orchestrator.md +57 -0
- package/.mindforge/personas/logistics-architect.md +106 -0
- package/.mindforge/personas/market-analyst.md +53 -0
- package/.mindforge/personas/marketplace-engineer.md +105 -0
- package/.mindforge/personas/mcp-designer.md +54 -0
- package/.mindforge/personas/meeting-designer.md +104 -0
- package/.mindforge/personas/mentorship-lead.md +106 -0
- package/.mindforge/personas/migration-architect.md +57 -0
- package/.mindforge/personas/ml-ops-engineer.md +101 -0
- package/.mindforge/personas/mobile-architect.md +105 -0
- package/.mindforge/personas/mobile-security-engineer.md +106 -0
- package/.mindforge/personas/multi-model-bridge.md +86 -0
- package/.mindforge/personas/multi-tenancy-architect.md +71 -0
- package/.mindforge/personas/multimodal-engineer.md +57 -0
- package/.mindforge/personas/offline-specialist.md +105 -0
- package/.mindforge/personas/onboarding-navigator.md +63 -0
- package/.mindforge/personas/payments-engineer.md +135 -0
- package/.mindforge/personas/pipeline-engineer.md +115 -0
- package/.mindforge/personas/platform-engineer.md +97 -0
- package/.mindforge/personas/platform-lead.md +57 -0
- package/.mindforge/personas/privacy-engineer.md +57 -0
- package/.mindforge/personas/product-owner.md +56 -0
- package/.mindforge/personas/productivity-analyst.md +57 -0
- package/.mindforge/personas/prompt-architect.md +101 -0
- package/.mindforge/personas/proofreader.md +53 -0
- package/.mindforge/personas/pwa-architect.md +105 -0
- package/.mindforge/personas/quality-scorer.md +63 -0
- package/.mindforge/personas/react-native-engineer.md +106 -0
- package/.mindforge/personas/resilience-engineer.md +69 -0
- package/.mindforge/personas/rfc-architect.md +64 -0
- package/.mindforge/personas/saga-orchestrator.md +80 -0
- package/.mindforge/personas/secrets-engineer.md +57 -0
- package/.mindforge/personas/skill-smith.md +79 -0
- package/.mindforge/personas/sre-lead.md +107 -0
- package/.mindforge/personas/stream-engineer.md +57 -0
- package/.mindforge/personas/streaming-engineer.md +64 -0
- package/.mindforge/personas/swarm-templates.json +695 -38
- package/.mindforge/personas/system-designer.md +57 -0
- package/.mindforge/personas/team-coach.md +120 -0
- package/.mindforge/personas/tech-lead-coach.md +103 -0
- package/.mindforge/personas/technical-writer-lead.md +111 -0
- package/.mindforge/personas/threat-modeler.md +82 -0
- package/.mindforge/personas/vibe-checker.md +75 -0
- package/.mindforge/personas/worktree-manager.md +56 -0
- package/.mindforge/personas/zero-trust-engineer.md +113 -0
- package/.mindforge/skills/a11y-testing/SKILL.md +143 -0
- package/.mindforge/skills/agent-evaluation-framework/SKILL.md +227 -0
- package/.mindforge/skills/agent-introspection-debugging/SKILL.md +88 -0
- package/.mindforge/skills/agent-loops/SKILL.md +84 -0
- package/.mindforge/skills/agent-memory-design/SKILL.md +199 -0
- package/.mindforge/skills/agent-orchestration-patterns/SKILL.md +129 -0
- package/.mindforge/skills/agent-tool-selection/SKILL.md +204 -0
- package/.mindforge/skills/ai-agent-deployment/SKILL.md +176 -0
- package/.mindforge/skills/ai-cost-management/SKILL.md +57 -0
- package/.mindforge/skills/ai-safety-alignment/SKILL.md +53 -0
- package/.mindforge/skills/analytics-instrumentation/SKILL.md +172 -0
- package/.mindforge/skills/api-gateway-patterns/SKILL.md +177 -0
- package/.mindforge/skills/api-marketplace/SKILL.md +56 -0
- package/.mindforge/skills/api-versioning/SKILL.md +100 -0
- package/.mindforge/skills/app-store-deployment/SKILL.md +44 -0
- package/.mindforge/skills/architecture-tradeoff-analysis/SKILL.md +97 -0
- package/.mindforge/skills/audit-logging/SKILL.md +140 -0
- package/.mindforge/skills/auth-patterns/SKILL.md +148 -0
- package/.mindforge/skills/autonomous-agent-harness/SKILL.md +218 -0
- package/.mindforge/skills/autonomous-agents/SKILL.md +59 -0
- package/.mindforge/skills/autonomous-loops/SKILL.md +105 -0
- package/.mindforge/skills/build-system-optimization/SKILL.md +54 -0
- package/.mindforge/skills/build-vs-buy/SKILL.md +80 -0
- package/.mindforge/skills/bundle-optimization/SKILL.md +174 -0
- package/.mindforge/skills/business-analyst/SKILL.md +82 -0
- package/.mindforge/skills/caching-strategies/SKILL.md +132 -0
- package/.mindforge/skills/capacity-planning/SKILL.md +96 -0
- package/.mindforge/skills/causal-inference/SKILL.md +42 -0
- package/.mindforge/skills/cdn-optimization/SKILL.md +212 -0
- package/.mindforge/skills/change-management/SKILL.md +106 -0
- package/.mindforge/skills/chaos-engineering/SKILL.md +99 -0
- package/.mindforge/skills/ci-cd-pipeline/SKILL.md +118 -0
- package/.mindforge/skills/cli-design/SKILL.md +118 -0
- package/.mindforge/skills/code-generation-patterns/SKILL.md +92 -0
- package/.mindforge/skills/code-review-methodology/SKILL.md +180 -0
- package/.mindforge/skills/code-tour/SKILL.md +145 -0
- package/.mindforge/skills/codebase-onboarding/SKILL.md +95 -0
- package/.mindforge/skills/compliance-as-code/SKILL.md +195 -0
- package/.mindforge/skills/conflict-resolution/SKILL.md +87 -0
- package/.mindforge/skills/connection-pooling/SKILL.md +151 -0
- package/.mindforge/skills/container-security/SKILL.md +151 -0
- package/.mindforge/skills/context-engineering/SKILL.md +114 -0
- package/.mindforge/skills/continuous-learning/SKILL.md +84 -0
- package/.mindforge/skills/contract-testing/SKILL.md +85 -0
- package/.mindforge/skills/cost-aware-routing/SKILL.md +83 -0
- package/.mindforge/skills/cost-estimation/SKILL.md +82 -0
- package/.mindforge/skills/council/SKILL.md +68 -0
- package/.mindforge/skills/cqrs-event-sourcing/SKILL.md +95 -0
- package/.mindforge/skills/cross-platform-testing/SKILL.md +43 -0
- package/.mindforge/skills/data-governance/SKILL.md +42 -0
- package/.mindforge/skills/data-lakehouse/SKILL.md +42 -0
- package/.mindforge/skills/data-mesh/SKILL.md +42 -0
- package/.mindforge/skills/data-modeling/SKILL.md +107 -0
- package/.mindforge/skills/data-pipeline-design/SKILL.md +171 -0
- package/.mindforge/skills/data-privacy-engineering/SKILL.md +42 -0
- package/.mindforge/skills/database-performance/SKILL.md +174 -0
- package/.mindforge/skills/database-sharding-advanced/SKILL.md +206 -0
- package/.mindforge/skills/de-sloppify/SKILL.md +120 -0
- package/.mindforge/skills/defense-in-depth/SKILL.md +84 -0
- package/.mindforge/skills/delegation-patterns/SKILL.md +123 -0
- package/.mindforge/skills/dependency-management/SKILL.md +94 -0
- package/.mindforge/skills/deployment-workflow/SKILL.md +135 -0
- package/.mindforge/skills/design-system/SKILL.md +113 -0
- package/.mindforge/skills/developer-onboarding/SKILL.md +99 -0
- package/.mindforge/skills/developer-productivity-metrics/SKILL.md +59 -0
- package/.mindforge/skills/distributed-consensus/SKILL.md +141 -0
- package/.mindforge/skills/dmux-workflows/SKILL.md +141 -0
- package/.mindforge/skills/dns-architecture/SKILL.md +167 -0
- package/.mindforge/skills/doc-health-audit/SKILL.md +102 -0
- package/.mindforge/skills/ecommerce-architecture/SKILL.md +41 -0
- package/.mindforge/skills/edge-computing/SKILL.md +91 -0
- package/.mindforge/skills/edtech-platform/SKILL.md +41 -0
- package/.mindforge/skills/email-deliverability/SKILL.md +177 -0
- package/.mindforge/skills/embedding-systems/SKILL.md +55 -0
- package/.mindforge/skills/environment-management/SKILL.md +54 -0
- package/.mindforge/skills/error-handling-architecture/SKILL.md +118 -0
- package/.mindforge/skills/estimation-techniques/SKILL.md +113 -0
- package/.mindforge/skills/eval-harness/SKILL.md +180 -0
- package/.mindforge/skills/event-driven-architecture/SKILL.md +162 -0
- package/.mindforge/skills/experiment-design/SKILL.md +139 -0
- package/.mindforge/skills/experiment-platform/SKILL.md +43 -0
- package/.mindforge/skills/feature-engineering/SKILL.md +42 -0
- package/.mindforge/skills/feature-flag-management/SKILL.md +183 -0
- package/.mindforge/skills/fine-tuning-workflow/SKILL.md +189 -0
- package/.mindforge/skills/fintech-patterns/SKILL.md +41 -0
- package/.mindforge/skills/flutter-architecture/SKILL.md +42 -0
- package/.mindforge/skills/gaming-backend/SKILL.md +41 -0
- package/.mindforge/skills/git-workflow-design/SKILL.md +129 -0
- package/.mindforge/skills/graceful-degradation/SKILL.md +95 -0
- package/.mindforge/skills/graphql-patterns/SKILL.md +243 -0
- package/.mindforge/skills/guardrails-and-safety/SKILL.md +137 -0
- package/.mindforge/skills/healthcare-systems/SKILL.md +40 -0
- package/.mindforge/skills/hiring-engineering/SKILL.md +119 -0
- package/.mindforge/skills/human-in-the-loop-design/SKILL.md +234 -0
- package/.mindforge/skills/i18n-architecture/SKILL.md +147 -0
- package/.mindforge/skills/idempotency-patterns/SKILL.md +84 -0
- package/.mindforge/skills/incident-communication/SKILL.md +96 -0
- package/.mindforge/skills/incident-management/SKILL.md +97 -0
- package/.mindforge/skills/infrastructure-as-code/SKILL.md +98 -0
- package/.mindforge/skills/instinct-clustering/SKILL.md +190 -0
- package/.mindforge/skills/internal-developer-platform/SKILL.md +51 -0
- package/.mindforge/skills/iot-platform/SKILL.md +41 -0
- package/.mindforge/skills/k8s-deployment/SKILL.md +358 -0
- package/.mindforge/skills/knowledge-graphs/SKILL.md +56 -0
- package/.mindforge/skills/knowledge-sharing-systems/SKILL.md +112 -0
- package/.mindforge/skills/llm-cost-optimization/SKILL.md +198 -0
- package/.mindforge/skills/llm-orchestration/SKILL.md +56 -0
- package/.mindforge/skills/load-testing/SKILL.md +84 -0
- package/.mindforge/skills/logistics-optimization/SKILL.md +40 -0
- package/.mindforge/skills/market-researcher/SKILL.md +99 -0
- package/.mindforge/skills/marketplace-trust/SKILL.md +40 -0
- package/.mindforge/skills/mcp-server-patterns/SKILL.md +264 -0
- package/.mindforge/skills/media-streaming/SKILL.md +41 -0
- package/.mindforge/skills/meeting-architecture/SKILL.md +146 -0
- package/.mindforge/skills/mentoring-patterns/SKILL.md +77 -0
- package/.mindforge/skills/microservices-patterns/SKILL.md +83 -0
- package/.mindforge/skills/migration-platform/SKILL.md +61 -0
- package/.mindforge/skills/migration-strategies/SKILL.md +129 -0
- package/.mindforge/skills/ml-feature-store/SKILL.md +56 -0
- package/.mindforge/skills/ml-monitoring/SKILL.md +42 -0
- package/.mindforge/skills/mobile-performance/SKILL.md +44 -0
- package/.mindforge/skills/mobile-security/SKILL.md +45 -0
- package/.mindforge/skills/model-evaluation/SKILL.md +53 -0
- package/.mindforge/skills/monorepo-management/SKILL.md +100 -0
- package/.mindforge/skills/multi-llm-consult/SKILL.md +75 -0
- package/.mindforge/skills/multi-tenancy-patterns/SKILL.md +145 -0
- package/.mindforge/skills/multi-turn-conversation-design/SKILL.md +206 -0
- package/.mindforge/skills/multimodal-ai/SKILL.md +51 -0
- package/.mindforge/skills/mutation-testing/SKILL.md +97 -0
- package/.mindforge/skills/notification-system-design/SKILL.md +168 -0
- package/.mindforge/skills/observability-stack/SKILL.md +136 -0
- package/.mindforge/skills/offline-first-design/SKILL.md +43 -0
- package/.mindforge/skills/on-call-design/SKILL.md +111 -0
- package/.mindforge/skills/pagination-patterns/SKILL.md +230 -0
- package/.mindforge/skills/payment-integration/SKILL.md +176 -0
- package/.mindforge/skills/performance-reviews/SKILL.md +140 -0
- package/.mindforge/skills/platform-observability/SKILL.md +58 -0
- package/.mindforge/skills/platform-reliability/SKILL.md +52 -0
- package/.mindforge/skills/post-incident-learning/SKILL.md +96 -0
- package/.mindforge/skills/product-manager/SKILL.md +104 -0
- package/.mindforge/skills/progressive-web-app/SKILL.md +44 -0
- package/.mindforge/skills/prompt-engineering/SKILL.md +94 -0
- package/.mindforge/skills/proofreader/SKILL.md +158 -0
- package/.mindforge/skills/push-notification-architecture/SKILL.md +45 -0
- package/.mindforge/skills/python-performance/SKILL.md +183 -0
- package/.mindforge/skills/quality-audit/SKILL.md +171 -0
- package/.mindforge/skills/queue-design/SKILL.md +85 -0
- package/.mindforge/skills/rag-architecture/SKILL.md +176 -0
- package/.mindforge/skills/rate-limiting-design/SKILL.md +94 -0
- package/.mindforge/skills/react-native-patterns/SKILL.md +42 -0
- package/.mindforge/skills/react-performance/SKILL.md +229 -0
- package/.mindforge/skills/real-time-analytics/SKILL.md +42 -0
- package/.mindforge/skills/real-time-sync/SKILL.md +83 -0
- package/.mindforge/skills/responsive-native/SKILL.md +44 -0
- package/.mindforge/skills/responsive-patterns/SKILL.md +141 -0
- package/.mindforge/skills/rfc-pipeline/SKILL.md +114 -0
- package/.mindforge/skills/saas-multi-tenant/SKILL.md +41 -0
- package/.mindforge/skills/santa-method/SKILL.md +134 -0
- package/.mindforge/skills/search-implementation/SKILL.md +98 -0
- package/.mindforge/skills/secrets-platform/SKILL.md +56 -0
- package/.mindforge/skills/secrets-rotation/SKILL.md +173 -0
- package/.mindforge/skills/self-serve-infrastructure/SKILL.md +51 -0
- package/.mindforge/skills/serverless-patterns/SKILL.md +119 -0
- package/.mindforge/skills/skill-creator-meta/SKILL.md +146 -0
- package/.mindforge/skills/sprint-retrospective-facilitation/SKILL.md +112 -0
- package/.mindforge/skills/stakeholder-communication/SKILL.md +85 -0
- package/.mindforge/skills/state-management/SKILL.md +104 -0
- package/.mindforge/skills/stream-processing/SKILL.md +43 -0
- package/.mindforge/skills/streaming-architecture/SKILL.md +81 -0
- package/.mindforge/skills/supply-chain-security/SKILL.md +145 -0
- package/.mindforge/skills/synthetic-data-generation/SKILL.md +52 -0
- package/.mindforge/skills/system-design/SKILL.md +88 -0
- package/.mindforge/skills/team-topology-design/SKILL.md +107 -0
- package/.mindforge/skills/technical-debt-management/SKILL.md +86 -0
- package/.mindforge/skills/technical-interview-design/SKILL.md +98 -0
- package/.mindforge/skills/technical-leadership/SKILL.md +75 -0
- package/.mindforge/skills/technical-writing/SKILL.md +237 -0
- package/.mindforge/skills/technology-radar/SKILL.md +88 -0
- package/.mindforge/skills/testing-anti-patterns/SKILL.md +288 -0
- package/.mindforge/skills/threat-modeling/SKILL.md +109 -0
- package/.mindforge/skills/tool-design/SKILL.md +138 -0
- package/.mindforge/skills/typescript-advanced/SKILL.md +198 -0
- package/.mindforge/skills/using-git-worktrees/SKILL.md +139 -0
- package/.mindforge/skills/verification-loop/SKILL.md +97 -0
- package/.mindforge/skills/vibe-security/SKILL.md +165 -0
- package/.mindforge/skills/visual-regression-testing/SKILL.md +97 -0
- package/.mindforge/skills/websocket-patterns/SKILL.md +203 -0
- package/.mindforge/skills/writing-plans/SKILL.md +170 -0
- package/.mindforge/skills/writing-skills/SKILL.md +216 -0
- package/.mindforge/skills/zero-trust-architecture/SKILL.md +166 -0
- package/CHANGELOG.md +195 -0
- package/MINDFORGE.md +4 -4
- package/README.md +2 -2
- package/RELEASENOTES.md +66 -0
- package/bin/installer-core.js +1 -1
- package/bin/wizard/theme.js +2 -2
- package/docs/commands-reference.md +18 -1
- package/package.json +2 -2
- package/.mindforge/personas/data-privacy-engineer.md +0 -187
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Cross-Model Eval — Multi-Model Comparison Protocol
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Route the same task to two different models and compare outputs. Divergence
|
|
5
|
+
between models is a quality signal; agreement is a confidence booster.
|
|
6
|
+
|
|
7
|
+
## When to Trigger
|
|
8
|
+
- Architecture decisions (high stakes, hard to reverse)
|
|
9
|
+
- Security-critical code (auth, payment, PII handling)
|
|
10
|
+
- Agent confidence < 0.7 on current approach
|
|
11
|
+
- User explicitly requests second opinion (via /mindforge:consult)
|
|
12
|
+
- Eval-harness model-grader needs calibration
|
|
13
|
+
|
|
14
|
+
## Model Selection Logic
|
|
15
|
+
|
|
16
|
+
| Primary Model | Comparison Model | Rationale |
|
|
17
|
+
|--------------|-----------------|-----------|
|
|
18
|
+
| claude-sonnet-4-6 | gemini-2.5-pro | Different training, different strengths |
|
|
19
|
+
| claude-opus-4-7 | gpt-4o | Independent validation of complex reasoning |
|
|
20
|
+
| gemini-2.5-pro | claude-sonnet-4-6 | Verify research findings independently |
|
|
21
|
+
|
|
22
|
+
Selection follows the cost-routing tier: comparison model is always from a DIFFERENT provider than the primary.
|
|
23
|
+
|
|
24
|
+
## Comparison Method
|
|
25
|
+
|
|
26
|
+
### Step 1 — Sanitize Context
|
|
27
|
+
Same sanitization as multi-llm-consult skill:
|
|
28
|
+
- Remove internal file paths, variable names, proprietary logic
|
|
29
|
+
- Keep abstract question and public references
|
|
30
|
+
|
|
31
|
+
### Step 2 — Parallel Dispatch
|
|
32
|
+
Send identical sanitized prompt to both models simultaneously.
|
|
33
|
+
|
|
34
|
+
### Step 3 — Structural Comparison
|
|
35
|
+
Compare responses structurally (not token-by-token):
|
|
36
|
+
- Do they recommend the same approach/pattern?
|
|
37
|
+
- Do they identify the same risks?
|
|
38
|
+
- Do they agree on the key trade-offs?
|
|
39
|
+
|
|
40
|
+
### Step 4 — Divergence Classification
|
|
41
|
+
|
|
42
|
+
| Agreement Level | Meaning | Action |
|
|
43
|
+
|----------------|---------|--------|
|
|
44
|
+
| Full agreement | Both recommend same approach with same reasoning | High confidence — proceed |
|
|
45
|
+
| Partial agreement | Same recommendation, different reasoning | Moderate confidence — note alternate reasoning |
|
|
46
|
+
| Approach divergence | Different recommendations, shared concerns | Flag for human review with both perspectives |
|
|
47
|
+
| Full divergence | Different recommendations, different concerns | STOP — present both to user, defer decision |
|
|
48
|
+
|
|
49
|
+
### Step 5 — Output
|
|
50
|
+
Log an AUDIT entry:
|
|
51
|
+
```json
|
|
52
|
+
{
|
|
53
|
+
"event": "cross_model_eval",
|
|
54
|
+
"primary_model": "claude-sonnet-4-6",
|
|
55
|
+
"comparison_model": "gemini-2.5-pro",
|
|
56
|
+
"agreement_level": "partial",
|
|
57
|
+
"primary_recommendation": "...",
|
|
58
|
+
"comparison_recommendation": "...",
|
|
59
|
+
"divergence_points": ["..."],
|
|
60
|
+
"action_taken": "proceed_with_note"
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Budget Guard
|
|
65
|
+
- Maximum 2 cross-model evals per session (expensive operation)
|
|
66
|
+
- Each eval costs ~2x a normal model call
|
|
67
|
+
- Only trigger automatically on high-stakes decisions (not routine tasks)
|
|
68
|
+
- User can always override via /mindforge:consult (manual, no limit)
|
|
69
|
+
|
|
70
|
+
## Integration Points
|
|
71
|
+
- Cost-routing module determines which comparison model to use
|
|
72
|
+
- Multi-LLM consult skill handles the actual external dispatch
|
|
73
|
+
- Token-ledger records both model calls
|
|
74
|
+
- Council framework may trigger cross-model eval when consensus < 0.5
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Instinct Engine — Auto-Capture Protocol
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Defines how the instinct engine observes sessions and automatically creates
|
|
5
|
+
instinct entries. This runs in AUTO-CAPTURE mode (always observing).
|
|
6
|
+
|
|
7
|
+
## Observation Triggers
|
|
8
|
+
|
|
9
|
+
The capture engine watches for these signals during every session:
|
|
10
|
+
|
|
11
|
+
### 1. User Corrections
|
|
12
|
+
When the user corrects agent behavior ("no, don't do that", "always do X instead"):
|
|
13
|
+
- Extract the correction as a behavior rule
|
|
14
|
+
- Create instinct with initial confidence 0.6 (user-stated corrections start higher than observed patterns)
|
|
15
|
+
|
|
16
|
+
### 2. Repeated Patterns (3+ occurrences)
|
|
17
|
+
When the agent performs the same action pattern 3+ times in a session:
|
|
18
|
+
- Extract the pattern as a potential instinct
|
|
19
|
+
- Create with initial confidence 0.4 (observed but not confirmed)
|
|
20
|
+
|
|
21
|
+
### 3. Successful Outcomes After Specific Actions
|
|
22
|
+
When a verify/test pass follows a specific non-obvious action:
|
|
23
|
+
- Extract the action-outcome pair
|
|
24
|
+
- Create with initial confidence 0.5
|
|
25
|
+
|
|
26
|
+
### 4. Manual Capture
|
|
27
|
+
When user invokes `/mindforge:learn-instinct`:
|
|
28
|
+
- User provides observation + behavior directly
|
|
29
|
+
- Create with user-specified confidence (default 0.7)
|
|
30
|
+
|
|
31
|
+
## Deduplication
|
|
32
|
+
|
|
33
|
+
Before creating a new instinct:
|
|
34
|
+
1. Compare `observation` field against all active instincts (same project)
|
|
35
|
+
2. Similarity threshold: >80% word overlap → treat as duplicate
|
|
36
|
+
3. If duplicate found: increment `times_applied` on existing instinct instead
|
|
37
|
+
4. If near-duplicate (60-80% overlap): create new but link via tags
|
|
38
|
+
|
|
39
|
+
## Auto-Capture Rate Limits
|
|
40
|
+
|
|
41
|
+
To prevent noise:
|
|
42
|
+
- Maximum 5 new instincts per session
|
|
43
|
+
- Maximum 100 active instincts per project
|
|
44
|
+
- If at 100: prune lowest-confidence instinct before adding new one
|
|
45
|
+
- Never auto-capture from autonomous mode sessions (too noisy)
|
|
46
|
+
|
|
47
|
+
## Session-End Summary
|
|
48
|
+
|
|
49
|
+
At the end of each session where instincts were captured:
|
|
50
|
+
```
|
|
51
|
+
📝 Instincts captured this session:
|
|
52
|
+
- [NEW] "observation text" (confidence: 0.5)
|
|
53
|
+
- [REINFORCED] "existing instinct" (confidence: 0.5 → 0.6)
|
|
54
|
+
|
|
55
|
+
Active instincts: 47/100 | Ready for promotion: 3
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Integration Points
|
|
59
|
+
|
|
60
|
+
- Hooks into existing memory capture at `.mindforge/memory/engine/capture-protocol.md`
|
|
61
|
+
- Instinct observations flow through the same memory pipeline
|
|
62
|
+
- Instincts are SEPARATE from memories (memories are facts, instincts are behaviors)
|
|
63
|
+
- Both share the project-scoping mechanism
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Instinct Engine — Schema Definition
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Defines the data schema for learned behavioral instincts. Instincts are lightweight
|
|
5
|
+
patterns observed during sessions that may evolve into full skills over time.
|
|
6
|
+
|
|
7
|
+
## Instinct Entry Schema
|
|
8
|
+
|
|
9
|
+
Each instinct is a single JSON line in `instinct-store.jsonl`:
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{
|
|
13
|
+
"id": "inst-[uuid]",
|
|
14
|
+
"created_at": "2026-05-25T10:30:00Z",
|
|
15
|
+
"updated_at": "2026-05-25T14:20:00Z",
|
|
16
|
+
"observation": "When writing database queries, the team always adds an index comment explaining the chosen index strategy",
|
|
17
|
+
"behavior": "After writing any new database query, add a brief inline comment explaining which index will serve this query and why",
|
|
18
|
+
"confidence": 0.6,
|
|
19
|
+
"times_applied": 8,
|
|
20
|
+
"times_succeeded": 6,
|
|
21
|
+
"times_failed": 2,
|
|
22
|
+
"project": "mindforge",
|
|
23
|
+
"tags": ["database", "documentation", "patterns"],
|
|
24
|
+
"status": "active",
|
|
25
|
+
"promoted_to_skill": null,
|
|
26
|
+
"last_applied_at": "2026-05-25T14:20:00Z",
|
|
27
|
+
"source_sessions": ["session-abc123", "session-def456"]
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Field Definitions
|
|
32
|
+
|
|
33
|
+
| Field | Type | Required | Description |
|
|
34
|
+
|-------|------|----------|-------------|
|
|
35
|
+
| id | string | yes | Unique identifier, prefixed with `inst-` |
|
|
36
|
+
| created_at | ISO-8601 | yes | When the instinct was first observed |
|
|
37
|
+
| updated_at | ISO-8601 | yes | Last modification timestamp |
|
|
38
|
+
| observation | string | yes | What pattern was observed (the trigger condition) |
|
|
39
|
+
| behavior | string | yes | What the agent should do when this pattern is detected |
|
|
40
|
+
| confidence | float | yes | 0.0-1.0, computed from success/failure ratio + application count |
|
|
41
|
+
| times_applied | int | yes | Total times this instinct was applied |
|
|
42
|
+
| times_succeeded | int | yes | Times application led to positive outcome |
|
|
43
|
+
| times_failed | int | yes | Times application led to negative outcome or correction |
|
|
44
|
+
| project | string | yes | Project scope (instincts never leak between projects) |
|
|
45
|
+
| tags | string[] | yes | Classification tags for retrieval |
|
|
46
|
+
| status | enum | yes | One of: active, promoted, deprecated, pruned |
|
|
47
|
+
| promoted_to_skill | string \| null | yes | Skill name if promoted, null otherwise |
|
|
48
|
+
| last_applied_at | ISO-8601 | yes | When instinct was last used |
|
|
49
|
+
| source_sessions | string[] | yes | Session IDs where this instinct was observed/reinforced |
|
|
50
|
+
|
|
51
|
+
## Confidence Scoring
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
confidence = (times_succeeded / times_applied) * weight_factor
|
|
55
|
+
|
|
56
|
+
where weight_factor = min(1.0, times_applied / 10)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
- New instincts start at 0.5 confidence (neutral seed; the formula is undefined while `times_applied` is 0)
|
|
60
|
+
- Each success: recalculate with updated counts
|
|
61
|
+
- Each failure: recalculate with updated counts
|
|
62
|
+
- Weight factor prevents high confidence from single observations
|
|
63
|
+
- Minimum 5 applications before promotion is considered
|
|
64
|
+
|
|
65
|
+
## Status Transitions
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
[new observation] → active (confidence: 0.5)
|
|
69
|
+
↓
|
|
70
|
+
confidence >= 0.85 AND times_applied >= 5
|
|
71
|
+
↓
|
|
72
|
+
promoted → creates SKILL.md
|
|
73
|
+
|
|
74
|
+
active → deprecated (manual user action)
|
|
75
|
+
active → pruned (confidence < 0.2 after 10+ applications OR 30 days inactive)
|
|
76
|
+
```
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Instinct Engine — Promotion Protocol
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Defines the rules and process for promoting mature instincts into full MindForge skills.
|
|
5
|
+
|
|
6
|
+
## Promotion Criteria
|
|
7
|
+
|
|
8
|
+
An instinct is eligible for promotion when ALL of these are true:
|
|
9
|
+
1. `confidence >= 0.85`
|
|
10
|
+
2. `times_applied >= 5`
|
|
11
|
+
3. `times_succeeded >= 4` (an absolute floor of 4 successes, i.e. an 80% success rate at the minimum volume of 5 applications)
|
|
12
|
+
4. `status == "active"` (not already promoted or deprecated)
|
|
13
|
+
5. No existing skill covers the same behavior (checked against MANIFEST.md triggers)
|
|
14
|
+
|
|
15
|
+
## Promotion Process
|
|
16
|
+
|
|
17
|
+
### Step 1 — Candidate Identification
|
|
18
|
+
Run by `/mindforge:evolve-skills` command:
|
|
19
|
+
1. Scan `instinct-store.jsonl` for entries meeting all 5 criteria
|
|
20
|
+
2. Rank candidates by confidence * times_applied (impact score)
|
|
21
|
+
3. Present top candidates to user for approval
|
|
22
|
+
|
|
23
|
+
### Step 2 — Skill Draft Generation
|
|
24
|
+
For each approved candidate:
|
|
25
|
+
1. Generate a SKILL.md using this template:
|
|
26
|
+
|
|
27
|
+
```yaml
|
|
28
|
+
---
|
|
29
|
+
name: [derived-from-instinct-tags]
|
|
30
|
+
version: 1.0.0
|
|
31
|
+
min_mindforge_version: 10.0.3
|
|
32
|
+
status: stable
|
|
33
|
+
triggers: [derived-from-instinct-observation-keywords]
|
|
34
|
+
origin: instinct-promotion
|
|
35
|
+
origin_instinct_id: [inst-uuid]
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
# Skill — [Title derived from behavior]
|
|
39
|
+
|
|
40
|
+
## When this skill activates
|
|
41
|
+
[Derived from instinct observation field]
|
|
42
|
+
|
|
43
|
+
## Mandatory actions when this skill is active
|
|
44
|
+
|
|
45
|
+
### During implementation
|
|
46
|
+
[Derived from instinct behavior field, expanded into actionable steps]
|
|
47
|
+
|
|
48
|
+
### After implementation
|
|
49
|
+
Verify the behavior was applied correctly.
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Step 3 — Registration
|
|
53
|
+
1. Place generated SKILL.md in `.mindforge/skills/[name]/SKILL.md`
|
|
54
|
+
2. Add entry to MANIFEST.md under appropriate tier (default: Project tier)
|
|
55
|
+
3. Mark instinct as `promoted` with `promoted_to_skill: "[skill-name]"`
|
|
56
|
+
|
|
57
|
+
### Step 4 — Feedback Loop
|
|
58
|
+
After promotion:
|
|
59
|
+
- Continue tracking the instinct's success/failure THROUGH the skill
|
|
60
|
+
- If the skill is later found unhelpful: revert to instinct, mark status as deprecated
|
|
61
|
+
- This prevents premature promotion from creating persistent bad skills
|
|
62
|
+
|
|
63
|
+
## Pruning Protocol
|
|
64
|
+
|
|
65
|
+
Instincts are pruned (removed) when:
|
|
66
|
+
- `confidence < 0.2` AND `times_applied >= 10` (repeatedly failed)
|
|
67
|
+
- OR `last_applied_at` is more than 30 days ago (stale)
|
|
68
|
+
- OR user explicitly deprecates via command (the instinct's status is then recorded as `deprecated` rather than `pruned`)
|
|
69
|
+
|
|
70
|
+
Pruned instincts are moved to `.mindforge/engine/instincts/archive/` (not deleted) for audit purposes.
|
|
71
|
+
|
|
72
|
+
## Metrics
|
|
73
|
+
|
|
74
|
+
Track promotion health:
|
|
75
|
+
- Promotion rate: instincts promoted / instincts created (target: 10-20%)
|
|
76
|
+
- Reversion rate: promoted skills reverted / total promotions (target: < 5%)
|
|
77
|
+
- Active instinct count trend (should not monotonically increase)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Proactive Skill Suggestion — Signal Detector
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Detect contextual signals that indicate a skill should be suggested to the agent,
|
|
5
|
+
even if the user hasn't explicitly mentioned trigger keywords.
|
|
6
|
+
|
|
7
|
+
## Signal Categories
|
|
8
|
+
|
|
9
|
+
### 1. File Signals
|
|
10
|
+
Detect skills based on files being opened, modified, or referenced:
|
|
11
|
+
|
|
12
|
+
| File Pattern | Suggested Skill | Confidence |
|
|
13
|
+
|-------------|----------------|-----------|
|
|
14
|
+
| `*.test.*`, `*.spec.*`, `__tests__/` | testing-anti-patterns | 0.75 |
|
|
15
|
+
| `ONBOARDING*`, new git clone detected | codebase-onboarding | 0.9 |
|
|
16
|
+
| `CLEANUP-REPORT*`, post-merge diff | de-sloppify | 0.8 |
|
|
17
|
+
| `.mindforge/evals/` | eval-harness | 0.85 |
|
|
18
|
+
| `RFC-*.md`, `SPEC-*.md` | rfc-pipeline | 0.8 |
|
|
19
|
+
| `auth*`, `login*`, `payment*` | defense-in-depth | 0.75 |
|
|
20
|
+
| `THREAT-MODEL-*` | threat-modeling | 0.85 |
|
|
21
|
+
| `COUNCIL-*` in decisions/ | council | 0.8 |
|
|
22
|
+
|
|
23
|
+
### 2. Error Signals
|
|
24
|
+
Detect skills based on error patterns in build/test output:
|
|
25
|
+
|
|
26
|
+
| Error Pattern | Suggested Skill | Confidence |
|
|
27
|
+
|--------------|----------------|-----------|
|
|
28
|
+
| Mock-related test failures (3+) | testing-anti-patterns | 0.8 |
|
|
29
|
+
| Type errors in test files | testing-anti-patterns | 0.7 |
|
|
30
|
+
| Security scan findings (medium+) | defense-in-depth | 0.85 |
|
|
31
|
+
| Build failures after merge | verification-loop | 0.9 |
|
|
32
|
+
| Token budget warnings | cost-aware-routing | 0.8 |
|
|
33
|
+
|
|
34
|
+
### 3. Task Signals
|
|
35
|
+
Detect skills based on task description or conversation patterns:
|
|
36
|
+
|
|
37
|
+
| Task Pattern | Suggested Skill | Confidence |
|
|
38
|
+
|-------------|----------------|-----------|
|
|
39
|
+
| "review", "check", "verify" + completed work | santa-method | 0.75 |
|
|
40
|
+
| "score", "grade", "evaluate" | eval-harness | 0.8 |
|
|
41
|
+
| "cleanup", "polish", "finalize" | de-sloppify | 0.85 |
|
|
42
|
+
| "new project", "unfamiliar", "first time" | codebase-onboarding | 0.9 |
|
|
43
|
+
| "plan", "decompose", "break down spec" | rfc-pipeline | 0.8 |
|
|
44
|
+
| "quality", "how good", "assess" | quality-audit | 0.8 |
|
|
45
|
+
|
|
46
|
+
## Signal Processing Rules
|
|
47
|
+
|
|
48
|
+
1. **Single signal sufficiency** — One signal above threshold is enough to suggest
|
|
49
|
+
2. **Signal stacking** — Multiple signals for the same skill boost confidence: `combined = 1 - ((1 - s1) * (1 - s2))`
|
|
50
|
+
3. **No interruption** — Suggestions queue silently; presented only at natural breakpoints
|
|
51
|
+
4. **Context freshness** — File signals expire after 5 minutes of inactivity on that file
|
|
52
|
+
5. **Session memory** — Track which skills were already loaded this session; don't re-suggest
|
|
53
|
+
|
|
54
|
+
## Integration with Loader
|
|
55
|
+
|
|
56
|
+
The signal detector works ALONGSIDE the trigger-based loader, not replacing it:
|
|
57
|
+
- **Loader** = reactive (matches on explicit trigger keywords in task description)
|
|
58
|
+
- **Signal detector** = proactive (observes context and suggests before explicit mention)
|
|
59
|
+
|
|
60
|
+
If the loader has already loaded a skill, the signal detector suppresses its suggestion for that skill.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Proactive Skill Suggestion — Suggestion Engine
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Manage the lifecycle of skill suggestions: confidence gating, cooldown enforcement,
|
|
5
|
+
deduplication, debounce, and user feedback integration.
|
|
6
|
+
|
|
7
|
+
## Configuration (from config.json)
|
|
8
|
+
|
|
9
|
+
```json
|
|
10
|
+
{
|
|
11
|
+
"proactive_suggestions": {
|
|
12
|
+
"enabled": true,
|
|
13
|
+
"confidence_threshold": 0.7,
|
|
14
|
+
"cooldown_seconds": 300,
|
|
15
|
+
"debounce_seconds": 30,
|
|
16
|
+
"max_recent": 50,
|
|
17
|
+
"store_path": ".mindforge/engine/proactive/recent-suggestions.json"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Suggestion Lifecycle
|
|
23
|
+
|
|
24
|
+
### Step 1 — Signal Received
|
|
25
|
+
Signal detector emits: `{ skill: string, confidence: number, reason: string, signal_type: string }`
|
|
26
|
+
|
|
27
|
+
### Step 2 — Confidence Gate
|
|
28
|
+
- If `confidence < threshold` (0.7): discard silently
|
|
29
|
+
- If `confidence >= threshold`: proceed to Step 3
|
|
30
|
+
|
|
31
|
+
### Step 3 — Cooldown Check
|
|
32
|
+
- Read dismissals from `.mindforge/engine/proactive/dismissals.json`
|
|
33
|
+
- If this `skill:signal_type` pair was dismissed within `cooldown_seconds` (300s): suppress
|
|
34
|
+
- Dismissal record format: `{ "skill:signal_type": timestamp_ms }` (Unix epoch milliseconds at which the suggestion was dismissed)
|
|
35
|
+
|
|
36
|
+
### Step 4 — Debounce
|
|
37
|
+
- If ANY suggestion was presented within `debounce_seconds` (30s): queue, don't present
|
|
38
|
+
- Queue is FIFO; oldest suggestion presented first after debounce expires
|
|
39
|
+
|
|
40
|
+
### Step 5 — Deduplication
|
|
41
|
+
- Check if skill is already loaded in current session (from loader)
|
|
42
|
+
- Check if same suggestion was already presented this session
|
|
43
|
+
- If either: discard
|
|
44
|
+
|
|
45
|
+
### Step 6 — Present Suggestion
|
|
46
|
+
Format for agent context:
|
|
47
|
+
```
|
|
48
|
+
💡 Proactive suggestion: Load **[skill-name]** skill
|
|
49
|
+
Reason: [reason from signal]
|
|
50
|
+
Confidence: [0.XX]
|
|
51
|
+
Action: Apply automatically? [yes/dismiss]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Step 7 — User Response
|
|
55
|
+
- **Accept**: Load the skill via standard loader pipeline
|
|
56
|
+
- **Dismiss**: Record in `dismissals.json` with timestamp, start cooldown
|
|
57
|
+
|
|
58
|
+
## Storage
|
|
59
|
+
|
|
60
|
+
### recent-suggestions.json (circular buffer, max 50)
|
|
61
|
+
```json
|
|
62
|
+
[
|
|
63
|
+
{
|
|
64
|
+
"skill": "testing-anti-patterns",
|
|
65
|
+
"confidence": 0.8,
|
|
66
|
+
"signal_type": "error",
|
|
67
|
+
"reason": "3+ mock-related test failures detected",
|
|
68
|
+
"timestamp": "2026-05-26T10:30:00Z",
|
|
69
|
+
"outcome": "accepted"
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### dismissals.json
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"testing-anti-patterns:error": 1748262600000,
|
|
78
|
+
"de-sloppify:task": 1748262300000
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Metrics
|
|
83
|
+
|
|
84
|
+
Track suggestion effectiveness:
|
|
85
|
+
- **Acceptance rate**: accepted / (accepted + dismissed) — target > 60%
|
|
86
|
+
- **Relevance rate**: accepted suggestions that led to skill activation / total accepted
|
|
87
|
+
- **False positive rate**: dismissed / total presented — target < 40%
|
|
88
|
+
|
|
89
|
+
Report in `/mindforge:status` output:
|
|
90
|
+
```
|
|
91
|
+
Proactive suggestions: 12 presented | 8 accepted (67%) | 4 dismissed
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Disable Conditions
|
|
95
|
+
|
|
96
|
+
Suggestions are automatically disabled when:
|
|
97
|
+
- `config.json` has `proactive_suggestions.enabled: false`
|
|
98
|
+
- Session is in autonomous mode (too noisy)
|
|
99
|
+
- Agent is in a time-critical path (shipping, hotfix)
|
|
100
|
+
- Budget is in economy mode (avoid context overhead)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# MindForge Skills Engine — Composition System
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Enable skills to declaratively depend on and invoke other skills via a `compose:`
|
|
5
|
+
field in YAML frontmatter. This allows complex skills to build on simpler foundations
|
|
6
|
+
without duplicating content.
|
|
7
|
+
|
|
8
|
+
## Schema Addition
|
|
9
|
+
|
|
10
|
+
Skills may include an optional `compose:` field in their YAML frontmatter:
|
|
11
|
+
|
|
12
|
+
```yaml
|
|
13
|
+
---
|
|
14
|
+
name: verification-loop
|
|
15
|
+
version: 1.0.0
|
|
16
|
+
min_mindforge_version: 10.0.3
|
|
17
|
+
status: stable
|
|
18
|
+
triggers: verification, quality gate, build check
|
|
19
|
+
compose:
|
|
20
|
+
- security-review
|
|
21
|
+
---
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Composition Rules
|
|
25
|
+
|
|
26
|
+
### Resolution
|
|
27
|
+
1. When a skill with `compose:` is loaded, the loader resolves each referenced skill name against MANIFEST.md
|
|
28
|
+
2. Referenced skills are loaded as **summarized content** (not full injection) — see loader.md Step 5 summarisation format
|
|
29
|
+
3. The composing (parent) skill is always injected in full; composed (child) skills are summarized
|
|
30
|
+
|
|
31
|
+
### Depth Limit
|
|
32
|
+
- Maximum composition depth: **2 levels** (the parent skill plus its directly composed children)
|
|
33
|
+
- A composed skill's own `compose:` dependencies are NOT resolved (no transitive loading)
|
|
34
|
+
- Rationale: prevents context explosion and keeps token budget predictable
|
|
35
|
+
|
|
36
|
+
### Cycle Detection
|
|
37
|
+
- Before resolving compositions, check for circular references
|
|
38
|
+
- If skill A composes B and B composes A: log a WARNING, skip the circular reference, load only the directly-requested skill
|
|
39
|
+
- Circular detection is checked at load time, not at registration time
|
|
40
|
+
|
|
41
|
+
### Token Budget Impact
|
|
42
|
+
- Each composed skill adds ~150 tokens (summary format only)
|
|
43
|
+
- A skill composing 3 others adds ~450 tokens overhead
|
|
44
|
+
- This counts against the standard context budget (see loader.md budget table)
|
|
45
|
+
|
|
46
|
+
### Conflict Resolution
|
|
47
|
+
- If a composed skill is ALSO matched by trigger (i.e., it would have been loaded independently):
|
|
48
|
+
load it in FULL (not summarized), since it matched on its own merit
|
|
49
|
+
- The composing skill still counts it as satisfied
|
|
50
|
+
|
|
51
|
+
### Validation at Registration
|
|
52
|
+
When a skill is registered via MANIFEST.md:
|
|
53
|
+
1. Check that all skills listed in `compose:` exist in the manifest
|
|
54
|
+
2. If a referenced skill doesn't exist: log a WARNING (not an error) and register anyway
|
|
55
|
+
3. Missing composed skills are simply not loaded at runtime (graceful degradation)
|
|
56
|
+
|
|
57
|
+
## Audit Logging
|
|
58
|
+
|
|
59
|
+
When composition is resolved, add to the task's AUDIT entry:
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"skills_composed": [
|
|
63
|
+
{ "parent": "verification-loop", "child": "security-review", "mode": "summarized" }
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Examples
|
|
69
|
+
|
|
70
|
+
### Skill that composes one dependency
|
|
71
|
+
```yaml
|
|
72
|
+
---
|
|
73
|
+
name: threat-modeling
|
|
74
|
+
compose:
|
|
75
|
+
- security-review
|
|
76
|
+
---
|
|
77
|
+
```
|
|
78
|
+
Result: threat-modeling loaded in full + security-review loaded as summary.
|
|
79
|
+
|
|
80
|
+
### Skill where composed dependency also triggers independently
|
|
81
|
+
Task: "Review auth threat model for the payment API"
|
|
82
|
+
- Triggers match: threat-modeling (via "threat model") AND security-review (via "auth", "payment")
|
|
83
|
+
- Both load in FULL (security-review matched independently, composition is moot)
|
|
@@ -81,6 +81,22 @@ For each matched skill (in tier priority order: Project → Org → Core):
|
|
|
81
81
|
3. Inject the skill content into the agent's context package (per `context-injector.md`)
|
|
82
82
|
4. Log which skills were loaded in the task's `task_started` AUDIT entry
|
|
83
83
|
|
|
84
|
+
### Step 4.1 — Resolve composed dependencies
|
|
85
|
+
|
|
86
|
+
After loading matched skills, resolve any composition dependencies:
|
|
87
|
+
|
|
88
|
+
1. For each loaded skill, check its YAML frontmatter for a `compose:` field
|
|
89
|
+
2. If `compose:` is present, resolve each referenced skill name against MANIFEST.md
|
|
90
|
+
3. Inject composed (child) skills as **summarized content** (not full injection) —
|
|
91
|
+
use the summarisation format defined in Step 5 below
|
|
92
|
+
4. Maximum composition depth: **2 levels** (the parent skill plus its direct children) — a composed skill's own `compose:`
|
|
93
|
+
dependencies are NOT resolved (no transitive composition beyond that)
|
|
94
|
+
5. **Cycle detection:** if skill A composes B and B composes A, log a WARNING
|
|
95
|
+
and skip the circular reference — load only the directly-requested skill
|
|
96
|
+
without its circular dependency
|
|
97
|
+
|
|
98
|
+
For full composition semantics, see `composition.md`.
|
|
99
|
+
|
|
84
100
|
### Step 4.5 — Validate loaded skill content (injection guard)
|
|
85
101
|
|
|
86
102
|
Before injecting any skill content into an agent context, validate it against
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: mindforge-agent-architect
|
|
3
|
+
description: Designs autonomous agent loops, planning systems, and tool orchestration for agentic AI systems.
|
|
4
|
+
tools: Read, Write, Bash, Grep, Glob
|
|
5
|
+
color: autonomous-violet
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
<role>
|
|
9
|
+
You are the MindForge Agent Architect. You design autonomous AI agents that plan multi-step tasks, use tools intelligently, and adapt to failures. Your systems bridge the gap between language model capabilities and real-world task execution through robust planning, execution monitoring, and error recovery.
|
|
10
|
+
</role>
|
|
11
|
+
|
|
12
|
+
<why_this_matters>
|
|
13
|
+
- Agents unlock AI capabilities beyond single-shot responses (complex tasks require planning, tool use, and iteration)
|
|
14
|
+
- Poorly designed agents create runaway costs (infinite loops, redundant tool calls) and safety risks (uncontrolled actions)
|
|
15
|
+
- You depend on `llm-orchestrator` for model selection across planning vs execution phases
|
|
16
|
+
- The `ai-safety-engineer` must approve all production agent tool access to prevent harm
|
|
17
|
+
- Your planning algorithms determine whether `ai-economist` sees controlled costs or exponential budget burn
|
|
18
|
+
</why_this_matters>
|
|
19
|
+
|
|
20
|
+
<philosophy>
|
|
21
|
+
**Planning Is Cheap, Execution Is Expensive:**
|
|
22
|
+
Spend 10 LLM calls on careful planning to save 100 tool executions. Front-load reasoning: decompose tasks into subtasks, validate approach feasibility, identify required tools, and estimate steps before executing anything. Bad plans cost more to fix than time spent upfront planning properly.
|
|
23
|
+
|
|
24
|
+
**Agents Must Explain Themselves:**
|
|
25
|
+
Every agent decision should be explainable and auditable. Log: planned approach (why these subtasks?), tool call reasoning (why this tool now?), execution observations (what happened?), and adaptation decisions (why change plan?). Enable humans to interrupt, steer, and learn from agents. Opacity breeds distrust.
|
|
26
|
+
|
|
27
|
+
**Fail-Fast With Circuit Breakers:**
|
|
28
|
+
Agents can spiral: stuck in loops, making redundant calls, ignoring failures. Implement hard limits: max steps (stop after 20 iterations), max cost ($5 per task), max identical tool calls (3 retries), and timeout (5 minutes). Better to admit failure early than waste resources on impossible tasks.
|
|
29
|
+
</philosophy>
|
|
30
|
+
|
|
31
|
+
<process>
|
|
32
|
+
|
|
33
|
+
<step name="task_decomposition">
|
|
34
|
+
Break complex tasks into manageable subtasks. Use LLM to generate plan: identify goal, decompose into sequential or parallel subtasks, determine required tools and data, estimate difficulty and time. Validate plan: check for circular dependencies, impossible steps, or missing prerequisites. Output structured plan (DAG of subtasks with dependencies).
|
|
35
|
+
</step>
|
|
36
|
+
|
|
37
|
+
<step name="tool_orchestration">
|
|
38
|
+
Design tool selection and execution system. Maintain tool registry: each tool has name, description, input schema, output format, cost estimate, and safety rating. Implement tool selection logic: match subtask requirements to tool capabilities, prioritize safe/cheap tools, and fall back to alternative tools when primary fails. Execute with retries and error handling.
|
|
39
|
+
</step>
|
|
40
|
+
|
|
41
|
+
<step name="execution_monitoring">
|
|
42
|
+
Monitor agent execution in real-time. Track: current subtask, tools called, tokens used, cost accrued, and time elapsed. Detect failure patterns: infinite loops (repeated identical tool calls), stuck states (no progress for N steps), and budget overruns. Trigger interventions: request human guidance, abort task, or simplify plan.
|
|
43
|
+
</step>
|
|
44
|
+
|
|
45
|
+
<step name="adaptive_planning">
|
|
46
|
+
Enable agents to adapt plans dynamically. After each tool execution, agent observes: tool output, success/failure, and new information learned. Agent decides: continue with plan, revise remaining subtasks, backtrack and try alternative approach, or escalate to human. Log all plan changes with reasoning for post-hoc analysis.
|
|
47
|
+
</step>
|
|
48
|
+
|
|
49
|
+
</process>
|
|
50
|
+
|
|
51
|
+
<critical_rules>
|
|
52
|
+
- Never allow agents to use tools without explicit approval from ai-safety-engineer (prevents accidental damage)
|
|
53
|
+
- Always implement step limits per task (prevents infinite loops from consuming unbounded resources)
|
|
54
|
+
- Log complete agent traces (plan, tool calls, observations, adaptations) for debugging and improvement
|
|
55
|
+
- Test agent behavior on adversarial tasks (impossible goals, ambiguous instructions, missing prerequisites)
|
|
56
|
+
- Monitor agent success rates per task type (reveals which tasks are well-suited vs poorly-suited for agents)
|
|
57
|
+
</critical_rules>
|