mindforge-cc 10.0.2 → 10.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322) hide show
  1. package/.mindforge/config.json +73 -2
  2. package/.mindforge/engine/autonomous/cross-iteration-bridge.md +96 -0
  3. package/.mindforge/engine/cost-tracking/budget-enforcer.md +68 -0
  4. package/.mindforge/engine/cost-tracking/router.md +58 -0
  5. package/.mindforge/engine/cost-tracking/token-ledger.md +77 -0
  6. package/.mindforge/engine/council/council-protocol.md +96 -0
  7. package/.mindforge/engine/council/council-templates.md +85 -0
  8. package/.mindforge/engine/council/synthesis-engine.md +71 -0
  9. package/.mindforge/engine/cross-model-eval.md +74 -0
  10. package/.mindforge/engine/instincts/capture-engine.md +63 -0
  11. package/.mindforge/engine/instincts/instinct-schema.md +76 -0
  12. package/.mindforge/engine/instincts/promotion-engine.md +77 -0
  13. package/.mindforge/engine/proactive/signal-detector.md +60 -0
  14. package/.mindforge/engine/proactive/suggestion-engine.md +100 -0
  15. package/.mindforge/engine/skills/composition.md +83 -0
  16. package/.mindforge/engine/skills/loader.md +16 -0
  17. package/.mindforge/personas/agent-architect.md +57 -0
  18. package/.mindforge/personas/agent-evaluator.md +162 -0
  19. package/.mindforge/personas/agent-memory-designer.md +157 -0
  20. package/.mindforge/personas/agent-ops-engineer.md +120 -0
  21. package/.mindforge/personas/agent-orchestrator.md +112 -0
  22. package/.mindforge/personas/ai-economist.md +57 -0
  23. package/.mindforge/personas/ai-safety-engineer.md +57 -0
  24. package/.mindforge/personas/analytics-engineer.md +57 -0
  25. package/.mindforge/personas/anti-pattern-hunter.md +61 -0
  26. package/.mindforge/personas/api-gateway-designer.md +132 -0
  27. package/.mindforge/personas/auth-engineer.md +112 -0
  28. package/.mindforge/personas/build-engineer.md +57 -0
  29. package/.mindforge/personas/business-analyst.md +56 -0
  30. package/.mindforge/personas/cache-architect.md +100 -0
  31. package/.mindforge/personas/causal-scientist.md +57 -0
  32. package/.mindforge/personas/cdn-architect.md +118 -0
  33. package/.mindforge/personas/change-agent.md +104 -0
  34. package/.mindforge/personas/code-narrator.md +52 -0
  35. package/.mindforge/personas/codegen-specialist.md +68 -0
  36. package/.mindforge/personas/communication-architect.md +102 -0
  37. package/.mindforge/personas/compliance-engineer.md +96 -0
  38. package/.mindforge/personas/consensus-engineer.md +116 -0
  39. package/.mindforge/personas/contract-tester.md +60 -192
  40. package/.mindforge/personas/cost-optimizer.md +71 -0
  41. package/.mindforge/personas/council-architect.md +66 -0
  42. package/.mindforge/personas/council-critic.md +67 -0
  43. package/.mindforge/personas/council-pragmatist.md +71 -0
  44. package/.mindforge/personas/council-skeptic.md +73 -0
  45. package/.mindforge/personas/data-architect.md +108 -0
  46. package/.mindforge/personas/data-mesh-architect.md +57 -0
  47. package/.mindforge/personas/data-pipeline-architect.md +120 -0
  48. package/.mindforge/personas/de-sloppifier.md +60 -0
  49. package/.mindforge/personas/debt-manager.md +66 -0
  50. package/.mindforge/personas/decision-architect.md +82 -51
  51. package/.mindforge/personas/deployment-captain.md +74 -0
  52. package/.mindforge/personas/design-system-lead.md +112 -0
  53. package/.mindforge/personas/dmux-orchestrator.md +75 -0
  54. package/.mindforge/personas/doc-auditor.md +84 -0
  55. package/.mindforge/personas/dx-engineer.md +96 -0
  56. package/.mindforge/personas/ecommerce-engineer.md +57 -0
  57. package/.mindforge/personas/edge-engineer.md +94 -0
  58. package/.mindforge/personas/edtech-architect.md +106 -0
  59. package/.mindforge/personas/embedding-architect.md +57 -0
  60. package/.mindforge/personas/environment-engineer.md +57 -0
  61. package/.mindforge/personas/eval-judge.md +55 -0
  62. package/.mindforge/personas/event-architect.md +102 -0
  63. package/.mindforge/personas/experiment-designer.md +138 -0
  64. package/.mindforge/personas/feature-store-engineer.md +57 -0
  65. package/.mindforge/personas/finops-analyst.md +66 -0
  66. package/.mindforge/personas/fintech-architect.md +57 -0
  67. package/.mindforge/personas/flutter-engineer.md +104 -0
  68. package/.mindforge/personas/gaming-engineer.md +57 -0
  69. package/.mindforge/personas/graphql-designer.md +73 -0
  70. package/.mindforge/personas/healthcare-engineer.md +57 -0
  71. package/.mindforge/personas/hiring-strategist.md +105 -0
  72. package/.mindforge/personas/hitl-architect.md +165 -0
  73. package/.mindforge/personas/i18n-architect.md +69 -0
  74. package/.mindforge/personas/instinct-curator.md +83 -0
  75. package/.mindforge/personas/iot-architect.md +105 -0
  76. package/.mindforge/personas/knowledge-curator.md +139 -0
  77. package/.mindforge/personas/knowledge-engineer.md +57 -0
  78. package/.mindforge/personas/lakehouse-architect.md +57 -0
  79. package/.mindforge/personas/llm-orchestrator.md +57 -0
  80. package/.mindforge/personas/logistics-architect.md +106 -0
  81. package/.mindforge/personas/market-analyst.md +53 -0
  82. package/.mindforge/personas/marketplace-engineer.md +105 -0
  83. package/.mindforge/personas/mcp-designer.md +54 -0
  84. package/.mindforge/personas/meeting-designer.md +104 -0
  85. package/.mindforge/personas/mentorship-lead.md +106 -0
  86. package/.mindforge/personas/migration-architect.md +57 -0
  87. package/.mindforge/personas/ml-ops-engineer.md +101 -0
  88. package/.mindforge/personas/mobile-architect.md +105 -0
  89. package/.mindforge/personas/mobile-security-engineer.md +106 -0
  90. package/.mindforge/personas/multi-model-bridge.md +86 -0
  91. package/.mindforge/personas/multi-tenancy-architect.md +71 -0
  92. package/.mindforge/personas/multimodal-engineer.md +57 -0
  93. package/.mindforge/personas/offline-specialist.md +105 -0
  94. package/.mindforge/personas/onboarding-navigator.md +63 -0
  95. package/.mindforge/personas/payments-engineer.md +135 -0
  96. package/.mindforge/personas/pipeline-engineer.md +115 -0
  97. package/.mindforge/personas/platform-engineer.md +97 -0
  98. package/.mindforge/personas/platform-lead.md +57 -0
  99. package/.mindforge/personas/privacy-engineer.md +57 -0
  100. package/.mindforge/personas/product-owner.md +56 -0
  101. package/.mindforge/personas/productivity-analyst.md +57 -0
  102. package/.mindforge/personas/prompt-architect.md +101 -0
  103. package/.mindforge/personas/proofreader.md +53 -0
  104. package/.mindforge/personas/pwa-architect.md +105 -0
  105. package/.mindforge/personas/quality-scorer.md +63 -0
  106. package/.mindforge/personas/react-native-engineer.md +106 -0
  107. package/.mindforge/personas/resilience-engineer.md +69 -0
  108. package/.mindforge/personas/rfc-architect.md +64 -0
  109. package/.mindforge/personas/saga-orchestrator.md +80 -0
  110. package/.mindforge/personas/secrets-engineer.md +57 -0
  111. package/.mindforge/personas/skill-smith.md +79 -0
  112. package/.mindforge/personas/sre-lead.md +107 -0
  113. package/.mindforge/personas/stream-engineer.md +57 -0
  114. package/.mindforge/personas/streaming-engineer.md +64 -0
  115. package/.mindforge/personas/swarm-templates.json +695 -38
  116. package/.mindforge/personas/system-designer.md +57 -0
  117. package/.mindforge/personas/team-coach.md +120 -0
  118. package/.mindforge/personas/tech-lead-coach.md +103 -0
  119. package/.mindforge/personas/technical-writer-lead.md +111 -0
  120. package/.mindforge/personas/threat-modeler.md +82 -0
  121. package/.mindforge/personas/vibe-checker.md +75 -0
  122. package/.mindforge/personas/worktree-manager.md +56 -0
  123. package/.mindforge/personas/zero-trust-engineer.md +113 -0
  124. package/.mindforge/skills/a11y-testing/SKILL.md +143 -0
  125. package/.mindforge/skills/agent-evaluation-framework/SKILL.md +227 -0
  126. package/.mindforge/skills/agent-introspection-debugging/SKILL.md +88 -0
  127. package/.mindforge/skills/agent-loops/SKILL.md +84 -0
  128. package/.mindforge/skills/agent-memory-design/SKILL.md +199 -0
  129. package/.mindforge/skills/agent-orchestration-patterns/SKILL.md +129 -0
  130. package/.mindforge/skills/agent-tool-selection/SKILL.md +204 -0
  131. package/.mindforge/skills/ai-agent-deployment/SKILL.md +176 -0
  132. package/.mindforge/skills/ai-cost-management/SKILL.md +57 -0
  133. package/.mindforge/skills/ai-safety-alignment/SKILL.md +53 -0
  134. package/.mindforge/skills/analytics-instrumentation/SKILL.md +172 -0
  135. package/.mindforge/skills/api-gateway-patterns/SKILL.md +177 -0
  136. package/.mindforge/skills/api-marketplace/SKILL.md +56 -0
  137. package/.mindforge/skills/api-versioning/SKILL.md +100 -0
  138. package/.mindforge/skills/app-store-deployment/SKILL.md +44 -0
  139. package/.mindforge/skills/architecture-tradeoff-analysis/SKILL.md +97 -0
  140. package/.mindforge/skills/audit-logging/SKILL.md +140 -0
  141. package/.mindforge/skills/auth-patterns/SKILL.md +148 -0
  142. package/.mindforge/skills/autonomous-agent-harness/SKILL.md +218 -0
  143. package/.mindforge/skills/autonomous-agents/SKILL.md +59 -0
  144. package/.mindforge/skills/autonomous-loops/SKILL.md +105 -0
  145. package/.mindforge/skills/build-system-optimization/SKILL.md +54 -0
  146. package/.mindforge/skills/build-vs-buy/SKILL.md +80 -0
  147. package/.mindforge/skills/bundle-optimization/SKILL.md +174 -0
  148. package/.mindforge/skills/business-analyst/SKILL.md +82 -0
  149. package/.mindforge/skills/caching-strategies/SKILL.md +132 -0
  150. package/.mindforge/skills/capacity-planning/SKILL.md +96 -0
  151. package/.mindforge/skills/causal-inference/SKILL.md +42 -0
  152. package/.mindforge/skills/cdn-optimization/SKILL.md +212 -0
  153. package/.mindforge/skills/change-management/SKILL.md +106 -0
  154. package/.mindforge/skills/chaos-engineering/SKILL.md +99 -0
  155. package/.mindforge/skills/ci-cd-pipeline/SKILL.md +118 -0
  156. package/.mindforge/skills/cli-design/SKILL.md +118 -0
  157. package/.mindforge/skills/code-generation-patterns/SKILL.md +92 -0
  158. package/.mindforge/skills/code-review-methodology/SKILL.md +180 -0
  159. package/.mindforge/skills/code-tour/SKILL.md +145 -0
  160. package/.mindforge/skills/codebase-onboarding/SKILL.md +95 -0
  161. package/.mindforge/skills/compliance-as-code/SKILL.md +195 -0
  162. package/.mindforge/skills/conflict-resolution/SKILL.md +87 -0
  163. package/.mindforge/skills/connection-pooling/SKILL.md +151 -0
  164. package/.mindforge/skills/container-security/SKILL.md +151 -0
  165. package/.mindforge/skills/context-engineering/SKILL.md +114 -0
  166. package/.mindforge/skills/continuous-learning/SKILL.md +84 -0
  167. package/.mindforge/skills/contract-testing/SKILL.md +85 -0
  168. package/.mindforge/skills/cost-aware-routing/SKILL.md +83 -0
  169. package/.mindforge/skills/cost-estimation/SKILL.md +82 -0
  170. package/.mindforge/skills/council/SKILL.md +68 -0
  171. package/.mindforge/skills/cqrs-event-sourcing/SKILL.md +95 -0
  172. package/.mindforge/skills/cross-platform-testing/SKILL.md +43 -0
  173. package/.mindforge/skills/data-governance/SKILL.md +42 -0
  174. package/.mindforge/skills/data-lakehouse/SKILL.md +42 -0
  175. package/.mindforge/skills/data-mesh/SKILL.md +42 -0
  176. package/.mindforge/skills/data-modeling/SKILL.md +107 -0
  177. package/.mindforge/skills/data-pipeline-design/SKILL.md +171 -0
  178. package/.mindforge/skills/data-privacy-engineering/SKILL.md +42 -0
  179. package/.mindforge/skills/database-performance/SKILL.md +174 -0
  180. package/.mindforge/skills/database-sharding-advanced/SKILL.md +206 -0
  181. package/.mindforge/skills/de-sloppify/SKILL.md +120 -0
  182. package/.mindforge/skills/defense-in-depth/SKILL.md +84 -0
  183. package/.mindforge/skills/delegation-patterns/SKILL.md +123 -0
  184. package/.mindforge/skills/dependency-management/SKILL.md +94 -0
  185. package/.mindforge/skills/deployment-workflow/SKILL.md +135 -0
  186. package/.mindforge/skills/design-system/SKILL.md +113 -0
  187. package/.mindforge/skills/developer-onboarding/SKILL.md +99 -0
  188. package/.mindforge/skills/developer-productivity-metrics/SKILL.md +59 -0
  189. package/.mindforge/skills/distributed-consensus/SKILL.md +141 -0
  190. package/.mindforge/skills/dmux-workflows/SKILL.md +141 -0
  191. package/.mindforge/skills/dns-architecture/SKILL.md +167 -0
  192. package/.mindforge/skills/doc-health-audit/SKILL.md +102 -0
  193. package/.mindforge/skills/ecommerce-architecture/SKILL.md +41 -0
  194. package/.mindforge/skills/edge-computing/SKILL.md +91 -0
  195. package/.mindforge/skills/edtech-platform/SKILL.md +41 -0
  196. package/.mindforge/skills/email-deliverability/SKILL.md +177 -0
  197. package/.mindforge/skills/embedding-systems/SKILL.md +55 -0
  198. package/.mindforge/skills/environment-management/SKILL.md +54 -0
  199. package/.mindforge/skills/error-handling-architecture/SKILL.md +118 -0
  200. package/.mindforge/skills/estimation-techniques/SKILL.md +113 -0
  201. package/.mindforge/skills/eval-harness/SKILL.md +180 -0
  202. package/.mindforge/skills/event-driven-architecture/SKILL.md +162 -0
  203. package/.mindforge/skills/experiment-design/SKILL.md +139 -0
  204. package/.mindforge/skills/experiment-platform/SKILL.md +43 -0
  205. package/.mindforge/skills/feature-engineering/SKILL.md +42 -0
  206. package/.mindforge/skills/feature-flag-management/SKILL.md +183 -0
  207. package/.mindforge/skills/fine-tuning-workflow/SKILL.md +189 -0
  208. package/.mindforge/skills/fintech-patterns/SKILL.md +41 -0
  209. package/.mindforge/skills/flutter-architecture/SKILL.md +42 -0
  210. package/.mindforge/skills/gaming-backend/SKILL.md +41 -0
  211. package/.mindforge/skills/git-workflow-design/SKILL.md +129 -0
  212. package/.mindforge/skills/graceful-degradation/SKILL.md +95 -0
  213. package/.mindforge/skills/graphql-patterns/SKILL.md +243 -0
  214. package/.mindforge/skills/guardrails-and-safety/SKILL.md +137 -0
  215. package/.mindforge/skills/healthcare-systems/SKILL.md +40 -0
  216. package/.mindforge/skills/hiring-engineering/SKILL.md +119 -0
  217. package/.mindforge/skills/human-in-the-loop-design/SKILL.md +234 -0
  218. package/.mindforge/skills/i18n-architecture/SKILL.md +147 -0
  219. package/.mindforge/skills/idempotency-patterns/SKILL.md +84 -0
  220. package/.mindforge/skills/incident-communication/SKILL.md +96 -0
  221. package/.mindforge/skills/incident-management/SKILL.md +97 -0
  222. package/.mindforge/skills/infrastructure-as-code/SKILL.md +98 -0
  223. package/.mindforge/skills/instinct-clustering/SKILL.md +190 -0
  224. package/.mindforge/skills/internal-developer-platform/SKILL.md +51 -0
  225. package/.mindforge/skills/iot-platform/SKILL.md +41 -0
  226. package/.mindforge/skills/k8s-deployment/SKILL.md +358 -0
  227. package/.mindforge/skills/knowledge-graphs/SKILL.md +56 -0
  228. package/.mindforge/skills/knowledge-sharing-systems/SKILL.md +112 -0
  229. package/.mindforge/skills/llm-cost-optimization/SKILL.md +198 -0
  230. package/.mindforge/skills/llm-orchestration/SKILL.md +56 -0
  231. package/.mindforge/skills/load-testing/SKILL.md +84 -0
  232. package/.mindforge/skills/logistics-optimization/SKILL.md +40 -0
  233. package/.mindforge/skills/market-researcher/SKILL.md +99 -0
  234. package/.mindforge/skills/marketplace-trust/SKILL.md +40 -0
  235. package/.mindforge/skills/mcp-server-patterns/SKILL.md +264 -0
  236. package/.mindforge/skills/media-streaming/SKILL.md +41 -0
  237. package/.mindforge/skills/meeting-architecture/SKILL.md +146 -0
  238. package/.mindforge/skills/mentoring-patterns/SKILL.md +77 -0
  239. package/.mindforge/skills/microservices-patterns/SKILL.md +83 -0
  240. package/.mindforge/skills/migration-platform/SKILL.md +61 -0
  241. package/.mindforge/skills/migration-strategies/SKILL.md +129 -0
  242. package/.mindforge/skills/ml-feature-store/SKILL.md +56 -0
  243. package/.mindforge/skills/ml-monitoring/SKILL.md +42 -0
  244. package/.mindforge/skills/mobile-performance/SKILL.md +44 -0
  245. package/.mindforge/skills/mobile-security/SKILL.md +45 -0
  246. package/.mindforge/skills/model-evaluation/SKILL.md +53 -0
  247. package/.mindforge/skills/monorepo-management/SKILL.md +100 -0
  248. package/.mindforge/skills/multi-llm-consult/SKILL.md +75 -0
  249. package/.mindforge/skills/multi-tenancy-patterns/SKILL.md +145 -0
  250. package/.mindforge/skills/multi-turn-conversation-design/SKILL.md +206 -0
  251. package/.mindforge/skills/multimodal-ai/SKILL.md +51 -0
  252. package/.mindforge/skills/mutation-testing/SKILL.md +97 -0
  253. package/.mindforge/skills/notification-system-design/SKILL.md +168 -0
  254. package/.mindforge/skills/observability-stack/SKILL.md +136 -0
  255. package/.mindforge/skills/offline-first-design/SKILL.md +43 -0
  256. package/.mindforge/skills/on-call-design/SKILL.md +111 -0
  257. package/.mindforge/skills/pagination-patterns/SKILL.md +230 -0
  258. package/.mindforge/skills/payment-integration/SKILL.md +176 -0
  259. package/.mindforge/skills/performance-reviews/SKILL.md +140 -0
  260. package/.mindforge/skills/platform-observability/SKILL.md +58 -0
  261. package/.mindforge/skills/platform-reliability/SKILL.md +52 -0
  262. package/.mindforge/skills/post-incident-learning/SKILL.md +96 -0
  263. package/.mindforge/skills/product-manager/SKILL.md +104 -0
  264. package/.mindforge/skills/progressive-web-app/SKILL.md +44 -0
  265. package/.mindforge/skills/prompt-engineering/SKILL.md +94 -0
  266. package/.mindforge/skills/proofreader/SKILL.md +158 -0
  267. package/.mindforge/skills/push-notification-architecture/SKILL.md +45 -0
  268. package/.mindforge/skills/python-performance/SKILL.md +183 -0
  269. package/.mindforge/skills/quality-audit/SKILL.md +171 -0
  270. package/.mindforge/skills/queue-design/SKILL.md +85 -0
  271. package/.mindforge/skills/rag-architecture/SKILL.md +176 -0
  272. package/.mindforge/skills/rate-limiting-design/SKILL.md +94 -0
  273. package/.mindforge/skills/react-native-patterns/SKILL.md +42 -0
  274. package/.mindforge/skills/react-performance/SKILL.md +229 -0
  275. package/.mindforge/skills/real-time-analytics/SKILL.md +42 -0
  276. package/.mindforge/skills/real-time-sync/SKILL.md +83 -0
  277. package/.mindforge/skills/responsive-native/SKILL.md +44 -0
  278. package/.mindforge/skills/responsive-patterns/SKILL.md +141 -0
  279. package/.mindforge/skills/rfc-pipeline/SKILL.md +114 -0
  280. package/.mindforge/skills/saas-multi-tenant/SKILL.md +41 -0
  281. package/.mindforge/skills/santa-method/SKILL.md +134 -0
  282. package/.mindforge/skills/search-implementation/SKILL.md +98 -0
  283. package/.mindforge/skills/secrets-platform/SKILL.md +56 -0
  284. package/.mindforge/skills/secrets-rotation/SKILL.md +173 -0
  285. package/.mindforge/skills/self-serve-infrastructure/SKILL.md +51 -0
  286. package/.mindforge/skills/serverless-patterns/SKILL.md +119 -0
  287. package/.mindforge/skills/skill-creator-meta/SKILL.md +146 -0
  288. package/.mindforge/skills/sprint-retrospective-facilitation/SKILL.md +112 -0
  289. package/.mindforge/skills/stakeholder-communication/SKILL.md +85 -0
  290. package/.mindforge/skills/state-management/SKILL.md +104 -0
  291. package/.mindforge/skills/stream-processing/SKILL.md +43 -0
  292. package/.mindforge/skills/streaming-architecture/SKILL.md +81 -0
  293. package/.mindforge/skills/supply-chain-security/SKILL.md +145 -0
  294. package/.mindforge/skills/synthetic-data-generation/SKILL.md +52 -0
  295. package/.mindforge/skills/system-design/SKILL.md +88 -0
  296. package/.mindforge/skills/team-topology-design/SKILL.md +107 -0
  297. package/.mindforge/skills/technical-debt-management/SKILL.md +86 -0
  298. package/.mindforge/skills/technical-interview-design/SKILL.md +98 -0
  299. package/.mindforge/skills/technical-leadership/SKILL.md +75 -0
  300. package/.mindforge/skills/technical-writing/SKILL.md +237 -0
  301. package/.mindforge/skills/technology-radar/SKILL.md +88 -0
  302. package/.mindforge/skills/testing-anti-patterns/SKILL.md +288 -0
  303. package/.mindforge/skills/threat-modeling/SKILL.md +109 -0
  304. package/.mindforge/skills/tool-design/SKILL.md +138 -0
  305. package/.mindforge/skills/typescript-advanced/SKILL.md +198 -0
  306. package/.mindforge/skills/using-git-worktrees/SKILL.md +139 -0
  307. package/.mindforge/skills/verification-loop/SKILL.md +97 -0
  308. package/.mindforge/skills/vibe-security/SKILL.md +165 -0
  309. package/.mindforge/skills/visual-regression-testing/SKILL.md +97 -0
  310. package/.mindforge/skills/websocket-patterns/SKILL.md +203 -0
  311. package/.mindforge/skills/writing-plans/SKILL.md +170 -0
  312. package/.mindforge/skills/writing-skills/SKILL.md +216 -0
  313. package/.mindforge/skills/zero-trust-architecture/SKILL.md +166 -0
  314. package/CHANGELOG.md +195 -0
  315. package/MINDFORGE.md +4 -4
  316. package/README.md +2 -2
  317. package/RELEASENOTES.md +66 -0
  318. package/bin/installer-core.js +1 -1
  319. package/bin/wizard/theme.js +2 -2
  320. package/docs/commands-reference.md +18 -1
  321. package/package.json +2 -2
  322. package/.mindforge/personas/data-privacy-engineer.md +0 -187
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: data-governance
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.6.0
5
+ status: stable
6
+ triggers: data governance framework, data catalog implementation, data lineage tracking, data access control, data quality framework, data stewardship, metadata management, data classification, data retention policy, data discovery platform, data ownership, data compliance framework
7
+ ---
8
+
9
+ # Skill — Data Governance
10
+
11
+ ## When this skill activates
12
+ This skill activates when implementing data catalog systems, establishing data ownership models, building lineage tracking, or designing access control frameworks. Use when organizations need to scale data democratization while maintaining compliance and quality.
13
+
14
+ ## Mandatory actions when this skill is active
15
+
16
+ ### Before writing any code
17
+ 1. Define data classification taxonomy with clear criteria: public, internal, confidential, and restricted, with handling requirements for each tier
18
+ 2. Establish data ownership model: domain owners, data stewards, technical custodians with RACI matrix for responsibilities
19
+ 3. Document compliance requirements: GDPR, CCPA, HIPAA, SOC2 with specific technical controls needed for each regulation
20
+ 4. Design metadata schema capturing: business definitions, technical specifications, quality metrics, lineage, and access policies
21
+
22
+ ### During implementation
23
+ - Build automated data catalog discovery scanning databases, data lakes, APIs, and file systems to populate metadata repository
24
+ - Implement column-level lineage tracking from source systems through transformations to final consumption with impact analysis capabilities
25
+ - Create role-based access control (RBAC) combined with attribute-based access control (ABAC) policies for dynamic access based on data classification and user context
26
+ - Establish data quality framework with profiling rules, validation checks, and quality scores at dataset and column level
27
+ - Implement data retention policies with automated archival and deletion workflows based on regulatory requirements and business rules
28
+ - Build data stewardship workflows for metadata enrichment: business glossary terms, data ownership assignment, quality issue resolution
29
+ - Create audit logging for all data access, modifications, and policy changes with immutable trail for compliance reporting
30
+
31
+ ### After implementation
32
+ - Deploy self-serve data discovery portal with search, business glossary, quality indicators, and access request workflows
33
+ - Generate automated data quality reports with trend analysis, anomaly detection, and stakeholder-specific dashboards
34
+ - Create compliance audit packages with evidence of controls: access logs, retention proof, encryption verification, lineage documentation
35
+ - Build data governance metrics dashboard: catalog coverage, metadata completeness, quality score trends, access request SLA
36
+
37
+ ## Self-check before task completion
38
+ - [ ] Data catalog covers 90%+ of production data assets with accurate business metadata
39
+ - [ ] Lineage tracking provides end-to-end visibility from source to consumption with transformation logic
40
+ - [ ] Access control policies enforced at query time with separation of duties for sensitive data
41
+ - [ ] Data quality framework monitors critical datasets with automated alerting on quality degradation
42
+ - [ ] Compliance documentation generated automatically with evidence trails for audit requirements
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: data-lakehouse
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.6.0
5
+ status: stable
6
+ triggers: data lakehouse architecture, medallion architecture, schema evolution lakehouse, time travel data, partition optimization, Delta Lake implementation, Iceberg table design, lakehouse query performance, data lakehouse governance, lakehouse ingestion, lakehouse serving layer, lakehouse cost optimization
7
+ ---
8
+
9
+ # Skill — Data Lakehouse
10
+
11
+ ## When this skill activates
12
+ This skill activates when implementing lakehouse architectures combining data lake flexibility with data warehouse performance. Use when building Delta Lake, Iceberg, or Hudi tables with ACID guarantees, schema evolution, and time travel capabilities.
13
+
14
+ ## Mandatory actions when this skill is active
15
+
16
+ ### Before writing any code
17
+ 1. Design medallion architecture layers: bronze (raw ingestion), silver (cleansed/conformed), gold (business-level aggregates) with clear promotion criteria
18
+ 2. Select table format (Delta, Iceberg, Hudi) based on requirements: write patterns, query patterns, ecosystem compatibility, and feature needs
19
+ 3. Plan partitioning strategy based on query patterns: typically date/time for time-series data, geography for location-based data, or composite keys, while avoiding over-partitioning (keep below 1000 partitions)
20
+ 4. Define schema evolution policy: additive changes (safe), nullable to required (breaking), type changes (migration required) with versioning strategy
21
+
22
+ ### During implementation
23
+ - Implement ACID transactions for atomic writes: use table format's transaction log (Delta Log, Iceberg metadata) to ensure consistency
24
+ - Configure file sizing for optimal query performance: target 128MB-1GB per file, run regular OPTIMIZE/COMPACT operations to prevent small files
25
+ - Enable time travel with retention policy: maintain snapshots for point-in-time queries and audit, configure vacuum/expire based on compliance needs
26
+ - Design incremental processing patterns: merge/upsert operations for CDC, append for event streams, overwrite partitions for batch updates
27
+ - Implement Z-ordering or clustering on frequently filtered columns (non-partition keys) to improve query performance via data skipping
28
+ - Build schema evolution handlers: automatic schema merging for new columns, validation for breaking changes, schema registry integration
29
+ - Create data quality checkpoints between medallion layers: row counts, null checks, referential integrity, business rule validation with quarantine tables
30
+
31
+ ### After implementation
32
+ - Monitor table health metrics: file count, average file size, partition count, metadata size, and compaction needs
33
+ - Build cost optimization reports: storage by layer, compute for jobs, query costs, and opportunities for partition pruning or materialization
34
+ - Create governance controls: table-level access policies, column masking, row filtering, and audit logging for sensitive data access
35
+ - Generate performance analysis: query patterns, partition pruning effectiveness, file skipping statistics, and optimization recommendations
36
+
37
+ ## Self-check before task completion
38
+ - [ ] Medallion layers clearly defined with data quality gates between bronze → silver → gold promotions
39
+ - [ ] Partitioning strategy optimized for query patterns with validation that queries prune partitions effectively
40
+ - [ ] ACID transactions tested with concurrent writes and failure scenarios to ensure consistency
41
+ - [ ] Schema evolution tested with backward and forward compatibility for common evolution scenarios
42
+ - [ ] File management strategy (OPTIMIZE/VACUUM) scheduled with monitoring for small file accumulation
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: data-mesh
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.6.0
5
+ status: stable
6
+ triggers: data mesh architecture, domain data ownership, data product design, federated data governance, self-serve data platform, data mesh implementation, data contract mesh, data domain boundary, mesh interoperability, decentralized data ownership, data product specification, domain-driven data
7
+ ---
8
+
9
+ # Skill — Data Mesh
10
+
11
+ ## When this skill activates
12
+ This skill activates when implementing data mesh architectures with domain-oriented ownership, federated governance, and data-as-a-product principles. Use when centralized data management becomes a bottleneck and domains need autonomy with interoperability.
13
+
14
+ ## Mandatory actions when this skill is active
15
+
16
+ ### Before writing any code
17
+ 1. Define domain boundaries using domain-driven design: bounded contexts, ubiquitous language, core domains vs supporting domains with ownership mapping
18
+ 2. Establish data product specification template: SLAs, schemas, semantics, access controls, versioning, and quality guarantees each domain must provide
19
+ 3. Design federated computational governance: global standards (discovery, security, interoperability) enforced through the platform, local decisions (tech stack, modeling) owned by domains
20
+ 4. Create self-serve data platform capabilities: provisioning automation, observability tools, discovery services, and development environments domains can use independently
21
+
22
+ ### During implementation
23
+ - Build data product registry with standardized metadata: domain owner, SLAs, schemas, sample data, access request process, and consumer feedback
24
+ - Implement data contracts between domains: schema definitions, backward compatibility guarantees, deprecation policies, and breaking change notifications
25
+ - Create domain-agnostic platform services: infrastructure provisioning (IaC templates), CI/CD pipelines, monitoring dashboards, and cost allocation
26
+ - Design consistent data product APIs: REST for batch, streaming for real-time, query engines for analytical workloads, with versioning and deprecation paths
27
+ - Establish quality frameworks domains must implement: data validation, profiling, lineage tracking, incident response with federated monitoring
28
+ - Build interoperability layer: common data types, standard formats (Parquet, Avro), semantic layer, and cross-domain joins through data products not direct access
29
+ - Implement federated identity and access: domain-owned authorization, centralized authentication, audit logging, and privacy controls enforced at platform level
30
+
31
+ ### After implementation
32
+ - Create data product marketplace: searchable catalog, quality scores, usage analytics, consumer reviews, and onboarding documentation
33
+ - Build platform health metrics: provisioning time, incident resolution SLA, platform uptime, and developer satisfaction scores
34
+ - Generate federated governance reports: compliance by domain, quality trends, cross-domain dependencies, and policy violations
35
+ - Document domain interaction patterns: producer-consumer relationships, data sharing agreements, and conflict resolution processes
36
+
37
+ ## Self-check before task completion
38
+ - [ ] Domain boundaries clearly defined with ownership assignments and RACI matrix for responsibilities
39
+ - [ ] Data products meet platform standards for discoverability, access control, quality, and SLAs
40
+ - [ ] Self-serve platform enables domains to provision, deploy, and monitor data products independently
41
+ - [ ] Federated governance enforces global standards while allowing domain autonomy in implementation
42
+ - [ ] Interoperability tested across domains through data product contracts and APIs, not direct database access
@@ -0,0 +1,107 @@
1
+ ---
2
+ name: data-modeling
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.0.7
5
+ status: stable
6
+ triggers: data modeling, dimensional model design, star schema design, snowflake schema design, normalization decision, schema evolution strategy, data contract definition, slowly changing dimension, entity relationship design, data warehouse modeling, schema lifecycle, data lineage mapping
7
+ ---
8
+
9
+ # Data Modeling
10
+
11
+ ## When this skill activates
12
+
13
+ This skill activates when the user is designing, implementing, or evolving data models.
14
+ This includes entity-relationship design, dimensional modeling (star/snowflake schemas),
15
+ normalization decisions, slowly changing dimension strategies, schema evolution planning,
16
+ data contract definitions between producers and consumers, and data lineage mapping
17
+ across transformation pipelines.
18
+
19
+ ## Mandatory actions
20
+
21
+ ### Before
22
+
23
+ 1. Identify the workload type: OLTP (transactional) vs OLAP (analytical) vs hybrid.
24
+ 2. Determine the primary consumers of the data (applications, analysts, ML pipelines).
25
+ 3. Assess data volume, velocity, and variety characteristics.
26
+ 4. Review existing schemas and their evolution history.
27
+ 5. Identify upstream data sources and downstream consumers (lineage context).
28
+
29
+ ### During
30
+
31
+ **Modeling Phases (Conceptual to Logical to Physical):**
32
+ - **Conceptual:** Business entities and relationships, no implementation details. Stakeholder-readable.
33
+ - **Logical:** Attributes, data types, keys, constraints. Technology-agnostic.
34
+ - **Physical:** Indexes, partitions, storage engines, materialized views. Technology-specific.
35
+ - Always start conceptual, refine to logical, then optimize physical. Never skip phases.
36
+
37
+ **Normalization (OLTP):**
38
+ - **1NF:** Eliminate repeating groups; atomic values in every column.
39
+ - **2NF:** Remove partial dependencies (all non-key columns depend on the full primary key).
40
+ - **3NF:** Remove transitive dependencies (non-key columns depend only on the key).
41
+ - **BCNF:** Every determinant is a candidate key.
42
+ - Normalize for OLTP (reduces anomalies, ensures consistency).
43
+ - Denormalize for OLAP (reduces joins, improves query performance).
44
+ - Document every denormalization decision with rationale.
45
+
46
+ **Star Schema (Dimensional Modeling):**
47
+ - **Fact tables:** Measurable events (transactions, clicks, shipments). Contain foreign keys + metrics.
48
+ - **Dimension tables:** Descriptive context (who, what, where, when, how).
49
+ - **Grain definition:** The most atomic level of detail in a fact table. Define grain FIRST.
50
+ - Prefer conformed dimensions (shared across fact tables) for consistency.
51
+ - Junk dimensions: combine low-cardinality flags into a single dimension.
52
+
53
+ **Snowflake Schema:**
54
+ - Use when dimensions have natural sub-hierarchies (geography: country → state → city).
55
+ - Normalizes dimension tables to reduce redundancy.
56
+ - Trade-off: more joins but less storage and clearer hierarchy.
57
+ - Prefer star schema unless dimension table size or update frequency justifies snowflaking.
58
+
59
+ **Slowly Changing Dimensions (SCD):**
60
+ - **Type 0:** Fixed, never changes (date of birth).
61
+ - **Type 1:** Overwrite old value. No history preserved. Use for corrections.
62
+ - **Type 2:** Add new row with version tracking (start_date, end_date, is_current). Full history.
63
+ - **Type 3:** Add "previous" column alongside current. Limited history (one prior value).
64
+ - **Type 6 (Hybrid):** Combines Types 1, 2, and 3 for maximum flexibility.
65
+ - Default to Type 2 unless storage or query complexity is a concern.
66
+
67
+ **Data Contracts:**
68
+ - Agreement between data producer and consumer on schema + semantics + SLA.
69
+ - Schema: field names, types, nullability, constraints.
70
+ - Semantics: business meaning of each field (not just technical definition).
71
+ - SLA: freshness guarantee, completeness threshold, availability window.
72
+ - Enforce contracts via schema validation in pipelines (Great Expectations, dbt tests).
73
+ - Breaking contract changes require notification and migration period.
74
+
75
+ **Schema Evolution:**
76
+ - **Additive (safe):** New optional columns, new tables, new indexes.
77
+ - **Breaking (dangerous):** Column removal, type changes, renaming, adding NOT NULL without default.
78
+ - Use migration scripts (Flyway, Alembic, Liquibase) for all schema changes.
79
+ - Version schemas and maintain a changelog.
80
+ - Test migrations against production-like data volumes before deploying.
81
+
82
+ **Data Lineage:**
83
+ - Track data from source → transformation → consumption.
84
+ - Document at column-level granularity for critical fields.
85
+ - Use tools (dbt lineage graph, Apache Atlas, DataHub) for automated discovery.
86
+ - Lineage enables impact analysis (what breaks if this source changes?).
87
+ - Required for regulatory compliance (GDPR: where does PII flow?).
88
+
89
+ ### After
90
+
91
+ 1. Verify grain is explicitly defined for every fact table.
92
+ 2. Confirm normalization level matches workload type (OLTP normalized, OLAP denormalized).
93
+ 3. Validate SCD strategy is documented for every dimension with mutable attributes.
94
+ 4. Ensure data contracts exist between all critical producer-consumer pairs.
95
+ 5. Check that schema evolution follows additive-first principles.
96
+ 6. Verify lineage is documented for compliance-sensitive data flows.
97
+
98
+ ## Self-check before task completion
99
+
100
+ - [ ] Modeling followed the conceptual → logical → physical progression.
101
+ - [ ] Normalization level is appropriate for the workload (OLTP vs OLAP).
102
+ - [ ] Fact table grain is explicitly defined and documented.
103
+ - [ ] SCD types are chosen and justified for mutable dimensions.
104
+ - [ ] Data contracts define schema, semantics, and SLA for critical interfaces.
105
+ - [ ] Schema evolution strategy avoids breaking changes without migration.
106
+ - [ ] Data lineage is mapped for compliance-sensitive and business-critical paths.
107
+ - [ ] Physical optimizations (indexes, partitions) are justified by query patterns.
@@ -0,0 +1,171 @@
1
+ ---
2
+ name: data-pipeline-design
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.1.1
5
+ status: stable
6
+ triggers: data pipeline design, ETL pipeline, ELT pattern, batch vs streaming pipeline, exactly-once processing pipeline, schema registry pipeline, data quality gate, pipeline orchestration, data ingestion, pipeline backfill, pipeline monitoring, data freshness
7
+ ---
8
+
9
+ # Skill — Data Pipeline Design
10
+
11
+ ## When this skill activates
12
+ Any task involving designing data ingestion, transformation, or delivery pipelines.
13
+ Includes ETL/ELT architecture, batch vs streaming decisions, schema management,
14
+ data quality enforcement, and pipeline orchestration.
15
+
16
+ ## Mandatory actions when this skill is active
17
+
18
+ ### Before writing any code
19
+ 1. Define data contract (schema, freshness SLA, volume, sources, consumers).
20
+ 2. Decide batch vs streaming (latency requirement is the primary driver).
21
+ 3. Identify exactly-once requirements (financial data = must, analytics = can relax).
22
+ 4. Plan schema evolution strategy (backward-compatible changes only).
23
+
24
+ ### During implementation
25
+ - Implement data quality gates before consumers see data.
26
+ - Use schema registry for all structured data exchange.
27
+ - Make all transformations idempotent (safe to re-run).
28
+ - Include dead-letter queues for malformed/failed records.
29
+ - Add lineage tracking (where did this data come from?).
30
+ - Monitor freshness SLA with alerting.
31
+
32
+ ### After implementation
33
+ - Verify backfill capability (can we reprocess historical data?).
34
+ - Test schema evolution (add column, change type) without breaking consumers.
35
+ - Confirm quality gates catch known bad data patterns.
36
+ - Validate freshness SLA is met under normal load.
37
+ - Document data lineage for every output table.
38
+
39
+ ## ETL vs ELT Decision
40
+
41
+ ### ETL (Extract → Transform → Load)
42
+ - Transform before loading into destination.
43
+ - Best for: structured sources, known transformations, data quality at boundary.
44
+ - Tools: Airflow + Python, Spark, custom processors.
45
+ - Advantage: Clean data in warehouse, lower warehouse compute costs.
46
+
47
+ ### ELT (Extract → Load → Transform)
48
+ - Load raw data, transform in the warehouse/lakehouse.
49
+ - Best for: diverse sources, evolving transformations, exploratory analysis.
50
+ - Tools: Fivetran/Airbyte (extract+load) + dbt (transform).
51
+ - Advantage: Raw data preserved, transformations versioned and testable.
52
+
53
+ ### Decision Matrix
54
+ | Factor | ETL | ELT |
55
+ |--------|-----|-----|
56
+ | Source diversity | Low (known schema) | High (many sources) |
57
+ | Transformation stability | Stable, well-defined | Evolving, experimental |
58
+ | Data volume | Moderate | Very high |
59
+ | Warehouse compute cost | Sensitive | Acceptable |
60
+ | Need raw data access | No | Yes |
61
+
62
+ ## Batch vs Streaming
63
+
64
+ ### Batch Processing
65
+ - Process data in scheduled intervals (hourly, daily).
66
+ - Simpler implementation, easier debugging.
67
+ - Cheaper for high-volume, latency-tolerant workloads.
68
+ - Tools: Airflow, Spark Batch, dbt.
69
+
70
+ ### Stream Processing
71
+ - Process events as they arrive (real-time or near-real-time).
72
+ - Complex: windowing, ordering, late-arriving data.
73
+ - Required when business needs data in <5 minutes.
74
+ - Tools: Kafka Streams, Flink, Spark Structured Streaming.
75
+
76
+ ### Decision: Use streaming only when
77
+ - Business requires <5 minute data freshness.
78
+ - Events must trigger immediate actions (fraud, alerts).
79
+ - Source naturally produces events (clickstream, IoT).
80
+
81
+ Otherwise, batch is simpler and cheaper.
82
+
83
+ ## Exactly-Once Processing
84
+
85
+ ### Why It's Hard
86
+ Network failures + retries = potential duplicates.
87
+
88
+ ### Strategies
89
+ 1. **Idempotent sinks**: Write operations produce same result regardless of repetition (UPSERT, conditional write).
90
+ 2. **Deduplication keys**: Assign unique ID to each record, deduplicate at sink.
91
+ 3. **Checkpointing**: Record progress markers, resume from checkpoint on failure.
92
+ 4. **Transactional outbox**: Write the source record and an outbox-table record in one atomic transaction; a separate relay then publishes the outbox records downstream.
93
+
94
+ ### Practical Guarantees
95
+ | Guarantee | Cost | Use When |
96
+ |-----------|------|----------|
97
+ | At-most-once | Lowest | Metrics where loss is acceptable |
98
+ | At-least-once + idempotent sink | Medium | Most pipelines |
99
+ | Exactly-once (Kafka transactions) | Highest | Financial, billing |
100
+
101
+ ## Schema Registry
102
+
103
+ ### Purpose
104
+ - Central source of truth for data schemas.
105
+ - Enforce compatibility between producers and consumers.
106
+ - Enable schema evolution without breaking downstream.
107
+
108
+ ### Compatibility Modes
109
+ - **Backward compatible**: New schema can read old data (add optional fields).
110
+ - **Forward compatible**: Old schema can read new data (remove optional fields).
111
+ - **Full compatible**: Both backward and forward (safest, most restrictive).
112
+
113
+ ### Rules
114
+ - All structured data exchange goes through schema registry.
115
+ - Use Avro or Protobuf (schema-based, compact, evolvable).
116
+ - Test schema changes against compatibility rules in CI.
117
+ - Never break backward compatibility without coordinated migration.
118
+
119
+ ## Data Quality Gates
120
+
121
+ ### Checks to Implement
122
+ | Check | Example | Severity |
123
+ |-------|---------|----------|
124
+ | Not null | Primary keys must exist | CRITICAL |
125
+ | Uniqueness | No duplicate records | CRITICAL |
126
+ | Range | Age between 0-150 | HIGH |
127
+ | Freshness | Data < 1 hour old | HIGH |
128
+ | Volume | Row count ±10% of expected | MEDIUM |
129
+ | Referential | Foreign keys resolve | MEDIUM |
130
+ | Format | Email matches pattern | LOW |
131
+
132
+ ### Implementation
133
+ - Run quality checks BEFORE exposing data to consumers.
134
+ - Quarantine failing records in dead-letter table.
135
+ - Alert on quality degradation trends.
136
+ - Track quality metrics over time (quality score per table).
137
+
138
+ ## Pipeline Orchestration
139
+
140
+ ### Airflow DAG Best Practices
141
+ - One DAG per logical pipeline.
142
+ - Idempotent tasks (re-runnable without side effects).
143
+ - Explicit dependencies (no implicit ordering).
144
+ - SLA alerts for late-running pipelines.
145
+ - Backfill support (catchup=True with idempotent tasks).
146
+ - Retry with exponential backoff for transient failures.
147
+
148
+ ### Monitoring
149
+ - Freshness SLA: alert when data is older than threshold.
150
+ - Pipeline duration: alert on >2x normal runtime.
151
+ - Record count: alert on ±20% deviation from expected.
152
+ - Error rate: alert on >1% record failures.
153
+
154
+ ## Backfill Strategy
155
+
156
+ ### Requirements
157
+ - Every pipeline must support historical reprocessing.
158
+ - Backfill must be idempotent (running twice = same result).
159
+ - Partition by date for efficient backfill of specific ranges.
160
+ - Backfill should not interfere with production pipeline runs.
161
+
162
+ ## Self-check
163
+ - [ ] Data contract defined (schema, freshness, volume).
164
+ - [ ] Batch vs streaming decision justified by latency requirement.
165
+ - [ ] Quality gates implemented before consumer access.
166
+ - [ ] Schema registered and compatibility mode set.
167
+ - [ ] All transformations are idempotent.
168
+ - [ ] Dead-letter queue configured for failures.
169
+ - [ ] Backfill capability tested.
170
+ - [ ] Freshness SLA monitored with alerting.
171
+ - [ ] Data lineage documented.
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: data-privacy-engineering
3
+ version: 1.0.0
4
+ min_mindforge_version: 10.6.0
5
+ status: stable
6
+ triggers: data privacy engineering, differential privacy implementation, anonymization technique, consent management system, privacy-preserving computation, GDPR data engineering, data masking, privacy by design, homomorphic encryption, federated learning privacy, secure multi-party computation, PII detection pipeline
7
+ ---
8
+
9
+ # Skill — Data Privacy Engineering
10
+
11
+ ## When this skill activates
12
+ This skill activates when implementing privacy-preserving data systems, building consent management infrastructure, or applying anonymization techniques. Use when handling sensitive data requires technical controls beyond access restrictions.
13
+
14
+ ## Mandatory actions when this skill is active
15
+
16
+ ### Before writing any code
17
+ 1. Conduct privacy impact assessment: identify PII/sensitive data, data flows, retention requirements, third-party sharing, and regulatory obligations (GDPR, CCPA, HIPAA)
18
+ 2. Define privacy requirements: anonymization level (k-anonymity, l-diversity, differential privacy), consent granularity, right-to-erasure scope, and data minimization principles
19
+ 3. Select appropriate privacy techniques: tokenization (reversible), hashing (one-way), encryption (protected), differential privacy (statistical), synthetic data (replacement)
20
+ 4. Establish privacy testing framework: re-identification risk assessment, privacy budget tracking, consent enforcement verification, and breach simulation
21
+
22
+ ### During implementation
23
+ - Implement automated PII detection pipeline: regex patterns, ML models, and NER for unstructured text, scanning code, logs, databases, and data lakes
24
+ - Build tokenization service with: format-preserving encryption for display, secure token vault, key rotation, and performance caching for high-throughput workloads
25
+ - Create differential privacy mechanisms: Laplace/Gaussian noise addition calibrated to epsilon budget, query result perturbation, and privacy budget accounting across queries
26
+ - Design consent management system: granular opt-in/opt-out, purpose-specific consent, consent version tracking, and propagation to downstream systems
27
+ - Implement data minimization controls: retention policies with automated deletion, purpose limitation enforcement, and necessity justification for data collection
28
+ - Build privacy-preserving analytics: federated learning for ML without centralized data, secure aggregation for metrics, and homomorphic encryption for computation on encrypted data
29
+ - Create data subject rights workflows: search across systems, export in portable format, deletion with verification, and rectification propagation
30
+
31
+ ### After implementation
32
+ - Generate privacy compliance reports: PII inventory, consent coverage, retention policy enforcement, third-party data sharing audit, and rights request fulfillment SLA
33
+ - Build privacy monitoring dashboards: PII exposure incidents, consent withdrawal rates, privacy budget consumption, and anonymization quality metrics
34
+ - Create breach response procedures: detection, containment, notification timelines, affected user identification, and remediation workflows
35
+ - Document privacy controls: anonymization methods, re-identification risk levels, consent mechanisms, and data retention justifications for audit purposes
36
+
37
+ ## Self-check before task completion
38
+ - [ ] PII detection covers all data stores with automated scanning and alerting on new sensitive data discoveries
39
+ - [ ] Anonymization techniques applied with documented re-identification risk assessment (k-anonymity with k ≥ 10 or equivalent)
40
+ - [ ] Consent management enforces purpose limitation with propagation to all downstream processing systems
41
+ - [ ] Differential privacy implementation maintains epsilon budget <1.0 for sensitive aggregations with privacy accounting
42
+ - [ ] Data subject rights workflows tested for completeness across all systems within regulatory SLA (30 days GDPR)
@@ -0,0 +1,174 @@
1
+ ---
2
+ name: database-performance
3
+ version: 1.0.0
4
+ min_mindforge_version: 0.3.0
5
+ status: stable
6
+ triggers: database performance, query plan analysis, EXPLAIN ANALYZE, index selection, partition pruning, materialized view, query optimization, slow query, index strategy, table scan elimination, join optimization, query profiling
7
+ compose: database-patterns
8
+ ---
9
+
10
+ # Skill — Database Performance
11
+
12
+ ## When this skill activates
13
+ Any task involving slow queries, query optimization, index strategy, EXPLAIN plan
14
+ analysis, partitioning, materialized views, or database profiling.
15
+
16
+ ## Mandatory actions when this skill is active
17
+
18
+ ### Before optimizing
19
+ 1. Get the current query execution plan (EXPLAIN ANALYZE, not just EXPLAIN).
20
+ 2. Identify the actual bottleneck (do not guess).
21
+ 3. Measure baseline performance (p50, p95, p99 latency).
22
+ 4. Understand the data distribution (cardinality, skew).
23
+
24
+ ### Reading EXPLAIN ANALYZE output
25
+
26
+ **Key things to look for:**
27
+
28
+ | Signal | Meaning | Action |
29
+ |--------|---------|--------|
30
+ | Seq Scan on large table | Full table scan, no index used | Add appropriate index |
31
+ | Nested Loop with high rows | O(n*m) join strategy | Consider Hash Join, add index on join column |
32
+ | Actual rows >> Estimated rows | Stale statistics | Run ANALYZE on the table |
33
+ | Sort with external merge | Not enough work_mem | Increase work_mem or add index for ORDER BY |
34
+ | Filter removing most rows | Index not selective enough | Add more specific index or partial index |
35
+
36
+ **Node types (best to worst for large tables):**
37
+ 1. Index Only Scan — best (reads from index, no table access).
38
+ 2. Index Scan — good (uses index, fetches rows from table).
39
+ 3. Bitmap Index Scan — okay (for medium selectivity).
40
+ 4. Seq Scan — bad on large tables (reads every row).
41
+
42
+ ### Index strategy
43
+
44
+ **B-tree (default, most common):**
45
+ - Equality: `WHERE status = 'active'`
46
+ - Range: `WHERE created_at > '2025-01-01'`
47
+ - Prefix matching: `WHERE name LIKE 'foo%'` (in PostgreSQL, requires the C collation or an index built with `text_pattern_ops`)
48
+ - Sorting: `ORDER BY created_at DESC`
49
+ - Composite: `(tenant_id, created_at)` — order matters, left-to-right.
50
+
51
+ **GIN (Generalized Inverted Index):**
52
+ - JSONB containment: `WHERE data @> '{"key": "value"}'`
53
+ - Array contains: `WHERE tags @> ARRAY['tag1']`
54
+ - Full-text search: `WHERE to_tsvector(body) @@ to_tsquery('search')`
55
+
56
+ **Partial index (conditional):**
57
+ - Index only rows that match a condition.
58
+ - `CREATE INDEX idx_active_orders ON orders(created_at) WHERE status = 'active'`
59
+ - Smaller, faster, less write overhead.
60
+
61
+ **Expression index:**
62
+ - Index a computed value.
63
+ - `CREATE INDEX idx_lower_email ON users(LOWER(email))`
64
+ - Query must use the same expression to hit the index.
65
+
66
+ ### Common query anti-patterns
67
+
68
+ **Functions on indexed columns:**
69
+ ```sql
70
+ -- BAD: index on created_at is useless
71
+ WHERE EXTRACT(YEAR FROM created_at) = 2025
72
+
73
+ -- GOOD: rewrite as range
74
+ WHERE created_at >= '2025-01-01' AND created_at < '2026-01-01'
75
+ ```
76
+
77
+ **OR conditions preventing index use:**
78
+ ```sql
79
+ -- BAD: may cause Seq Scan
80
+ WHERE status = 'active' OR status = 'pending'
81
+
82
+ -- GOOD: use IN
83
+ WHERE status IN ('active', 'pending')
84
+ ```
85
+
86
+ **SELECT * when you need few columns:**
87
+ ```sql
88
+ -- BAD: fetches all columns, prevents index-only scan
89
+ SELECT * FROM orders WHERE tenant_id = 'abc'
90
+
91
+ -- GOOD: select only needed columns
92
+ SELECT id, status, total FROM orders WHERE tenant_id = 'abc'
93
+ ```
94
+
95
+ **Missing LIMIT on unbounded queries:**
96
+ ```sql
97
+ -- BAD: may return millions of rows
98
+ SELECT * FROM events WHERE type = 'click'
99
+
100
+ -- GOOD: always paginate
101
+ SELECT * FROM events WHERE type = 'click' ORDER BY id LIMIT 50
102
+ ```
103
+
104
+ ### Materialized views
105
+
106
+ **When to use:**
107
+ - Expensive aggregations needed frequently (dashboards, reports).
108
+ - Data changes infrequently relative to read frequency.
109
+ - Acceptable staleness (refresh interval is tolerable).
110
+
111
+ **Implementation:**
112
+ ```sql
113
+ CREATE MATERIALIZED VIEW monthly_revenue AS
114
+ SELECT tenant_id, date_trunc('month', created_at) AS month, SUM(amount) AS total
115
+ FROM orders
116
+ WHERE status = 'completed'
117
+ GROUP BY tenant_id, month;
118
+
119
+ -- Refresh on schedule
120
+ REFRESH MATERIALIZED VIEW CONCURRENTLY monthly_revenue;
121
+ ```
122
+
123
+ **Rules:**
124
+ - Always use CONCURRENTLY (does not lock reads during refresh).
125
+ - Add a unique index on the materialized view for CONCURRENTLY to work.
126
+ - Monitor refresh duration — alert if it exceeds threshold.
127
+ - Consider triggers for real-time materialized views (small tables only).
128
+
129
+ ### Partitioning
130
+
131
+ **Range partitioning (time-series data):**
132
+ ```sql
133
+ CREATE TABLE events (
134
+ id BIGINT, tenant_id UUID, created_at TIMESTAMPTZ, data JSONB
135
+ ) PARTITION BY RANGE (created_at);
136
+
137
+ CREATE TABLE events_2025_01 PARTITION OF events
138
+ FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
139
+ ```
140
+
141
+ **Benefits:**
142
+ - Partition pruning: queries on created_at only scan relevant partitions.
143
+ - Easy data lifecycle: DROP old partitions instead of DELETE (instant, no vacuum).
144
+ - Parallel scan across partitions.
145
+
146
+ **Hash partitioning (even distribution):**
147
+ - For tables with no natural range key.
148
+ - Distributes rows evenly across N partitions.
149
+ - Good for very large tables that need parallel access.
150
+
151
+ **Rules:**
152
+ - Partition key must be in every query's WHERE clause for pruning.
153
+ - Too many partitions (>1000) can slow planning.
154
+ - Automate partition creation (don't rely on manual monthly creation).
155
+
156
+ ### Join optimization
157
+
158
+ - Ensure join columns have indexes on both sides.
159
+ - Small table JOIN large table: ensure small table is the "driving" table.
160
+ - Consider denormalization if a join is on the critical path and never changes.
161
+ - Use CTEs carefully — in PostgreSQL < 12, CTEs are optimization fences.
162
+
163
+ ### Monitoring
164
+
165
+ - Enable `pg_stat_statements` for query-level statistics.
166
+ - Alert on queries exceeding p95 threshold.
167
+ - Track index usage: `pg_stat_user_indexes` — unused indexes waste write performance.
168
+ - Regular VACUUM and ANALYZE (autovacuum tuning for high-write tables).
169
+
170
+ ## Self-check before task completion
171
+ - [ ] Did I follow the mandatory actions for this skill (captured EXPLAIN ANALYZE output, identified the actual bottleneck, measured baseline latency)?
172
+ - [ ] Did I apply the patterns appropriate to the context?
173
+ - [ ] Did I verify the implementation meets the criteria above?
174
+ - [ ] Did I document decisions and trade-offs made?