skillstore-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/README.md +95 -0
  2. package/data/bundles/devflow-complete.json +19 -0
  3. package/data/free-skills/devflow-agile/manifest.json +19 -0
  4. package/data/free-skills/devflow-agile/plugin/commands/agile/retro.md +23 -0
  5. package/data/free-skills/devflow-agile/plugin/commands/agile/review.md +21 -0
  6. package/data/free-skills/devflow-agile/plugin/commands/agile/sprint.md +30 -0
  7. package/data/free-skills/devflow-agile/plugin/commands/agile/standup.md +20 -0
  8. package/data/free-skills/devflow-agile/plugin/commands/agile.md +35 -0
  9. package/data/free-skills/devflow-agile/plugin/commands/devflow.md +42 -0
  10. package/data/free-skills/devflow-agile/plugin/skills/developer/SKILL.md +93 -0
  11. package/data/free-skills/devflow-agile/plugin/skills/developer/assets/sample-output.md +182 -0
  12. package/data/free-skills/devflow-agile/plugin/skills/developer/references/clean-architecture.md +361 -0
  13. package/data/free-skills/devflow-agile/plugin/skills/developer/references/clean-code-guide.md +207 -0
  14. package/data/free-skills/devflow-agile/plugin/skills/developer/references/debugging-methodology.md +191 -0
  15. package/data/free-skills/devflow-agile/template/agents/agile-coach.md +76 -0
  16. package/data/free-skills/devflow-agile/template/workflows/agile-sprint-workflow.md +81 -0
  17. package/data/free-skills/devflow-bootstrap/manifest.json +8 -0
  18. package/data/free-skills/devflow-bootstrap/plugin/commands/bootstrap/auto.md +31 -0
  19. package/data/free-skills/devflow-bootstrap/plugin/commands/bootstrap.md +38 -0
  20. package/data/free-skills/devflow-bootstrap/plugin/commands/devflow.md +20 -0
  21. package/data/free-skills/devflow-bootstrap/plugin/skills/project-scaffold/SKILL.md +56 -0
  22. package/data/free-skills/devflow-bootstrap/plugin/skills/project-scaffold/assets/sample-output.md +216 -0
  23. package/data/free-skills/devflow-bootstrap/plugin/skills/project-scaffold/references/architecture-decisions.md +254 -0
  24. package/data/free-skills/devflow-bootstrap/plugin/skills/project-scaffold/references/stack-templates.md +400 -0
  25. package/data/free-skills/devflow-bootstrap/template/agents/bootstrap-specialist.md +56 -0
  26. package/data/free-skills/devflow-bootstrap/template/workflows/bootstrap-workflow.md +70 -0
  27. package/data/free-skills/devflow-docs/manifest.json +8 -0
  28. package/data/free-skills/devflow-docs/plugin/commands/devflow.md +20 -0
  29. package/data/free-skills/devflow-docs/plugin/commands/docs/generate.md +17 -0
  30. package/data/free-skills/devflow-docs/plugin/commands/docs/parse.md +19 -0
  31. package/data/free-skills/devflow-docs/plugin/commands/docs.md +26 -0
  32. package/data/free-skills/devflow-docs/plugin/skills/pdf-processor/SKILL.md +59 -0
  33. package/data/free-skills/devflow-docs/plugin/skills/pdf-processor/assets/sample-output.md +114 -0
  34. package/data/free-skills/devflow-docs/plugin/skills/pdf-processor/references/extraction-techniques.md +115 -0
  35. package/data/free-skills/devflow-docs/plugin/skills/pdf-processor/references/ocr-strategies.md +167 -0
  36. package/data/free-skills/devflow-docs/template/agents/docs-specialist.md +35 -0
  37. package/data/free-skills/devflow-docs/template/workflows/docs-workflow.md +70 -0
  38. package/data/free-skills/devflow-postproject/manifest.json +13 -0
  39. package/data/free-skills/devflow-postproject/plugin/commands/devflow.md +34 -0
  40. package/data/free-skills/devflow-postproject/plugin/commands/postproject/handover.md +21 -0
  41. package/data/free-skills/devflow-postproject/plugin/commands/postproject/retro.md +21 -0
  42. package/data/free-skills/devflow-postproject/plugin/commands/postproject/support.md +21 -0
  43. package/data/free-skills/devflow-postproject/plugin/commands/postproject.md +32 -0
  44. package/data/free-skills/devflow-postproject/plugin/skills/retrospective/SKILL.md +70 -0
  45. package/data/free-skills/devflow-postproject/plugin/skills/retrospective/assets/sample-output.md +79 -0
  46. package/data/free-skills/devflow-postproject/plugin/skills/retrospective/references/facilitation-techniques.md +178 -0
  47. package/data/free-skills/devflow-postproject/plugin/skills/retrospective/references/lessons-learned-template.md +118 -0
  48. package/data/free-skills/devflow-postproject/plugin/skills/retrospective/references/retro-techniques.md +100 -0
  49. package/data/free-skills/devflow-postproject/template/agents/transition-manager.md +71 -0
  50. package/data/free-skills/devflow-postproject/template/workflows/transition-workflow.md +72 -0
  51. package/data/free-skills/devflow-presale/manifest.json +15 -0
  52. package/data/free-skills/devflow-presale/plugin/commands/devflow.md +47 -0
  53. package/data/free-skills/devflow-presale/plugin/commands/presale/analyze.md +30 -0
  54. package/data/free-skills/devflow-presale/plugin/commands/presale/estimate.md +30 -0
  55. package/data/free-skills/devflow-presale/plugin/commands/presale/price.md +30 -0
  56. package/data/free-skills/devflow-presale/plugin/commands/presale/propose.md +30 -0
  57. package/data/free-skills/devflow-presale/plugin/commands/presale.md +42 -0
  58. package/data/free-skills/devflow-presale/plugin/skills/requirement-analysis/SKILL.md +63 -0
  59. package/data/free-skills/devflow-presale/plugin/skills/requirement-analysis/assets/sample-output.md +129 -0
  60. package/data/free-skills/devflow-presale/plugin/skills/requirement-analysis/references/extraction-framework.md +140 -0
  61. package/data/free-skills/devflow-presale/plugin/skills/requirement-analysis/references/output-template.md +132 -0
  62. package/data/free-skills/devflow-presale/template/agents/presale-lead.md +83 -0
  63. package/data/free-skills/devflow-presale/template/agents/proposal-reviewer.md +63 -0
  64. package/data/free-skills/devflow-presale/template/workflows/presale-workflow.md +70 -0
  65. package/data/registry/categories.json +7 -0
  66. package/data/registry/packages.json +184 -0
  67. package/data/shared/framework/agents/brainstormer.md +74 -0
  68. package/data/shared/framework/agents/code-reviewer.md +87 -0
  69. package/data/shared/framework/agents/debugger.md +84 -0
  70. package/data/shared/framework/agents/docs-manager.md +55 -0
  71. package/data/shared/framework/agents/git-manager.md +59 -0
  72. package/data/shared/framework/agents/planner.md +68 -0
  73. package/data/shared/framework/agents/researcher.md +66 -0
  74. package/data/shared/framework/agents/tester.md +65 -0
  75. package/data/shared/framework/commands/cook/auto.md +27 -0
  76. package/data/shared/framework/commands/cook.md +45 -0
  77. package/data/shared/framework/commands/fix/ci.md +21 -0
  78. package/data/shared/framework/commands/fix/test.md +26 -0
  79. package/data/shared/framework/commands/fix/types.md +29 -0
  80. package/data/shared/framework/commands/fix.md +26 -0
  81. package/data/shared/framework/commands/git/cm.md +37 -0
  82. package/data/shared/framework/commands/git/pr.md +40 -0
  83. package/data/shared/framework/config/CLAUDE.md.template +26 -0
  84. package/data/shared/framework/config/settings.json +41 -0
  85. package/data/shared/framework/config/skillstore.config.json +29 -0
  86. package/data/shared/framework/hooks/discord-notify.sh +85 -0
  87. package/data/shared/framework/hooks/docs-sync.sh +53 -0
  88. package/data/shared/framework/hooks/modularization-hook.js +103 -0
  89. package/data/shared/framework/hooks/notification.js +94 -0
  90. package/data/shared/framework/hooks/quality-gate.js +109 -0
  91. package/data/shared/framework/hooks/scout-block.js +77 -0
  92. package/data/shared/framework/hooks/telegram-notify.sh +77 -0
  93. package/data/shared/framework/protocols/error-recovery.md +80 -0
  94. package/data/shared/framework/protocols/orchestration-protocol.md +112 -0
  95. package/data/shared/framework/quality/review-protocol.md +76 -0
  96. package/data/shared/framework/quality/verification-protocol.md +66 -0
  97. package/data/shared/framework/rules/development-rules.md +75 -0
  98. package/data/shared/framework/skills/backend-development/SKILL.md +77 -0
  99. package/data/shared/framework/skills/backend-development/assets/sample-output.md +175 -0
  100. package/data/shared/framework/skills/backend-development/references/advanced-patterns.md +180 -0
  101. package/data/shared/framework/skills/backend-development/references/api-design-guide.md +160 -0
  102. package/data/shared/framework/skills/backend-development/references/architecture-patterns.md +183 -0
  103. package/data/shared/framework/skills/backend-development/references/observability-resilience.md +155 -0
  104. package/data/shared/framework/skills/backend-development/references/troubleshooting.md +199 -0
  105. package/data/shared/framework/skills/codebase-analysis/SKILL.md +72 -0
  106. package/data/shared/framework/skills/codebase-analysis/assets/sample-output.md +263 -0
  107. package/data/shared/framework/skills/codebase-analysis/references/analysis-techniques.md +241 -0
  108. package/data/shared/framework/skills/codebase-analysis/references/dependency-mapping.md +280 -0
  109. package/data/shared/framework/skills/codebase-analysis/references/tech-debt-assessment.md +208 -0
  110. package/data/shared/framework/skills/databases/SKILL.md +72 -0
  111. package/data/shared/framework/skills/databases/assets/sample-output.md +212 -0
  112. package/data/shared/framework/skills/databases/references/advanced-data-patterns.md +259 -0
  113. package/data/shared/framework/skills/databases/references/query-optimization.md +214 -0
  114. package/data/shared/framework/skills/databases/references/schema-design.md +159 -0
  115. package/data/shared/framework/skills/databases/references/troubleshooting.md +214 -0
  116. package/data/shared/framework/skills/debugging-investigation/SKILL.md +84 -0
  117. package/data/shared/framework/skills/debugging-investigation/assets/sample-output.md +314 -0
  118. package/data/shared/framework/skills/debugging-investigation/references/systematic-debugging.md +197 -0
  119. package/data/shared/framework/skills/debugging-investigation/references/tool-specific-guides.md +202 -0
  120. package/data/shared/framework/skills/debugging-investigation/references/troubleshooting-patterns.md +196 -0
  121. package/data/shared/framework/skills/frontend-development/SKILL.md +67 -0
  122. package/data/shared/framework/skills/frontend-development/assets/sample-output.md +110 -0
  123. package/data/shared/framework/skills/frontend-development/references/component-patterns.md +112 -0
  124. package/data/shared/framework/skills/frontend-development/references/performance-guide.md +169 -0
  125. package/data/shared/framework/skills/frontend-development/references/routing-forms-realtime.md +374 -0
  126. package/data/shared/framework/skills/frontend-development/references/ssr-rsc-patterns.md +284 -0
  127. package/data/shared/framework/skills/frontend-development/references/troubleshooting.md +154 -0
  128. package/data/shared/framework/skills/mobile-development/SKILL.md +67 -0
  129. package/data/shared/framework/skills/mobile-development/assets/sample-output.md +382 -0
  130. package/data/shared/framework/skills/mobile-development/references/mobile-patterns.md +681 -0
  131. package/data/shared/framework/skills/mobile-development/references/mobile-performance.md +524 -0
  132. package/data/shared/framework/skills/mobile-development/references/troubleshooting.md +158 -0
  133. package/data/shared/framework/skills/security-audit/SKILL.md +83 -0
  134. package/data/shared/framework/skills/security-audit/assets/sample-output.md +451 -0
  135. package/data/shared/framework/skills/security-audit/references/owasp-checklist.md +580 -0
  136. package/data/shared/framework/skills/security-audit/references/secure-coding-patterns.md +433 -0
  137. package/data/shared/framework/skills/security-audit/references/vulnerability-remediation.md +331 -0
  138. package/data/shared/framework/skills/ui-generation/SKILL.md +70 -0
  139. package/data/shared/framework/skills/ui-generation/assets/sample-output.md +139 -0
  140. package/data/shared/framework/skills/ui-generation/references/accessibility-responsive.md +127 -0
  141. package/data/shared/framework/skills/ui-generation/references/compound-components.md +252 -0
  142. package/data/shared/framework/skills/ui-generation/references/generation-patterns.md +110 -0
  143. package/data/shared/framework/skills/ui-generation/references/storybook-design-system.md +278 -0
  144. package/data/shared/framework/skills/ui-generation/references/troubleshooting.md +198 -0
  145. package/data/shared/framework/workflows/documentation-management.md +58 -0
  146. package/data/shared/framework/workflows/primary-workflow.md +88 -0
  147. package/dist/commands/activate.d.ts +3 -0
  148. package/dist/commands/activate.d.ts.map +1 -0
  149. package/dist/commands/activate.js +34 -0
  150. package/dist/commands/activate.js.map +1 -0
  151. package/dist/commands/bundle.d.ts +3 -0
  152. package/dist/commands/bundle.d.ts.map +1 -0
  153. package/dist/commands/bundle.js +64 -0
  154. package/dist/commands/bundle.js.map +1 -0
  155. package/dist/commands/install.d.ts +3 -0
  156. package/dist/commands/install.d.ts.map +1 -0
  157. package/dist/commands/install.js +99 -0
  158. package/dist/commands/install.js.map +1 -0
  159. package/dist/commands/list.d.ts +3 -0
  160. package/dist/commands/list.d.ts.map +1 -0
  161. package/dist/commands/list.js +37 -0
  162. package/dist/commands/list.js.map +1 -0
  163. package/dist/commands/search.d.ts +3 -0
  164. package/dist/commands/search.d.ts.map +1 -0
  165. package/dist/commands/search.js +30 -0
  166. package/dist/commands/search.js.map +1 -0
  167. package/dist/commands/status.d.ts +3 -0
  168. package/dist/commands/status.d.ts.map +1 -0
  169. package/dist/commands/status.js +35 -0
  170. package/dist/commands/status.js.map +1 -0
  171. package/dist/commands/update.d.ts +3 -0
  172. package/dist/commands/update.d.ts.map +1 -0
  173. package/dist/commands/update.js +68 -0
  174. package/dist/commands/update.js.map +1 -0
  175. package/dist/download/cache.d.ts +3 -0
  176. package/dist/download/cache.d.ts.map +1 -0
  177. package/dist/download/cache.js +18 -0
  178. package/dist/download/cache.js.map +1 -0
  179. package/dist/download/client.d.ts +2 -0
  180. package/dist/download/client.d.ts.map +1 -0
  181. package/dist/download/client.js +58 -0
  182. package/dist/download/client.js.map +1 -0
  183. package/dist/index.d.ts +3 -0
  184. package/dist/index.d.ts.map +1 -0
  185. package/dist/index.js +23 -0
  186. package/dist/index.js.map +1 -0
  187. package/dist/installer/file-copier.d.ts +6 -0
  188. package/dist/installer/file-copier.d.ts.map +1 -0
  189. package/dist/installer/file-copier.js +32 -0
  190. package/dist/installer/file-copier.js.map +1 -0
  191. package/dist/installer/plugin-installer.d.ts +12 -0
  192. package/dist/installer/plugin-installer.d.ts.map +1 -0
  193. package/dist/installer/plugin-installer.js +33 -0
  194. package/dist/installer/plugin-installer.js.map +1 -0
  195. package/dist/installer/template-installer.d.ts +12 -0
  196. package/dist/installer/template-installer.d.ts.map +1 -0
  197. package/dist/installer/template-installer.js +45 -0
  198. package/dist/installer/template-installer.js.map +1 -0
  199. package/dist/license/crypto.d.ts +16 -0
  200. package/dist/license/crypto.d.ts.map +1 -0
  201. package/dist/license/crypto.js +50 -0
  202. package/dist/license/crypto.js.map +1 -0
  203. package/dist/license/license-store.d.ts +19 -0
  204. package/dist/license/license-store.d.ts.map +1 -0
  205. package/dist/license/license-store.js +99 -0
  206. package/dist/license/license-store.js.map +1 -0
  207. package/dist/license/validator.d.ts +32 -0
  208. package/dist/license/validator.d.ts.map +1 -0
  209. package/dist/license/validator.js +81 -0
  210. package/dist/license/validator.js.map +1 -0
  211. package/dist/registry/loader.d.ts +30 -0
  212. package/dist/registry/loader.d.ts.map +1 -0
  213. package/dist/registry/loader.js +22 -0
  214. package/dist/registry/loader.js.map +1 -0
  215. package/dist/registry/search-engine.d.ts +9 -0
  216. package/dist/registry/search-engine.d.ts.map +1 -0
  217. package/dist/registry/search-engine.js +30 -0
  218. package/dist/registry/search-engine.js.map +1 -0
  219. package/dist/utils/config.d.ts +14 -0
  220. package/dist/utils/config.d.ts.map +1 -0
  221. package/dist/utils/config.js +28 -0
  222. package/dist/utils/config.js.map +1 -0
  223. package/dist/utils/logger.d.ts +9 -0
  224. package/dist/utils/logger.d.ts.map +1 -0
  225. package/dist/utils/logger.js +22 -0
  226. package/dist/utils/logger.js.map +1 -0
  227. package/dist/utils/paths.d.ts +20 -0
  228. package/dist/utils/paths.d.ts.map +1 -0
  229. package/dist/utils/paths.js +79 -0
  230. package/dist/utils/paths.js.map +1 -0
  231. package/package.json +54 -0
@@ -0,0 +1,214 @@
1
+ # Database Troubleshooting Guide
2
+
3
+ ## Slow Query Diagnosis Workflow
4
+
5
+ **Step 1: Identify slow queries**
6
+ ```sql
7
+ -- PostgreSQL: find queries exceeding threshold
8
+ SELECT pid, now() - query_start AS duration, query, state
9
+ FROM pg_stat_activity
10
+ WHERE state = 'active' AND now() - query_start > interval '1 second'
11
+ ORDER BY duration DESC;
12
+
13
+ -- Enable slow query logging
14
+ ALTER SYSTEM SET log_min_duration_statement = '500'; -- log queries > 500ms
15
+ SELECT pg_reload_conf();
16
+ ```
17
+
18
+ **Step 2: Analyze with EXPLAIN**
19
+ ```sql
20
+ EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;
21
+ -- Look for: Seq Scan (missing index), Nested Loop on large tables, large mismatch between estimated rows and actual rows
22
+ ```
23
+
24
+ **Step 3: Fix by priority**
25
+ 1. **Add missing index** — If Seq Scan on a filtered/joined column: `CREATE INDEX CONCURRENTLY idx_name ON table(column);`
26
+ 2. **Rewrite query** — Replace correlated subqueries with JOINs, use CTEs for readability but be aware of optimization fences (pre-PG12).
27
+ 3. **Update statistics** — If planner estimates are wildly off: `ANALYZE table_name;`
28
+ 4. **Partial index** — If only a subset of rows is queried: `CREATE INDEX idx ON orders(status) WHERE status = 'pending';`
29
+
30
+ ## Lock Contention Diagnosis
31
+
32
+ **Identify blocking locks:**
33
+ ```sql
34
+ -- Active locks with blocking info
35
+ SELECT
36
+ blocked.pid AS blocked_pid,
37
+ blocked.query AS blocked_query,
38
+ blocking.pid AS blocking_pid,
39
+ blocking.query AS blocking_query,
40
+ blocked.wait_event_type,
41
+ now() - blocked.query_start AS wait_duration
42
+ FROM pg_stat_activity blocked
43
+ JOIN pg_locks bl ON blocked.pid = bl.pid AND NOT bl.granted
44
+ JOIN pg_locks kl ON bl.locktype = kl.locktype
45
+ AND bl.database IS NOT DISTINCT FROM kl.database
46
+ AND bl.relation IS NOT DISTINCT FROM kl.relation
47
+ AND bl.pid != kl.pid AND kl.granted
48
+ JOIN pg_stat_activity blocking ON kl.pid = blocking.pid;
49
+
50
+ -- Kill a blocking query if needed
51
+ SELECT pg_terminate_backend(<blocking_pid>);
52
+ ```
53
+
54
+ **Common lock contention scenarios:**
55
+ 1. **Long-running `ALTER TABLE`** — Acquires `ACCESS EXCLUSIVE` lock, blocks all reads/writes.
56
+ - Fix: Use `CREATE INDEX CONCURRENTLY` for indexes; `ALTER TABLE ... ADD COLUMN` is a fast metadata-only change for nullable columns without defaults (and, in PG11+, also for columns with a constant default).
57
+ 2. **Bulk UPDATE without batching** — Locks thousands of rows in one transaction.
58
+ - Fix: Batch updates with `LIMIT` and loop.
59
+ 3. **`LOCK TABLE` left in application code** — Forgotten explicit lock from debugging.
60
+
61
+ ## Replication Lag Monitoring and Mitigation
62
+
63
+ **Monitor lag:**
64
+ ```sql
65
+ -- On primary: check logical replication slots and lag (for physical slots confirmed_flush_lsn is NULL; use restart_lsn instead)
66
+ SELECT slot_name, active,
67
+ pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS lag_bytes
68
+ FROM pg_replication_slots;
69
+
70
+ -- On replica: check lag in seconds
71
+ SELECT now() - pg_last_xact_replay_timestamp() AS replication_lag;
72
+ ```
73
+
74
+ **Common causes and fixes:**
75
+ 1. **Replica under-provisioned** — Replica has fewer resources than primary.
76
+ - Fix: Match replica resources (CPU, IOPS, memory) to primary.
77
+ 2. **Long-running queries on replica** — Conflict with WAL replay.
78
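+ - Fix: Set `hot_standby_feedback = on` (avoids most conflicts, at the cost of extra bloat on the primary), or lower `max_standby_streaming_delay` so replay is not held back — raising it protects replica queries from cancellation but increases lag.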
79
+ 3. **Large transactions** — Bulk inserts/updates generate massive WAL.
80
+ - Fix: Break into smaller batches.
81
+ 4. **Network bottleneck** — Insufficient bandwidth between primary and replica.
82
+ - Fix: Check network throughput, consider `wal_compression = on`.
83
+
84
+ **Application-level mitigation:**
85
+ - Read-after-write consistency: route reads to primary for N seconds after a write.
86
+ - Use synchronous replication for critical reads (trade-off: write latency).
87
+
88
+ ## Connection Limit Reached
89
+
90
+ **Symptoms:** `FATAL: too many connections for role "appuser"`, new connections refused.
91
+
92
+ **Diagnosis:**
93
+ ```sql
94
+ -- Current connection count by source
95
+ SELECT usename, client_addr, state, count(*)
96
+ FROM pg_stat_activity
97
+ GROUP BY usename, client_addr, state
98
+ ORDER BY count DESC;
99
+
100
+ -- Check limits
101
+ SHOW max_connections;
102
+ SELECT rolname, rolconnlimit FROM pg_roles WHERE rolconnlimit > 0;
103
+ ```
104
+
105
+ **Fix patterns:**
106
+ 1. **Connection pooler missing** — Each app instance opens its own pool.
107
+ - Fix: Deploy PgBouncer or Pgpool-II in front of PostgreSQL.
108
+ 2. **Idle connections from crashed workers** — Connections not closed on process exit.
109
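+ - Fix: Set `idle_in_transaction_session_timeout = '30s'` to end sessions stuck inside a transaction; for plain idle sessions use `idle_session_timeout` (PG14+) and TCP keepalives (`tcp_keepalives_idle`) so connections from dead clients are detected.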
110
+ 3. **Too many microservices** — Each service has a pool of 10, 20 services = 200 connections.
111
+ - Fix: Centralize through PgBouncer in transaction mode.
112
+ 4. **`max_connections` too low** — Default is 100, may be insufficient.
113
+ - Fix: Increase, but prefer pooling first (each connection costs ~10MB RAM).
114
+
115
+ ## Migration Failure Rollback Procedures
116
+
117
+ **Before running migrations (safety net):**
118
+ ```bash
119
+ # Backup current schema
120
+ pg_dump --schema-only -f schema_backup_$(date +%Y%m%d).sql dbname
121
+
122
+ # For critical migrations, backup data too
123
+ pg_dump -Fc -f full_backup_$(date +%Y%m%d).dump dbname
124
+ ```
125
+
126
+ **When migration fails mid-way:**
127
+ 1. Check migration tool state — most tools track applied migrations in a table (e.g., `schema_migrations`, `_prisma_migrations`).
128
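+ 2. If the tool supports rollback: apply the down migration. In development only, `npx prisma migrate reset` drops and recreates the database (all data is lost), so never run it against production.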
129
+ 3. If stuck in dirty state:
130
+ ```sql
131
+ -- Knex: fix migration lock
132
+ DELETE FROM knex_migrations_lock;
133
+ -- Prisma: mark failed migration as rolled back
134
+ UPDATE _prisma_migrations SET rolled_back_at = NOW() WHERE migration_name = 'failed_migration';
135
+ ```
136
+ 4. Manual rollback: apply the reverse DDL statements from the failed migration.
137
+ 5. Restore from backup as last resort: `pg_restore -d dbname full_backup.dump`.
138
+
139
+ **Prevention:** Always test migrations against a copy of production data before applying.
140
+
141
+ ## Data Corruption Detection and Recovery
142
+
143
+ **Detection signals:**
144
+ - Queries return unexpected results or errors like `could not read block`.
145
+ - `pg_catalog.pg_stat_database` shows `checksum_failures > 0` (if data checksums enabled).
146
+
147
+ **Verification:**
148
+ ```bash
149
+ # Check for corruption (runs online; prefer a read replica or off-peak window because it is I/O-heavy)
150
+ pg_amcheck --heapallindexed dbname
151
+
152
+ # Verify specific table (SQL — run inside psql, not the shell)
153
+ SELECT ctid, * FROM table_name WHERE condition LIMIT 100;
154
+ -- If errors occur on specific pages, note the ctid range
155
+ ```
156
+
157
+ **Recovery steps:**
158
+ 1. **From replica** — If replica is clean, promote it or copy affected tables.
159
+ 2. **From backup** — Restore specific table: `pg_restore -d dbname -t table_name backup.dump`.
160
+ 3. **REINDEX** — If only indexes are corrupt: `REINDEX TABLE table_name;`
161
+ 4. **Enable checksums** for future detection: `pg_checksums --enable -D /data` (the cluster must be cleanly shut down while it runs; PG12+).
162
+
163
+ ## Bloated Tables and Indexes
164
+
165
+ **Detection:**
166
+ ```sql
167
+ -- Estimate table bloat
168
+ SELECT schemaname, relname,
169
+ pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
170
+ n_dead_tup, n_live_tup,
171
+ round(n_dead_tup::numeric / NULLIF(n_live_tup, 0) * 100, 1) AS dead_pct
172
+ FROM pg_stat_user_tables
173
+ ORDER BY n_dead_tup DESC LIMIT 20;
174
+
175
+ -- Check if autovacuum is running
176
+ SELECT relname, last_vacuum, last_autovacuum, last_analyze
177
+ FROM pg_stat_user_tables
178
+ WHERE schemaname = 'public'
179
+ ORDER BY n_dead_tup DESC;
180
+ ```
181
+
182
+ **Fix patterns:**
183
+ 1. **Manual VACUUM** — `VACUUM (VERBOSE) table_name;` (reclaims space for reuse; does not block reads or writes).
184
+ 2. **VACUUM FULL** — `VACUUM FULL table_name;` (reclaims disk space, but takes ACCESS EXCLUSIVE lock).
185
+ 3. **pg_repack** (zero-downtime alternative to VACUUM FULL):
186
+ ```bash
187
+ pg_repack -d dbname -t table_name --no-superuser-check
188
+ ```
189
+ 4. **Tune autovacuum** — If bloat recurs, autovacuum is too conservative:
190
+ ```sql
191
+ ALTER TABLE hot_table SET (autovacuum_vacuum_scale_factor = 0.01, autovacuum_vacuum_threshold = 1000);
192
+ ```
193
+
194
+ ## Deadlock Analysis from Logs
195
+
196
+ **PostgreSQL log format for deadlocks:**
197
+ ```
198
+ ERROR: deadlock detected
199
+ DETAIL: Process 1234 waits for ShareLock on transaction 5678; blocked by process 9012.
200
+ Process 9012 waits for ShareLock on transaction 1234; blocked by process 1234.
201
+ HINT: See server log for query details.
202
+ ```
203
+
204
+ **Analysis workflow:**
205
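+ 1. Enable lock-wait logging server-wide: `ALTER SYSTEM SET log_lock_waits = on;` then `SELECT pg_reload_conf();` (a plain `SET` only affects the current session and requires superuser). Waits longer than `deadlock_timeout` (default `1s`) are then logged.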
206
+ 2. Parse logs for `deadlock detected` entries.
207
+ 3. Map the processes to queries — the log shows the exact SQL of each participant.
208
+ 4. Identify the conflicting access pattern — usually two transactions updating the same rows in opposite order.
209
+
210
+ **Resolution:**
211
+ - Enforce consistent ordering: both transactions should update table A then table B (not A→B and B→A).
212
+ - Use `SELECT ... FOR UPDATE` with explicit ordering: `ORDER BY id` to lock rows in deterministic order.
213
+ - Reduce transaction scope — fewer rows locked means fewer collision opportunities.
214
+ - Add application-level retry with exponential backoff for deadlock error code `40P01`.
@@ -0,0 +1,84 @@
1
+ ---
2
+ name: debugging-investigation
3
+ description: Systematic debugging and root cause investigation — log analysis, performance profiling, production incident diagnosis across all tech stacks
4
+ ---
5
+
6
+ # Debugging & Investigation
7
+
8
+ ## Triggers
9
+
10
+ Activate this skill when:
11
+ - Investigating bug reports or unexpected behavior
12
+ - Diagnosing test failures (unit, integration, e2e)
13
+ - Responding to production incidents or outages
14
+ - Analyzing performance degradation (slow responses, high latency)
15
+ - Tracking down intermittent or flaky errors
16
+ - Investigating memory leaks or resource exhaustion
17
+ - Diagnosing data inconsistencies or corruption
18
+ - Debugging race conditions or concurrency issues
19
+
20
+ ## Process
21
+
22
+ ### 1. Symptom Collection
23
+ - Gather exact error messages, stack traces, and log output
24
+ - Identify when the issue started (deploy, config change, traffic spike)
25
+ - Determine scope: one user, one endpoint, one service, or system-wide
26
+ - Check environment: local, staging, or production
27
+ - Collect reproduction steps if available
28
+
29
+ ### 2. Hypothesis Formation
30
+ Load: `references/systematic-debugging.md`
31
+
32
+ - List 2-4 plausible root causes ranked by likelihood
33
+ - For each hypothesis, define what evidence would confirm or refute it
34
+ - Apply differential debugging: what changed recently? (git log, dependency updates, config changes)
35
+ - Check for known bug patterns that match symptoms
36
+
37
+ ### 3. Evidence Gathering
38
+ Load: `references/tool-specific-guides.md`
39
+
40
+ - Use appropriate debugging tools for the stack (DevTools, profilers, log aggregators)
41
+ - Collect quantitative data: metrics, timings, memory snapshots, query plans
42
+ - Reproduce the issue in the most controlled environment possible
43
+ - Narrow the problem space using binary search debugging (bisect code, data, config)
44
+ - Document each finding with timestamps and exact values
45
+
46
+ ### 4. Root Cause Confirmation
47
+ Load: `references/troubleshooting-patterns.md`
48
+
49
+ - Confirm the root cause explains ALL observed symptoms, not just some
50
+ - Verify by predicting a consequence of the root cause and testing for it
51
+ - Rule out alternative hypotheses with evidence
52
+ - Identify contributing factors (the root cause vs. the trigger)
53
+
54
+ ### 5. Fix & Verify
55
+ - Implement the minimal fix that addresses the root cause
56
+ - Write a regression test that reproduces the original symptom
57
+ - Verify the fix in the same environment where the bug was observed
58
+ - Check for side effects: run full test suite, monitor metrics post-deploy
59
+ - Document the incident: timeline, root cause, fix, and prevention measures
60
+
61
+ ## Quick Reference — Symptom to Technique
62
+
63
+ | Symptom | First Technique | Tool |
64
+ |---|---|---|
65
+ | Slow API response | EXPLAIN ANALYZE + flame chart | Database profiler, 0x/clinic.js |
66
+ | Memory climbing over time | Heap snapshot diffing | Chrome DevTools Memory, clinic.js |
67
+ | Intermittent test failure | Timing/ordering analysis | Retry with seed, race detector |
68
+ | UI not rendering correctly | Component state inspection | React/Flutter DevTools |
69
+ | 500 errors after deploy | Differential debugging | git bisect, deploy diff |
70
+ | Network timeout | Waterfall analysis | Chrome Network panel, tcpdump |
71
+ | Auth failures | Token/session inspection | jwt.io, browser storage, logs |
72
+ | Data mismatch | Query + transformation trace | SQL logs, data pipeline logs |
73
+ | High CPU usage | CPU profiling | perf, clinic flame, Activity Monitor |
74
+ | Infinite loop / re-render | Execution tracing | Profiler, why-did-you-render |
75
+
76
+ ## References
77
+
78
+ - [Systematic Debugging](references/systematic-debugging.md) — binary search, rubber duck, differential debugging, scientific method, cognitive biases
79
+ - [Tool-Specific Guides](references/tool-specific-guides.md) — Chrome DevTools, Node.js, React DevTools, Flutter DevTools, database, VS Code debugger
80
+ - [Troubleshooting Patterns](references/troubleshooting-patterns.md) — race conditions, memory leaks, deadlocks, N+1 queries, timezone bugs, encoding, floating point
81
+
82
+ ## Assets
83
+
84
+ - [Sample Output](assets/sample-output.md) — complete debug report for a production Node.js memory leak investigation
@@ -0,0 +1,314 @@
1
+ # Debug Report: Production Memory Leak in Node.js Express API
2
+
3
+ ## Incident Summary
4
+
5
+ | Field | Value |
6
+ |---|---|
7
+ | Service | order-service (Node.js 18, Express 4.18) |
8
+ | Environment | Production (3 pods, Kubernetes) |
9
+ | First Detected | 2026-03-17 14:30 UTC (Datadog p99 latency alert; an earlier memory alert fired 2026-03-16 08:00 UTC) |
10
+ | Resolved | 2026-03-18 09:15 UTC |
11
+ | Duration | ~19 hours |
12
+ | Impact | API p99 latency increased from 120ms to 2400ms; 3 pod restarts due to OOM |
13
+
14
+ ---
15
+
16
+ ## 1. Symptom Collection
17
+
18
+ ### Observed Symptoms
19
+ - **Memory growth**: RSS climbing from ~200MB at pod start to 1.2GB over 48 hours before OOM kill
20
+ - **Latency degradation**: p50 stable at 45ms, but p99 increased from 120ms to 2400ms (20x)
21
+ - **GC pressure**: V8 garbage collection pauses increased from 5ms avg to 180ms avg
22
+ - **Restart pattern**: Pods restarting every 36-48 hours (hitting the 1.5GB memory limit)
23
+
24
+ ### Timeline
25
+ ```
26
+ Mar 15 10:00 Deploy v2.14.0 (includes WebSocket notification feature)
27
+ Mar 15 10:30 All metrics normal
28
+ Mar 16 08:00 First Datadog memory alert (pod-2 at 800MB)
29
+ Mar 17 02:15 pod-2 OOM killed and restarted
30
+ Mar 17 14:30 pod-1 and pod-3 both above 1GB, p99 latency alert fires
31
+ Mar 17 15:00 Investigation begins
32
+ ```
33
+
34
+ ### Key Observation
35
+ The leak started after deploying v2.14.0, which added a WebSocket notification feature for order status updates. Memory growth is steady (~20MB/hour) and correlates with request volume, not WebSocket connection count.
36
+
37
+ ---
38
+
39
+ ## 2. Hypothesis Formation
40
+
41
+ ### Hypothesis 1: Database Connection Pool Leak
42
+ **Reasoning**: New feature adds additional database queries for notification preferences. Connections may not be returning to the pool.
43
+ **If true**: `pg_stat_activity` would show growing active connections. Pool metrics would show exhaustion.
44
+ **Likelihood**: Medium
45
+
46
+ ### Hypothesis 2: EventEmitter Listener Accumulation
47
+ **Reasoning**: The new WebSocket feature registers event listeners to broadcast order status changes. If listeners are added per-request and never removed, they accumulate.
48
+ **If true**: `orderEmitter.listenerCount('statusChange')` would grow over time, and Node.js would emit a `MaxListenersExceededWarning` once the default limit of 10 listeners per event is exceeded.
49
+ **Likelihood**: High (most consistent with steady-per-request growth pattern)
50
+
51
+ ### Hypothesis 3: In-Memory Cache Without Eviction
52
+ **Reasoning**: The notification feature caches user notification preferences to avoid repeated DB lookups. If the cache has no TTL or max size, it grows indefinitely.
53
+ **If true**: Inspecting the cache object would show size growing proportionally to unique users served.
54
+ **Likelihood**: Medium
55
+
56
+ ---
57
+
58
+ ## 3. Evidence Gathering
59
+
60
+ ### Testing Hypothesis 1: Database Connection Pool
61
+
62
+ **Method**: Query `pg_stat_activity` and check pool metrics.
63
+
64
+ ```sql
65
+ SELECT count(*), state FROM pg_stat_activity
66
+ WHERE datname = 'orders_db'
67
+ GROUP BY state;
68
+ ```
69
+
70
+ **Result**:
71
+ ```
72
+ count | state
73
+ -------+--------
74
+ 8 | active
75
+ 2 | idle
76
+ ```
77
+
78
+ Pool size is configured as 10 connections. Active connections are stable at 8-10, cycling normally. No connection leak.
79
+
80
+ **Verdict**: REFUTED. Connection pool is healthy.
81
+
82
+ ### Testing Hypothesis 2: EventEmitter Listener Accumulation
83
+
84
+ **Method**: Add diagnostic endpoint to production (behind internal-only middleware):
85
+
86
+ ```javascript
87
+ app.get('/_debug/listeners', internalOnly, (req, res) => {
88
+ const orderEmitter = require('./events/orderEvents').emitter;
89
+ res.json({
90
+ listenerCounts: {
91
+ statusChange: orderEmitter.listenerCount('statusChange'),
92
+ created: orderEmitter.listenerCount('created'),
93
+ cancelled: orderEmitter.listenerCount('cancelled'),
94
+ },
95
+ activeHandles: process._getActiveHandles().length,
96
+ activeRequests: process._getActiveRequests().length,
97
+ memoryUsage: process.memoryUsage(),
98
+ });
99
+ });
100
+ ```
101
+
102
+ **Result after 2 hours of running** (pod-3, restarted at 15:00):
103
+ ```json
104
+ {
105
+ "listenerCounts": {
106
+ "statusChange": 14847,
107
+ "created": 1,
108
+ "cancelled": 1
109
+ },
110
+ "activeHandles": 14862,
111
+ "activeRequests": 3,
112
+ "memoryUsage": {
113
+ "rss": 487653376,
114
+ "heapUsed": 401283072,
115
+ "heapTotal": 460324864,
116
+ "external": 12419072
117
+ }
118
+ }
119
+ ```
120
+
121
+ 14,847 listeners on `statusChange` after 2 hours. This matches approximately 2 hours of request volume (~120 requests/minute = 14,400 requests). Each request is adding a listener and never removing it.
122
+
123
+ **Verdict**: CONFIRMED. Listener count grows 1:1 with requests.
124
+
125
+ ### Testing Hypothesis 3: Cache Without Eviction
126
+
127
+ **Method**: Add cache size to diagnostic endpoint.
128
+
129
+ ```javascript
130
+ const prefCache = require('./cache/notificationPreferences');
131
+ // Added to /_debug/listeners response:
132
+ cacheSize: prefCache.size,
133
+ ```
134
+
135
+ **Result**:
136
+ ```json
137
+ {
138
+ "cacheSize": 342
139
+ }
140
+ ```
141
+
142
+ Cache size is 342 entries (unique users). This is small and bounded by unique user count. Not the cause.
143
+
144
+ **Verdict**: REFUTED. Cache is small and not growing significantly.
145
+
146
+ ### Heap Snapshot Comparison
147
+
148
+ **Method**: Took two heap snapshots 30 minutes apart using `v8.writeHeapSnapshot()`.
149
+
150
+ **Comparison results** (filtered to top allocations):
151
+ ```
152
+ Constructor Count Delta Size Delta
153
+ ----------------------------------------------
154
+ (closure) +3,621 +14.2 MB
155
+ EventEmitter +3,621 +2.1 MB
156
+ Object +7,242 +4.8 MB
157
+ Array +3,621 +1.4 MB
158
+ ```
159
+
160
+ Closures and EventEmitter objects growing at the same rate — confirms listener accumulation.
161
+
162
+ ---
163
+
164
+ ## 4. Root Cause Confirmation
165
+
166
+ ### Root Cause
167
+ In `/src/features/notifications/orderStatusBroadcast.js`, a listener is registered on `orderEmitter` inside the Express request handler:
168
+
169
+ ```javascript
170
+ // BUG: This runs on EVERY request
171
+ router.post('/orders/:id/status', async (req, res) => {
172
+ const { id } = req.params;
173
+ const { status } = req.body;
174
+
175
+ await updateOrderStatus(id, status);
176
+
177
+ // This adds a NEW listener every time a request comes in
178
+ orderEmitter.on('statusChange', (data) => {
179
+ if (data.orderId === id) {
180
+ broadcastToSubscribers(data);
181
+ }
182
+ });
183
+
184
+ orderEmitter.emit('statusChange', { orderId: id, status });
185
+ res.json({ success: true });
186
+ });
187
+ ```
188
+
189
+ Each request adds a new listener that captures `id` in its closure. The listener is never removed. After thousands of requests, the emitter has thousands of listeners — each holding a reference to its closure scope, preventing garbage collection.
190
+
191
+ ### Why All Symptoms Are Explained
192
+ - **Memory growth ~20MB/hour**: Each listener + closure + captured scope ~1.5KB. At ~120 req/min = 7,200 req/hour = ~10.8MB/hour in listener objects alone, plus associated GC overhead and fragmentation.
193
+ - **p99 latency increase**: When `statusChange` fires, ALL accumulated listeners execute. After 48h of uptime, one emit triggers 300,000+ listener callbacks.
194
+ - **p50 stable**: Most requests complete before the emit, or the synchronous emit overhead is small at lower accumulation levels.
195
+ - **Correlates with request volume**: One listener added per request, independent of WebSocket connections.
196
+
197
+ ---
198
+
199
+ ## 5. Fix
200
+
201
+ ### Code Change
202
+
203
+ ```javascript
204
+ // FIXED: Register the broadcast listener ONCE at module level
205
+ const orderEmitter = require('./events/orderEvents').emitter;
206
+ const { broadcastToSubscribers } = require('./websocket/broadcast');
207
+
208
+ // Single listener registered at startup
209
+ orderEmitter.on('statusChange', (data) => {
210
+ broadcastToSubscribers(data);
211
+ });
212
+
213
+ router.post('/orders/:id/status', async (req, res) => {
214
+ const { id } = req.params;
215
+ const { status } = req.body;
216
+
217
+ await updateOrderStatus(id, status);
218
+ orderEmitter.emit('statusChange', { orderId: id, status });
219
+
220
+ res.json({ success: true });
221
+ });
222
+ ```
223
+
224
+ ### Changes Made
225
+ 1. Moved `orderEmitter.on('statusChange', ...)` out of the request handler to module scope (runs once at startup)
226
+ 2. Removed per-request `orderId` filtering from the listener — `broadcastToSubscribers` already handles routing to the correct WebSocket clients based on their subscriptions
227
+ 3. Added a regression test that verifies listener count stays constant after N requests
228
+
229
+ ### Regression Test
230
+ ```javascript
231
+ describe('orderStatusBroadcast', () => {
232
+ it('should not accumulate listeners across requests', async () => {
233
+ const orderEmitter = require('./events/orderEvents').emitter;
234
+ const initialCount = orderEmitter.listenerCount('statusChange');
235
+
236
+ // Simulate 100 requests
237
+ for (let i = 0; i < 100; i++) {
238
+ await request(app)
239
+ .post(`/orders/order-${i}/status`)
240
+ .send({ status: 'shipped' })
241
+ .expect(200);
242
+ }
243
+
244
+ const finalCount = orderEmitter.listenerCount('statusChange');
245
+ expect(finalCount).toBe(initialCount);
246
+ });
247
+ });
248
+ ```
249
+
250
+ ---
251
+
252
+ ## 6. Verification
253
+
254
+ ### Staging Verification
255
+ - Deployed fix to staging environment
256
+ - Ran 4-hour load test with production-equivalent traffic (120 req/min)
257
+ - Memory stable at 230-250MB throughout (no growth trend)
258
+ - Listener count stable at 3 (startup listeners only)
259
+ - p99 latency stable at 95-130ms throughout
260
+
261
+ ### Production Verification (72-hour observation)
262
+ ```
263
+ Time Since Deploy RSS Memory statusChange Listeners p99 Latency
264
+ ---------------------------------------------------------------------------
265
+ 0h (deploy) 198 MB 3 105 ms
266
+ 12h 237 MB 3 112 ms
267
+ 24h 241 MB 3 108 ms
268
+ 48h 239 MB 3 115 ms
269
+ 72h 244 MB 3 110 ms
270
+ ```
271
+
272
+ Memory stable at ~240MB. No growth trend. p99 latency normal. Zero OOM kills. Zero pod restarts.
273
+
274
+ ### Full Test Suite
275
+ All 847 tests passing. No regressions introduced.
276
+
277
+ ---
278
+
279
+ ## 7. Prevention Measures
280
+
281
+ ### Immediate
282
+ - Added an explicit `orderEmitter.setMaxListeners(10)` (pinning the Node.js default) to catch future accumulation early — Node.js emits a `MaxListenersExceededWarning` when the limit is exceeded
283
+ - Added memory and listener count to the service's health check endpoint
284
+ - Added Datadog monitor: alert if any EventEmitter has more than 50 listeners
285
+
286
+ ### Process
287
+ - Added to code review checklist: "EventEmitter listeners must not be registered inside request handlers or loops"
288
+ - Added ESLint rule to flag `.on()` or `.addListener()` calls inside Express route handlers
289
+ - Updated onboarding documentation with this incident as a case study
290
+
291
+ ---
292
+
293
+ ## Appendix: Commands Used
294
+
295
+ ```bash
296
+ # Heap snapshot from running process (requires Node.js started with --heapsnapshot-signal=SIGUSR2)
297
+ kill -USR2 <pid>
298
+
299
+ # Check listener counts via diagnostic endpoint
300
+ curl -s http://pod-3:3000/_debug/listeners | jq .
301
+
302
+ # Compare heap snapshots
303
+ # Used Chrome DevTools → Memory → Load snapshots → Comparison view
304
+
305
+ # Git bisect to confirm which commit introduced the bug
306
+ git bisect start
307
+ git bisect bad v2.14.0
308
+ git bisect good v2.13.0
309
+ # Result: commit a3f7c2e "Add WebSocket order status notifications"
310
+
311
+ # Load test the fix in staging
312
+ npx autocannon -c 50 -d 14400 -p 2 -R 2 http://staging:3000/orders/test-1/status \
313
+ -m POST -H "Content-Type: application/json" -b '{"status":"shipped"}'
314
+ ```