@cubis/foundry 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/Ai Agent Workflow/powers/database-skills/POWER.md +15 -2
  2. package/Ai Agent Workflow/powers/database-skills/SKILL.md +26 -2
  3. package/Ai Agent Workflow/powers/database-skills/engines/mongodb/POWER.md +10 -0
  4. package/Ai Agent Workflow/powers/database-skills/engines/mysql/POWER.md +10 -0
  5. package/Ai Agent Workflow/powers/database-skills/engines/neki/POWER.md +10 -0
  6. package/Ai Agent Workflow/powers/database-skills/engines/postgres/POWER.md +10 -0
  7. package/Ai Agent Workflow/powers/database-skills/engines/redis/POWER.md +10 -0
  8. package/Ai Agent Workflow/powers/database-skills/engines/sqlite/POWER.md +10 -0
  9. package/Ai Agent Workflow/powers/database-skills/engines/supabase/POWER.md +10 -0
  10. package/Ai Agent Workflow/powers/database-skills/engines/vitess/POWER.md +10 -0
  11. package/Ai Agent Workflow/powers/database-skills/steering/readme.md +18 -6
  12. package/Ai Agent Workflow/skills/database-skills/LATEST_VERSIONS.md +36 -0
  13. package/Ai Agent Workflow/skills/database-skills/README.md +11 -2
  14. package/Ai Agent Workflow/skills/database-skills/SKILL.md +85 -20
  15. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/SKILL.md +29 -7
  16. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/aggregation.md +153 -0
  17. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/modeling.md +95 -4
  18. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/mongoose-nestjs.md +133 -4
  19. package/Ai Agent Workflow/skills/database-skills/skills/mysql/SKILL.md +33 -7
  20. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/locking-ddl.md +103 -4
  21. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/query-indexing.md +103 -4
  22. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/replication.md +142 -0
  23. package/Ai Agent Workflow/skills/database-skills/skills/neki/SKILL.md +18 -7
  24. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/architecture.md +135 -4
  25. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/operations.md +76 -4
  26. package/Ai Agent Workflow/skills/database-skills/skills/postgres/SKILL.md +31 -7
  27. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/connection-pooling.md +142 -0
  28. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/migrations.md +126 -0
  29. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/performance-ops.md +116 -4
  30. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/schema-indexing.md +78 -4
  31. package/Ai Agent Workflow/skills/database-skills/skills/redis/SKILL.md +28 -7
  32. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/cache-patterns.md +153 -4
  33. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/data-modeling.md +152 -0
  34. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/operations.md +143 -4
  35. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/SKILL.md +28 -7
  36. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/local-first.md +94 -4
  37. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/performance.md +104 -4
  38. package/Ai Agent Workflow/skills/database-skills/skills/supabase/SKILL.md +27 -7
  39. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/performance-operations.md +94 -4
  40. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/rls-auth.md +105 -4
  41. package/Ai Agent Workflow/skills/database-skills/skills/vitess/SKILL.md +27 -7
  42. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/operational-safety.md +104 -4
  43. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/sharding-routing.md +124 -4
  44. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/backend-specialist.md +1 -1
  45. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/database-architect.md +8 -1
  46. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/performance-optimizer.md +2 -0
  47. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/workflows/database.md +11 -6
  48. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/backend-specialist.md +1 -1
  49. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/database-architect.md +8 -1
  50. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/performance-optimizer.md +2 -0
  51. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/workflows/database.md +11 -6
  52. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/backend-specialist.md +1 -1
  53. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/database-architect.md +8 -1
  54. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/performance-optimizer.md +2 -0
  55. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/workflows/database.md +11 -6
  56. package/package.json +1 -1
@@ -1,5 +1,136 @@
1
- # Neki Architecture
1
+ # Neki Architecture and Pre-Sharding Design
2
2
 
3
- - Define shard key and data domain boundaries first.
4
- - Reduce cross-shard dependencies in transactional paths.
5
- - Map query ownership to shard topology.
3
+ ## What is Neki
4
+
5
+ Neki is **sharded Postgres** built by PlanetScale — the company behind Vitess (the MySQL sharding system used at YouTube scale). Neki brings the same horizontal scaling approach to the Postgres ecosystem.
6
+
7
+ > **Status as of early 2026**: Neki is not yet GA. Treat all behavioral assumptions as provisional until official docs stabilize. Re-verify after each preview update.
8
+
9
+ ## What Neki provides
10
+
11
+ - **Horizontal sharding**: Distributes data across multiple Postgres nodes. Applications scale beyond single-node limits without sharding logic in application code.
12
+ - **Managed by PlanetScale**: Operational experience from running Vitess at scale applied to Postgres.
13
+ - **High availability**: PlanetScale-grade uptime with automatic failover.
14
+ - **Postgres protocol compatibility**: Applications connect using standard Postgres drivers.
15
+
16
+ ## Architecture model (conceptual)
17
+
18
+ Neki is architecturally adjacent to Vitess outcomes, but is **not a Vitess fork**. It is built from first principles for Postgres:
19
+
20
+ - A **routing layer** (analogous to VTGate) intercepts queries and routes them to the correct shard.
21
+ - A **shard key** is chosen at schema design time and determines which node stores each row.
22
+ - Data is partitioned horizontally — each shard holds a subset of rows for every sharded table.
23
+ - **Reference tables** (small lookup data) are replicated to all shards.
24
+
25
+ ## Pre-sharding design checklist
26
+
27
+ Designing for Neki compatibility now means you won't need a painful migration later.
28
+
29
+ ### 1. Identify your shard key early
30
+
31
+ Choose based on real query patterns — not theoretical. The shard key should appear in every high-QPS `WHERE` clause.
32
+
33
+ Common choices: `tenant_id`, `org_id`, `user_id`, `account_id`.
34
+
35
+ The shard key must be:
36
+ - Present on every tenant-scoped table
37
+ - High cardinality (even distribution across shards)
38
+ - Immutable after insert (changing it requires data migration)
39
+
40
+ ### 2. Primary key design
41
+
42
+ ```sql
43
+ -- Good: single-column PK that is the shard key
44
+ CREATE TABLE users (user_id BIGINT PRIMARY KEY, ...);
45
+
46
+ -- Good: composite PK with shard key leading on child tables
47
+ CREATE TABLE orders (
48
+ user_id BIGINT NOT NULL,
49
+ id BIGINT GENERATED ALWAYS AS IDENTITY,
50
+ PRIMARY KEY (user_id, id)
51
+ );
52
+
53
+ -- Bad: shard key not leading
54
+ PRIMARY KEY (id, user_id)
55
+ ```
56
+
57
+ Use UUIDs (prefer UUIDv7 for sortability) or app-generated monotonic IDs — global sequences across shards are a coordination bottleneck.
58
+
59
+ ### 3. Co-locate joined tables
60
+
61
+ Tables frequently joined must share the same shard key and be co-located. Always include the shard key in join conditions.
62
+
63
+ ```sql
64
+ -- Correct: shard-local join
65
+ SELECT o.id, oi.product_id FROM orders o
66
+ JOIN order_items oi ON oi.user_id = o.user_id AND oi.order_id = o.id
67
+ WHERE o.user_id = $1;
68
+ ```
69
+
70
+ ### 4. Index design
71
+
72
+ Lead all indexes with the shard key. Scope unique constraints to include it.
73
+
74
+ ```sql
75
+ -- Correct
76
+ CREATE INDEX idx_orders_user_status ON orders (user_id, status, created_at);
77
+ ALTER TABLE orders ADD CONSTRAINT uq_order_number UNIQUE (user_id, order_number);
78
+
79
+ -- Incorrect: missing shard key in leading position
80
+ CREATE INDEX idx_orders_status ON orders (status, created_at);
81
+ ```
82
+
83
+ ### 5. Foreign keys
84
+
85
+ - FKs within the same shard key (co-located data) may be supported.
86
+ - Cross-shard-key FKs must become application-level enforcement before sharding.
87
+ - Audit all FKs before planning a Neki migration.
88
+
89
+ ### 6. Query patterns
90
+
91
+ Every query on sharded tables must include the shard key:
92
+
93
+ ```sql
94
+ -- Correct: routes to single shard
95
+ SELECT * FROM orders WHERE user_id = $1 AND status = 'pending';
96
+
97
+ -- Incorrect: scatter — hits all shards
98
+ SELECT * FROM orders WHERE status = 'pending';
99
+ ```
100
+
101
+ For lookups by a non-shard column, maintain a mapping table and harden it with backfill + miss-rate monitoring.
102
+
103
+ ### 7. Transactions
104
+
105
+ Keep transactions within a single shard key value. Cross-shard transactions require coordination and are significantly slower.
106
+
107
+ ### 8. Global aggregations
108
+
109
+ `COUNT(*)`, `SUM()` across all shards are expensive. Scope to shard key, or maintain pre-computed rollup tables for global stats.
110
+
111
+ ### 9. Reference tables
112
+
113
+ Small, rarely-changing lookup data (countries, currencies, feature flags ≲100K rows, rarely written, no tenant scoping) don't need a shard key — they get replicated to all shards.
114
+
115
+ ## Shard-readiness checklist
116
+
117
+ - [ ] Shard key identified and present on every tenant-scoped table
118
+ - [ ] Composite PKs with shard key leading; shard-safe IDs (UUIDv7 or app-generated)
119
+ - [ ] Shard key in all queries, indexes (leading position), and join conditions
120
+ - [ ] Unique constraints scoped to include shard key
121
+ - [ ] Cross-shard FKs audited; plan for app-level enforcement
122
+ - [ ] Transactions scoped to single shard-key value
123
+ - [ ] Global aggregations identified; rollup/async plan in place
124
+ - [ ] Migrations use online/revertible patterns — avoid long locks
125
+
126
+ ## When to evaluate Neki
127
+
128
+ - Single Postgres node is hitting CPU or storage limits under real load.
129
+ - Multi-tenant SaaS with tenant isolation requirements.
130
+ - Write volume exceeds what vertical scaling can address.
131
+
132
+ Always benchmark on production-like data volume before committing. Keep migration plans reversible and testable.
133
+
134
+ ## Sources
135
+ - Neki product page: https://www.neki.dev/
136
+ - PlanetScale announcement: https://planetscale.com/blog/announcing-neki
@@ -1,5 +1,77 @@
1
- # Neki Operations
1
+ # Neki — Operational Guidance
2
2
 
3
- - Monitor shard-level health and imbalance trends.
4
- - Plan shard migration/resharding as explicit projects.
5
- - Keep rollback criteria defined for every topology change.
3
+ > **Status as of early 2026**: Neki is pre-GA. All operational assumptions are provisional. Re-verify behavior after each preview or doc update from PlanetScale.
4
+
5
+ ## Migration planning principles
6
+
7
+ ### Keep it reversible
8
+
9
+ - Schema migrations should use online, non-blocking patterns (e.g., additive changes first, then backfill, then constraint).
10
+ - Never cut data into Neki from a single-node Postgres setup without a proven rollback path to the original.
11
+ - Stage the migration: dev → staging with production-like data → production with canary traffic.
12
+
13
+ ### Test with production-like data volume
14
+
15
+ Behavior under 100K rows can be dramatically different at 100M rows. Before committing:
16
+ 1. Restore a production snapshot to a staging Neki environment.
17
+ 2. Run your full query workload against it.
18
+ 3. Measure cross-shard scatter rate, latency p99, and aggregation performance.
19
+ 4. Validate that all queries include the shard key.
20
+
21
+ ### Platform lock-in decision criteria
22
+
23
+ Only commit to Neki when:
24
+ - Benchmark results on Neki staging match or exceed your current single-node Postgres.
25
+ - All high-QPS query paths are shard-key-scoped (no unresolved scatter queries).
26
+ - Application code connects via standard Postgres driver with no sharding logic — confirm no changes needed.
27
+ - A rollback path is documented and tested.
28
+
29
+ ## Connection setup
30
+
31
+ Neki exposes a standard Postgres protocol endpoint. Connect the same way as any managed Postgres service:
32
+
33
+ ```
34
+ host: <your-neki-host>
35
+ port: 5432
36
+ sslmode: require (always use TLS in production)
37
+ ```
38
+
39
+ No special driver needed. Use standard `pg`, `psycopg2`, `pgx`, etc.
40
+
41
+ ## Schema change workflow in Neki
42
+
43
+ > Full DDL behavior docs pending GA. Apply conservative practices:
44
+
45
+ 1. **Additive changes first**: add nullable columns without defaults before backfill-and-constrain.
46
+ 2. **Online migrations**: use tools like `pg-osc` patterns — shadow table, backfill, atomic cutover.
47
+ 3. **Test on staging** with a representative data subset before production.
48
+ 4. **Monitor replication lag** during migrations — pause if lag grows unexpectedly.
49
+
50
+ ## Monitoring
51
+
52
+ While Neki-specific observability tooling is not yet documented, apply standard Postgres monitoring:
53
+
54
+ ```sql
55
+ -- Active connections and query state
56
+ SELECT state, count(*) FROM pg_stat_activity GROUP BY state;
57
+
58
+ -- Slow queries (requires pg_stat_statements extension)
59
+ SELECT query, calls, total_exec_time, mean_exec_time
60
+ FROM pg_stat_statements
61
+ ORDER BY total_exec_time DESC
62
+ LIMIT 10;
63
+ ```
64
+
65
+ Additionally at the Neki platform level:
66
+ - Monitor per-shard query distribution — uneven distribution suggests a poor shard key choice.
67
+ - Track cross-shard query rate — high scatter rate is a signal to revisit schema or query design.
68
+
69
+ ## Guardrails
70
+
71
+ - **Never run destructive operations** (`DROP TABLE`, `TRUNCATE`, mass `DELETE`) without explicit user confirmation and a verified backup.
72
+ - **Avoid long-running transactions** — they block vacuum/maintenance on affected shards.
73
+ - **Validate before lock-in**: run a full workload benchmark on production-like data before treating Neki as the primary datastore.
74
+
75
+ ## Sources
76
+ - Neki product page: https://www.neki.dev/
77
+ - PlanetScale announcement: https://planetscale.com/blog/announcing-neki
@@ -1,15 +1,39 @@
1
1
  ---
2
2
  name: postgres
3
- description: PostgreSQL schema, indexing, query optimization, migrations, and operations.
3
+ description: PostgreSQL schema, indexing, pagination, query optimization, migrations, and operations.
4
4
  ---
5
5
 
6
6
  # Postgres
7
7
 
8
- Load references as needed:
8
+ ## Optimization workflow
9
+
10
+ 1. Baseline query with `EXPLAIN (ANALYZE, BUFFERS)`.
11
+ 2. Align index design to `WHERE + JOIN + ORDER BY` shape.
12
+ 3. Prefer keyset pagination for deep lists.
13
+ 4. Re-check planner stats (`ANALYZE`) and maintenance health (`VACUUM`, autovacuum behavior).
14
+ 5. Validate with production-like data skew.
15
+
16
+ ## Indexing techniques
17
+
18
+ - Multicolumn indexes for common combined predicates.
19
+ - Partial indexes for hot filtered subsets.
20
+ - `INCLUDE` columns for index-only scans.
21
+ - GIN for JSONB/search-like containment queries.
22
+ - BRIN for append-mostly time-series style tables.
23
+
24
+ ## Pagination techniques
25
+
26
+ - Prefer seek pagination: `WHERE (sort_col, id) > (...) ORDER BY sort_col, id LIMIT n`.
27
+ - Keep deterministic ordering with unique tie-breakers.
28
+ - Use offset only for shallow pages.
29
+
30
+ ## Performance guardrails
31
+
32
+ - Avoid unused indexes; they increase write and vacuum cost.
33
+ - Keep transactions short to reduce lock and bloat pressure.
34
+ - Validate any planner-sensitive change on realistic row counts.
35
+
36
+ ## References
37
+
9
38
  - `references/schema-indexing.md`
10
39
  - `references/performance-ops.md`
11
-
12
- Key rules:
13
- - Start with `EXPLAIN (ANALYZE, BUFFERS)`.
14
- - Design indexes from real query patterns.
15
- - Keep transactions short; monitor vacuum and bloat.
@@ -0,0 +1,142 @@
1
+ # Postgres — Connection Pooling
2
+
3
+ ## Why connection pooling is necessary
4
+
5
+ Postgres spawns one process per connection, each consuming ~5–10 MB of RAM. At 200+ direct connections:
6
+ - Memory pressure becomes significant.
7
+ - Context-switching overhead increases.
8
+ - Connection setup latency adds up (especially from serverless functions).
9
+
10
+ **Rule**: almost every Postgres deployment in production needs a connection pooler in front.
11
+
12
+ ## PgBouncer — the standard choice
13
+
14
+ PgBouncer is a lightweight, battle-tested TCP proxy for Postgres.
15
+
16
+ ### Pooling modes
17
+
18
+ | Mode | How it works | Use for |
19
+ | --- | --- | --- |
20
+ | **Transaction** (recommended) | A server connection is held only for the duration of a transaction | Stateless apps, serverless, most OLTP |
21
+ | **Session** | One server connection per client session until it disconnects | Apps that use session-level features (`SET`, `LISTEN`, advisory locks) |
22
+ | **Statement** | Returns connection after each statement | Only for apps that don't use multi-statement transactions — rare |
23
+
24
+ **Transaction mode caveat**: prepared statements are session-level in Postgres and break under transaction pooling. Set `max_prepared_statements` to a value > 0 (PgBouncer 1.21+, which tracks prepared statements across pooled server connections) or disable prepared statements in your client.
25
+
26
+ ### Typical PgBouncer configuration (`pgbouncer.ini`)
27
+
28
+ ```ini
29
+ [databases]
30
+ mydb = host=127.0.0.1 port=5432 dbname=mydb
31
+
32
+ [pgbouncer]
33
+ listen_addr = 0.0.0.0
34
+ listen_port = 6432
35
+ auth_type = scram-sha-256
36
+ auth_file = /etc/pgbouncer/userlist.txt
37
+ pool_mode = transaction
38
+ max_client_conn = 1000 ; max connections from apps to PgBouncer
39
+ default_pool_size = 20 ; max actual Postgres connections per database/user pair
40
+ reserve_pool_size = 5 ; extra connections for spikes
41
+ reserve_pool_timeout = 3
42
+ server_idle_timeout = 600
43
+ log_connections = 0 ; disable in production (log noise)
44
+ ```
45
+
46
+ ### Pool sizing formula
47
+
48
+ ```
49
+ default_pool_size ≈ (num_postgres_cpu_cores × 2) + num_spindle_disks
50
+ ```
51
+
52
+ For a 4-core managed Postgres: target ~10–15 server connections. App instances × client threads → PgBouncer → bounded Postgres connections.
53
+
54
+ ## Application-level connection pools
55
+
56
+ Even with PgBouncer, application clients should pool connections to PgBouncer (not open/close on each request).
57
+
58
+ ### Node.js — `pg` / `node-postgres`
59
+
60
+ ```ts
61
+ import { Pool } from 'pg';
62
+ const pool = new Pool({
63
+ connectionString: process.env.DATABASE_URL,
64
+ max: 10, // max connections from this app instance to PgBouncer
65
+ idleTimeoutMillis: 30000,
66
+ connectionTimeoutMillis: 5000,
67
+ });
68
+ // Use pool.query() directly or pool.connect() for transactions
69
+ ```
70
+
71
+ ### Python — SQLAlchemy
72
+
73
+ ```python
74
+ engine = create_engine(
75
+ DATABASE_URL,
76
+ pool_size=10,
77
+ max_overflow=5,
78
+ pool_pre_ping=True, # test connection health before use
79
+ pool_recycle=3600, # recycle connections every hour
80
+ )
81
+ ```
82
+
83
+ ### Prisma (Node.js)
84
+
85
+ ```
86
+ # .env
87
+ DATABASE_URL="postgresql://user:pass@pgbouncer-host:6432/mydb?pgbouncer=true"
88
+ ```
89
+
90
+ The `?pgbouncer=true` flag disables prepared statements, which is required for transaction pooling.
91
+
92
+ ## Serverless / edge environments
93
+
94
+ Serverless functions open and close connections per invocation — disastrous for direct Postgres connections.
95
+
96
+ Options:
97
+ 1. **PgBouncer in transaction mode** — each function call uses a pool connection only during its transaction.
98
+ 2. **Supabase Transaction Pooler** — managed connection pooler (Supavisor) built into Supabase (port 6543).
99
+ 3. **Neon serverless driver** — uses HTTP instead of TCP; no persistent connection overhead.
100
+
101
+ ```ts
102
+ // Neon serverless (HTTP-based, no connection overhead)
103
+ import { neon } from '@neondatabase/serverless';
104
+ const sql = neon(process.env.DATABASE_URL);
105
+ const orders = await sql`SELECT * FROM orders WHERE user_id = ${userId}`;
106
+ ```
107
+
108
+ ## Monitoring connections
109
+
110
+ ```sql
111
+ -- Current connection breakdown
112
+ SELECT state, count(*) FROM pg_stat_activity GROUP BY state ORDER BY count DESC;
113
+
114
+ -- Waiting connections (lock or connection wait)
115
+ SELECT pid, state, wait_event_type, wait_event, query
116
+ FROM pg_stat_activity
117
+ WHERE wait_event IS NOT NULL;
118
+
119
+ -- Max connections setting
120
+ SHOW max_connections;
121
+
122
+ -- Current utilization rate
123
+ SELECT count(*) * 100.0 / current_setting('max_connections')::int AS pct_used
124
+ FROM pg_stat_activity;
125
+ ```
126
+
127
+ Alert when `pct_used > 80%` — before you hit the limit.
128
+
129
+ ## Common mistakes
130
+
131
+ | Mistake | Fix |
132
+ | --- | --- |
133
+ | No pooler in serverless | Add PgBouncer or use HTTP driver |
134
+ | `max_connections` set too high on Postgres | Lower it and pool instead |
135
+ | Prepared statements in transaction pool mode | Disable at driver level |
136
+ | App pool size > PgBouncer pool size | App waits; PgBouncer has no server connections left |
137
+ | No `pool_pre_ping` / health check | Stale connections fail silently |
138
+
139
+ ## Sources
140
+ - PgBouncer documentation: https://www.pgbouncer.org/config.html
141
+ - PostgreSQL max_connections: https://www.postgresql.org/docs/current/runtime-config-connection.html
142
+ - Prisma PgBouncer guide: https://www.prisma.io/docs/orm/prisma-client/setup-and-configuration/databases/postgresql#pgbouncer
@@ -0,0 +1,126 @@
1
+ # Postgres — Database Migrations
2
+
3
+ ## Core principles
4
+
5
+ - Migrations must be **idempotent** where possible — safe to run more than once.
6
+ - Migrations must be **reversible** — always write a down migration.
7
+ - Every migration runs inside a transaction (unless it contains commands that can't be transactional, like `CREATE INDEX CONCURRENTLY`).
8
+ - Test on a staging environment with a recent production data dump before production.
9
+
10
+ ## Migration table (simple self-managed setup)
11
+
12
+ ```sql
13
+ CREATE TABLE IF NOT EXISTS _migrations (
14
+ id SERIAL PRIMARY KEY,
15
+ name TEXT NOT NULL UNIQUE,
16
+ applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
17
+ );
18
+ ```
19
+
20
+ ## Zero-downtime migration pattern (additive-first)
21
+
22
+ Never make breaking schema changes in the same deploy as the application code that depends on them. Expand, then contract.
23
+
24
+ ### Phase 1: Expand (add without breaking)
25
+ ```sql
26
+ -- Add new nullable column — safe, no lock, app can read NULL for old rows
27
+ ALTER TABLE orders ADD COLUMN notes TEXT;
28
+
29
+ -- Add new index concurrently — no write lock
30
+ CREATE INDEX CONCURRENTLY idx_orders_notes ON orders (notes) WHERE notes IS NOT NULL;
31
+ ```
32
+
33
+ ### Phase 2: Backfill (populate data)
34
+ ```sql
35
+ -- Backfill in batches — never in one big UPDATE that locks the table
36
+ -- (UPDATE ... RETURNING ... INTO errors on multi-row results; batch by an id-set instead)
37
+ DO $$
38
+ DECLARE batch_size INT := 1000;
39
+ BEGIN
40
+ LOOP
41
+ UPDATE orders SET notes = ''
42
+ WHERE id IN (SELECT id FROM orders WHERE notes IS NULL LIMIT batch_size);
43
+ EXIT WHEN NOT FOUND;
44
+ PERFORM pg_sleep(0.01); -- brief pause between batches
45
+ END LOOP;
46
+ END $$;
47
+ ```
48
+
49
+ ### Phase 3: Constrain (enforce, after app deploys)
50
+ ```sql
51
+ -- Add NOT NULL only after backfill is complete and app always sets notes
52
+ -- Use VALIDATE CONSTRAINT to avoid a long table lock
53
+ ALTER TABLE orders ADD CONSTRAINT orders_notes_not_null CHECK (notes IS NOT NULL) NOT VALID;
54
+ ALTER TABLE orders VALIDATE CONSTRAINT orders_notes_not_null;
55
+ -- Later, replace with actual NOT NULL — no table rewrite; Postgres 12+ uses the validated CHECK constraint to skip the full-table scan
56
+ ALTER TABLE orders ALTER COLUMN notes SET NOT NULL;
57
+ ```
58
+
59
+ ## Safe vs unsafe DDL
60
+
61
+ | Operation | Safe online? | Notes |
62
+ | --- | --- | --- |
63
+ | Add nullable column | ✅ | Catalog-only change (no default) — instant in all supported versions |
64
+ | Add NOT NULL column with DEFAULT | ✅ Postgres 11+ | Older versions rewrite table |
65
+ | Drop column | ✅ | Marks column invisible; no immediate rewrite |
66
+ | Add index | ❌ (blocks writes) | Use `CREATE INDEX CONCURRENTLY` |
67
+ | Add UNIQUE constraint | ❌ | Create unique index concurrently first, then `ADD CONSTRAINT ... USING INDEX` |
68
+ | Rename column | ⚠️ | Breaking change — requires multi-phase deploy |
69
+ | Change column type | ❌ | Usually needs table rewrite; use additive approach |
70
+ | Drop table | ❌ | Irreversible without backup; ensure app no longer references it |
71
+
72
+ ## CREATE INDEX CONCURRENTLY
73
+
74
+ The only way to add an index without blocking writes:
75
+
76
+ ```sql
77
+ -- Run outside a transaction block (psql \c or separate connection)
78
+ CREATE INDEX CONCURRENTLY idx_orders_user_id ON orders (user_id);
79
+ ```
80
+
81
+ Caveats:
82
+ - Cannot run inside a transaction.
83
+ - Takes longer than regular `CREATE INDEX`.
84
+ - If it fails, leaves an `INVALID` index — drop it and retry:
85
+ ```sql
86
+ DROP INDEX CONCURRENTLY idx_orders_user_id;
87
+ ```
88
+
89
+ ## Renaming — multi-phase deploy
90
+
91
+ Never rename a column in a single deploy. The app will break.
92
+
93
+ ```
94
+ Phase 1: Add new column, dual-write in app to both old and new.
95
+ Phase 2: Backfill new column from old.
96
+ Phase 3: Deploy app to read from new column only.
97
+ Phase 4: Remove old column.
98
+ ```
99
+
100
+ ## Migration tools
101
+
102
+ | Tool | Language | Notes |
103
+ | --- | --- | --- |
104
+ | **Flyway** | Java / CLI | SQL-based, version numbered, popular in enterprise |
105
+ | **Liquibase** | Java / CLI | XML/YAML/SQL, rollback built in |
106
+ | **golang-migrate** | Go / CLI | Simple, SQL-based, widely used in Go projects |
107
+ | **Alembic** | Python | SQLAlchemy-integrated, autogenerate support |
108
+ | **Prisma Migrate** | Node.js | Generates SQL from schema diff, dev-friendly |
109
+ | **Drizzle** | Node.js | TypeScript-first, explicit SQL migrations |
110
+
111
+ For any tool: always store migration files in version control and review them in PRs.
112
+
113
+ ## Production checklist before running a migration
114
+
115
+ - [ ] Tested on staging with production data size.
116
+ - [ ] Estimated lock duration checked (`EXPLAIN` or timing on staging).
117
+ - [ ] `CREATE INDEX CONCURRENTLY` used for any new indexes.
118
+ - [ ] Down migration written and tested.
119
+ - [ ] Monitoring dashboard open during migration.
120
+ - [ ] Rollback plan documented.
121
+ - [ ] Maintenance window scheduled if migration is non-online.
122
+
123
+ ## Sources
124
+ - ALTER TABLE: https://www.postgresql.org/docs/current/sql-altertable.html
125
+ - CREATE INDEX CONCURRENTLY: https://www.postgresql.org/docs/current/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY
126
+ - Zero-downtime schema changes: https://www.postgresql.org/docs/current/ddl-alter.html
@@ -1,5 +1,117 @@
1
- # Postgres Performance and Operations
1
+ # Postgres Performance and Operations
2
2
 
3
- - Use pg_stat_statements to find high total-cost queries.
4
- - Keep autovacuum healthy and monitor long transactions.
5
- - Use connection pooling for serverless/runtime bursts.
3
+ ## EXPLAIN workflow
4
+
5
+ Always baseline with `EXPLAIN (ANALYZE, BUFFERS)` before and after any change.
6
+
7
+ ```sql
8
+ EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
9
+ SELECT * FROM orders WHERE user_id = 42 ORDER BY created_at DESC LIMIT 20;
10
+ ```
11
+
12
+ Key things to read:
13
+ - **Actual vs estimated rows**: large mismatch → run `ANALYZE` on the table.
14
+ - **`Buffers: shared hit / read`**: high `read` → data not cached, I/O bound.
15
+ - **`Seq Scan`** on a large table with a filter → likely missing index.
16
+ - **`Hash Join` vs `Nested Loop`**: nested loop is fast with small inner set; hash join is better for large sets.
17
+ - **`Sort` + `Limit`**: if sorting before limiting, consider an index with matching sort order.
18
+
19
+ ## pg_stat_statements
20
+
21
+ Tracks cumulative stats for every query shape. Use to find top queries by total time.
22
+
23
+ ```sql
24
+ -- Enable once per cluster
25
+ CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
26
+
27
+ -- Top 10 queries by total execution time
28
+ SELECT query, calls, total_exec_time::int, mean_exec_time::int, rows
29
+ FROM pg_stat_statements
30
+ ORDER BY total_exec_time DESC
31
+ LIMIT 10;
32
+
33
+ -- Reset stats after tuning
34
+ SELECT pg_stat_statements_reset();
35
+ ```
36
+
37
+ ## ANALYZE — keeping planner stats fresh
38
+
39
+ Postgres uses per-column statistics (histograms, MCVs) to estimate row counts. Stale stats = bad plans.
40
+
41
+ ```sql
42
+ ANALYZE orders; -- single table
43
+ ANALYZE VERBOSE orders; -- with output
44
+ ANALYZE; -- whole database
45
+ ```
46
+
47
+ - `autovacuum` runs `ANALYZE` automatically when ~10% of rows change. For bulk loads, run manually.
48
+ - Increase `default_statistics_target` (default 100) for columns with skewed distribution:
49
+ ```sql
50
+ ALTER TABLE orders ALTER COLUMN status SET STATISTICS 500;
51
+ ANALYZE orders;
52
+ ```
53
+
54
+ ## VACUUM and autovacuum
55
+
56
+ Postgres uses MVCC — dead tuples accumulate from UPDATEs and DELETEs. VACUUM reclaims them.
57
+
58
+ ```sql
59
+ VACUUM orders; -- reclaim dead tuples (non-blocking)
60
+ VACUUM ANALYZE orders; -- reclaim + refresh stats
61
+ VACUUM FULL orders; -- rewrite table, reclaims disk — needs exclusive lock, use cautiously
62
+ ```
63
+
64
+ Signs of autovacuum not keeping up:
65
+ ```sql
66
+ -- Tables with high dead tuple counts
67
+ SELECT relname, n_dead_tup, n_live_tup, last_autovacuum
68
+ FROM pg_stat_user_tables
69
+ ORDER BY n_dead_tup DESC;
70
+ ```
71
+
72
+ Tuning autovacuum for hot tables:
73
+ ```sql
74
+ ALTER TABLE orders SET (
75
+ autovacuum_vacuum_scale_factor = 0.01, -- default 0.2 — trigger earlier
76
+ autovacuum_analyze_scale_factor = 0.005
77
+ );
78
+ ```
79
+
80
+ ## Connection pooling
81
+
82
+ Postgres spawns one process per connection. At ~200+ connections, overhead is significant.
83
+ - Use **PgBouncer** (transaction pooling) to multiplex app connections.
84
+ - Size pool to available CPU cores × 2–4. Monitor `pg_stat_activity`.
85
+
86
+ ```sql
87
+ -- Active connections breakdown
88
+ SELECT state, count(*) FROM pg_stat_activity GROUP BY state;
89
+
90
+ -- Long-running queries
91
+ SELECT pid, now() - query_start AS duration, query, state
92
+ FROM pg_stat_activity
93
+ WHERE state != 'idle' AND query_start < now() - interval '30 seconds';
94
+ ```
95
+
96
+ ## Lock monitoring
97
+
98
+ ```sql
99
+ -- Blocked queries and what is blocking them
100
+ SELECT blocked.pid, blocked.query, blocking.pid AS blocking_pid, blocking.query AS blocking_query
101
+ FROM pg_stat_activity blocked
102
+ JOIN pg_stat_activity blocking ON blocking.pid = ANY(pg_blocking_pids(blocked.pid))
103
+ WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0;
104
+ ```
105
+
106
+ ## Key production guardrails
107
+
108
+ - Never run `VACUUM FULL` on a busy production table — takes an exclusive lock.
109
+ - Use `CREATE INDEX CONCURRENTLY` to avoid write blocks.
110
+ - Set `statement_timeout` and `lock_timeout` to prevent runaway queries from starving the system.
111
+ - Avoid long-open transactions — they block autovacuum and cause bloat.
112
+
113
+ ## Sources
114
+ - EXPLAIN: https://www.postgresql.org/docs/current/using-explain.html
115
+ - pg_stat_statements: https://www.postgresql.org/docs/current/pgstatstatements.html
116
+ - ANALYZE: https://www.postgresql.org/docs/current/sql-analyze.html
117
+ - VACUUM / autovacuum: https://www.postgresql.org/docs/current/routine-vacuuming.html