@cubis/foundry 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/Ai Agent Workflow/powers/database-skills/POWER.md +15 -2
  2. package/Ai Agent Workflow/powers/database-skills/SKILL.md +26 -2
  3. package/Ai Agent Workflow/powers/database-skills/engines/mongodb/POWER.md +10 -0
  4. package/Ai Agent Workflow/powers/database-skills/engines/mysql/POWER.md +10 -0
  5. package/Ai Agent Workflow/powers/database-skills/engines/neki/POWER.md +10 -0
  6. package/Ai Agent Workflow/powers/database-skills/engines/postgres/POWER.md +10 -0
  7. package/Ai Agent Workflow/powers/database-skills/engines/redis/POWER.md +10 -0
  8. package/Ai Agent Workflow/powers/database-skills/engines/sqlite/POWER.md +10 -0
  9. package/Ai Agent Workflow/powers/database-skills/engines/supabase/POWER.md +10 -0
  10. package/Ai Agent Workflow/powers/database-skills/engines/vitess/POWER.md +10 -0
  11. package/Ai Agent Workflow/powers/database-skills/steering/readme.md +18 -6
  12. package/Ai Agent Workflow/skills/database-skills/LATEST_VERSIONS.md +36 -0
  13. package/Ai Agent Workflow/skills/database-skills/README.md +11 -2
  14. package/Ai Agent Workflow/skills/database-skills/SKILL.md +85 -20
  15. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/SKILL.md +29 -7
  16. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/aggregation.md +153 -0
  17. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/modeling.md +95 -4
  18. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/mongoose-nestjs.md +133 -4
  19. package/Ai Agent Workflow/skills/database-skills/skills/mysql/SKILL.md +33 -7
  20. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/locking-ddl.md +103 -4
  21. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/query-indexing.md +103 -4
  22. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/replication.md +142 -0
  23. package/Ai Agent Workflow/skills/database-skills/skills/neki/SKILL.md +18 -7
  24. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/architecture.md +135 -4
  25. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/operations.md +76 -4
  26. package/Ai Agent Workflow/skills/database-skills/skills/postgres/SKILL.md +31 -7
  27. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/connection-pooling.md +142 -0
  28. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/migrations.md +126 -0
  29. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/performance-ops.md +116 -4
  30. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/schema-indexing.md +78 -4
  31. package/Ai Agent Workflow/skills/database-skills/skills/redis/SKILL.md +28 -7
  32. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/cache-patterns.md +153 -4
  33. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/data-modeling.md +152 -0
  34. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/operations.md +143 -4
  35. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/SKILL.md +28 -7
  36. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/local-first.md +94 -4
  37. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/performance.md +104 -4
  38. package/Ai Agent Workflow/skills/database-skills/skills/supabase/SKILL.md +27 -7
  39. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/performance-operations.md +94 -4
  40. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/rls-auth.md +105 -4
  41. package/Ai Agent Workflow/skills/database-skills/skills/vitess/SKILL.md +27 -7
  42. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/operational-safety.md +104 -4
  43. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/sharding-routing.md +124 -4
  44. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/backend-specialist.md +1 -1
  45. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/database-architect.md +8 -1
  46. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/performance-optimizer.md +2 -0
  47. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/workflows/database.md +11 -6
  48. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/backend-specialist.md +1 -1
  49. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/database-architect.md +8 -1
  50. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/performance-optimizer.md +2 -0
  51. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/workflows/database.md +11 -6
  52. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/backend-specialist.md +1 -1
  53. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/database-architect.md +8 -1
  54. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/performance-optimizer.md +2 -0
  55. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/workflows/database.md +11 -6
  56. package/package.json +1 -1
@@ -1,5 +1,134 @@
1
- # Mongoose + NestJS
1
+ # MongoDB — Mongoose and NestJS Patterns
2
2
 
3
- - Keep repository boundaries clear.
4
- - Define schema validation and indexes in one place.
5
- - Validate query plans on production-like volumes.
3
+ ## Repository pattern with Mongoose + NestJS
4
+
5
+ Keep data access behind a repository class — don't scatter `Model.find()` calls across services.
6
+
7
+ ```ts
8
+ // order.repository.ts
9
+ @Injectable()
10
+ export class OrderRepository {
11
+ constructor(@InjectModel(Order.name) private model: Model<Order>) {}
12
+
13
+ async findByUser(userId: string, limit = 20, afterId?: string): Promise<Order[]> {
14
+ const query: FilterQuery<Order> = { userId };
15
+ if (afterId) query._id = { $gt: new Types.ObjectId(afterId) };
16
+ return this.model
17
+ .find(query)
18
+ .sort({ _id: 1 })
19
+ .limit(limit)
20
+ .lean() // return plain JS objects — skip Mongoose hydration overhead
21
+ .exec();
22
+ }
23
+
24
+ async create(dto: CreateOrderDto): Promise<Order> {
25
+ return this.model.create(dto);
26
+ }
27
+ }
28
+ ```
29
+
30
+ ## Schema index definition
31
+
32
+ Define indexes on the schema, not as ad-hoc calls. This makes them part of your codebase and reviewable.
33
+
34
+ ```ts
35
+ @Schema({ timestamps: true })
36
+ export class Order {
37
+ @Prop({ required: true, index: true })
38
+ userId: string;
39
+
40
+ @Prop({ required: true })
41
+ status: string;
42
+
43
+ @Prop({ type: [{ sku: String, qty: Number, price: Number }] })
44
+ lineItems: LineItem[];
45
+ }
46
+
47
+ // Compound index — define at schema level
48
+ OrderSchema.index({ userId: 1, status: 1, createdAt: -1 });
49
+
50
+ // TTL index — auto-delete after 90 days
51
+ OrderSchema.index({ createdAt: 1 }, { expireAfterSeconds: 60 * 60 * 24 * 90 });
52
+ ```
53
+
54
+ Always track index definitions in migration scripts when adding to existing collections.
55
+
56
+ ## Lean reads and projection
57
+
58
+ - `lean()` returns plain JS objects instead of Mongoose Document instances — no hydration overhead, no change tracking. Use for read paths.
59
+ - Always project only what you need to reduce transfer size.
60
+
61
+ ```ts
62
+ // Lean + projection for list endpoints
63
+ this.model
64
+ .find({ userId })
65
+ .select('status createdAt total') // project only needed fields
66
+ .lean()
67
+ .exec();
68
+
69
+ // Full document with Mongoose methods only when saving/updating
70
+ const doc = await this.model.findById(id).exec();
71
+ doc.status = 'complete';
72
+ await doc.save();
73
+ ```
74
+
75
+ ## Transactions (MongoDB 4.0+ on replica sets, 4.2+ on sharded clusters)
76
+
77
+ ```ts
78
+ const session = await this.connection.startSession();
79
+ session.startTransaction();
80
+ try {
81
+ await this.orderModel.create([orderData], { session });
82
+ await this.inventoryModel.updateOne(
83
+ { sku: orderData.sku },
84
+ { $inc: { qty: -1 } },
85
+ { session }
86
+ );
87
+ await session.commitTransaction();
88
+ } catch (e) {
89
+ await session.abortTransaction();
90
+ throw e;
91
+ } finally {
92
+ session.endSession();
93
+ }
94
+ ```
95
+
96
+ Transactions are only needed for multi-document atomicity. Single-document operations are always atomic in MongoDB.
97
+
98
+ ## Aggregation pipeline in NestJS
99
+
100
+ ```ts
101
+ const result = await this.model.aggregate([
102
+ { $match: { userId, status: 'complete' } },
103
+ { $group: { _id: '$category', total: { $sum: '$amount' } } },
104
+ { $sort: { total: -1 } },
105
+ { $limit: 10 },
106
+ ]);
107
+ ```
108
+
109
+ Use `.aggregate()` for reporting/analytics. For regular queries, prefer `.find()` so Mongoose can apply schema type casting.
110
+
111
+ ## Connection and pool setup (NestJS module)
112
+
113
+ ```ts
114
+ MongooseModule.forRoot(uri, {
115
+ maxPoolSize: 10, // driver default is 100 — tune to app concurrency
116
+ serverSelectionTimeoutMS: 5000,
117
+ socketTimeoutMS: 45000,
118
+ connectTimeoutMS: 10000,
119
+ })
120
+ ```
121
+
122
+ ## Common mistakes
123
+
124
+ - Calling `Model.find()` directly in service/controller — bypasses repository, untestable.
125
+ - Forgetting `.lean()` on list endpoints — returns Mongoose Documents with full overhead.
126
+ - Defining compound indexes ad-hoc in `onModuleInit` — use schema-level definition instead.
127
+ - Not projecting fields on list queries — transfers full documents when only 3 fields are needed.
128
+ - Using `new Model(data).save()` in a loop — batch with `Model.insertMany()` instead.
129
+
130
+ ## Sources
131
+ - Mongoose documentation: https://mongoosejs.com/docs/
132
+ - MongoDB index strategies: https://www.mongodb.com/docs/manual/indexes/
133
+ - MongoDB data modeling: https://www.mongodb.com/docs/manual/data-modeling/
134
+ - MongoDB transactions: https://www.mongodb.com/docs/manual/core/transactions/
@@ -1,15 +1,41 @@
1
1
  ---
2
2
  name: mysql
3
- description: MySQL/InnoDB schema design, indexing, query tuning, and operational safety.
3
+ description: MySQL/InnoDB schema design, indexing, pagination, query tuning, and operational safety.
4
4
  ---
5
5
 
6
6
  # MySQL
7
7
 
8
- Load references as needed:
8
+ ## Version posture
9
+
10
+ - Prefer **8.4 LTS** for long-lived production stability.
11
+ - Use **9.x Innovation** only when you need newest features and can absorb faster change cadence.
12
+
13
+ ## Optimization workflow
14
+
15
+ 1. Baseline with `EXPLAIN` and `EXPLAIN ANALYZE`.
16
+ 2. Tune indexes around dominant filter and sort paths.
17
+ 3. Validate pagination path (`ORDER BY` + index coverage).
18
+ 4. Evaluate DDL lock/replication impact before migration.
19
+
20
+ ## Indexing techniques
21
+
22
+ - Composite indexes that match predicate and ordering direction.
23
+ - Covering indexes for hot read endpoints.
24
+ - Keep clustered primary key narrow to reduce secondary index overhead.
25
+ - Avoid shotgun indexing; measure write amplification impact.
26
+
27
+ ## Pagination techniques
28
+
29
+ - Prefer seek/keyset pagination with deterministic ordering.
30
+ - Include unique tie-breaker for stable page boundaries.
31
+ - Avoid large offset pagination for deep traversal.
32
+
33
+ ## Operational guardrails
34
+
35
+ - Treat online DDL mode and lock behavior as explicit rollout risks.
36
+ - Test DDL on production-like data volume and replica topology.
37
+
38
+ ## References
39
+
9
40
  - `references/query-indexing.md`
10
41
  - `references/locking-ddl.md`
11
-
12
- Key rules:
13
- - Use `EXPLAIN` before optimization.
14
- - Prefer online-safe schema change plans.
15
- - Track lock waits and deadlocks during rollout.
@@ -1,5 +1,104 @@
1
- # MySQL Locking and DDL
1
+ # MySQL Locking and DDL Safety
2
2
 
3
- - Estimate lock impact before ALTER operations.
4
- - Use online DDL where possible.
5
- - Prepare fallback/rollback before production DDL.
3
+ ## Online DDL algorithms
4
+
5
+ MySQL InnoDB can perform many DDL operations without blocking reads/writes. Always check the algorithm before running in production.
6
+
7
+ | Algorithm | Write impact | When used |
8
+ | --- | --- | --- |
9
+ | `INSTANT` | None | Adding columns (MySQL 8.0.12+; any position from 8.0.29), some metadata-only changes |
10
+ | `INPLACE` | No copy; may block briefly at start/end | Most index adds, some column modifications |
11
+ | `COPY` | Full table rewrite; blocks writes for duration | Changing primary key, column type changes, some charset changes |
12
+
13
+ Check before applying:
14
+ ```sql
15
+ ALTER TABLE orders ADD COLUMN notes TEXT, ALGORITHM=INPLACE, LOCK=NONE;
16
+ -- If MySQL rejects it, it needs COPY → use pt-online-schema-change or gh-ost
17
+ ```
18
+
19
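+ Fail fast instead of silently falling back to `COPY` (this is not a dry run — the statement executes if `INPLACE` is supported):
20
+ ```sql
21
+ -- Errors immediately if INPLACE / LOCK=NONE is not supported; if it is supported, the index is actually built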
22
+ ALTER TABLE orders ADD INDEX idx_test (status), ALGORITHM=INPLACE, LOCK=NONE;
23
+ ```
24
+
25
+ ## Metadata lock (MDL) exposure
26
+
27
+ DDL acquires a Metadata Lock on the table. Even an `INSTANT` or `INPLACE` operation blocks if a long-running transaction or idle connection holds a conflicting MDL.
28
+
29
+ ```sql
30
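+ -- Check for InnoDB row-lock waiters and blockers before running DDL (MySQL 5.7; on 8.0+ query sys.innodb_lock_waits, and sys.schema_table_lock_waits for metadata locks)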
31
+ SELECT r.trx_id waiting_trx_id, r.trx_mysql_thread_id waiting_thread,
32
+ b.trx_id blocking_trx_id, b.trx_mysql_thread_id blocking_thread,
33
+ b.trx_query blocking_query
34
+ FROM information_schema.innodb_lock_waits w
35
+ JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
36
+ JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id;
37
+
38
+ -- Also check for long-running active transactions
39
+ SELECT * FROM information_schema.innodb_trx WHERE trx_started < NOW() - INTERVAL 30 SECOND;
40
+ ```
41
+
42
+ Kill blockers with caution before DDL:
43
+ ```sql
44
+ KILL <thread_id>; -- kills connection, rolls back its transaction
45
+ ```
46
+
47
+ ## Replication lag impact
48
+
49
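+ - `COPY` algorithm: the `ALTER` is written to the binary log as a statement after it finishes on the source, and each replica then re-executes the full table rewrite — expect lag of roughly the rebuild duration.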
50
+ - `INPLACE` lock-free DDL: non-blocking on the source, but each replica still has to execute the same DDL, so long index builds can still cause lag.
51
+ - Monitor `Seconds_Behind_Master` / `Seconds_Behind_Source` during DDL.
52
+
53
+ ```sql
54
+ -- On replica
55
+ SHOW REPLICA STATUS\G
56
+ -- Watch: Seconds_Behind_Source
57
+ ```
58
+
59
+ ## Online schema change tools
60
+
61
+ For tables too large or busy for native online DDL:
62
+ - **gh-ost** (GitHub): uses binlog streaming, minimal impact, best for production.
63
+ - **pt-online-schema-change** (Percona): trigger-based, established tooling.
64
+
65
+ Both create a shadow table, migrate data in the background, then atomically cut over with a brief lock.
66
+
67
+ ## InnoDB row-level locking
68
+
69
+ - InnoDB locks rows, not tables (except DDL).
70
+ - `SELECT ... FOR UPDATE` takes exclusive row locks — keep duration short.
71
+ - `REPEATABLE READ` (default) uses **gap locks** to prevent phantom reads; causes more lock contention than `READ COMMITTED`.
72
+ - Switch to `READ COMMITTED` for high-contention OLTP workloads:
73
+ ```sql
74
+ SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
75
+ ```
76
+
77
+ ## Deadlock handling
78
+
79
+ ```sql
80
+ -- Show last deadlock detail
81
+ SHOW ENGINE INNODB STATUS\G -- search for LATEST DETECTED DEADLOCK
82
+
83
+ -- Enable deadlock logging
84
+ SET GLOBAL innodb_print_all_deadlocks = ON;
85
+ ```
86
+
87
+ Prevention:
88
+ - Always access rows in a consistent order across transactions.
89
+ - Keep transactions short — do I/O and computation outside the transaction boundary.
90
+ - Retry with exponential backoff on error 1213 (`ER_LOCK_DEADLOCK`).
91
+
92
+ ## MySQL release tracks
93
+
94
+ | Track | Description |
95
+ | --- | --- |
96
+ | **LTS** (e.g. 8.4, 9.7+) | Long-term support; production recommended |
97
+ | **Innovation** (8.1, 8.2, etc.) | Frequent releases with new features, shorter support window |
98
+
99
+ Check which features are available for your version before proposing DDL changes.
100
+
101
+ ## Sources
102
+ - Online DDL operations: https://dev.mysql.com/doc/refman/8.4/en/innodb-online-ddl-operations.html
103
+ - InnoDB locking: https://dev.mysql.com/doc/refman/8.4/en/innodb-locking.html
104
+ - MySQL release tracks: https://dev.mysql.com/doc/refman/8.4/en/mysql-releases.html
@@ -1,5 +1,104 @@
1
- # MySQL Query and Indexing
1
+ # MySQL Query Optimization and Indexing
2
2
 
3
- - Build indexes by predicate order and sort usage.
4
- - Avoid broad scans in high-traffic paths.
5
- - Prefer cursor pagination over large OFFSET queries.
3
+ ## EXPLAIN workflow
4
+
5
+ Always run `EXPLAIN` (or `EXPLAIN ANALYZE`) before and after index changes.
6
+
7
+ ```sql
8
+ EXPLAIN SELECT * FROM orders WHERE user_id = 42 ORDER BY created_at DESC LIMIT 20;
9
+ EXPLAIN ANALYZE SELECT ...; -- MySQL 8.0+: shows actual row counts and timing
10
+ ```
11
+
12
+ Key columns to read:
13
+
14
+ | Column | Red flag |
15
+ | --- | --- |
16
+ | `type` | `ALL` = full table scan. Target: `ref`, `eq_ref`, `range`, or `const`. |
17
+ | `Extra` | `Using filesort` = no index satisfying ORDER BY. `Using temporary` = an internal temporary table (in-memory or on-disk) is required, e.g. for GROUP BY / DISTINCT. |
18
+ | `rows` | Estimated rows examined. Should be close to rows returned. |
19
+ | `key` | Which index MySQL chose. `NULL` = no index used. |
20
+
21
+ ## Composite index design (leftmost prefix rule)
22
+
23
+ - Column order is critical: **equality predicates first, then range, then sort**.
24
+ - The planner can use any leading prefix of the index — trailing columns after a range stop being used.
25
+ - **Good**: `(status, user_id, created_at)` for `WHERE status = 'open' AND user_id = 42 ORDER BY created_at`.
26
+ - **Bad**: `(created_at, status)` for `WHERE status = 'open'` — planner must scan the whole index.
27
+
28
+ ```sql
29
+ -- Supports: WHERE status = ? AND user_id = ? ORDER BY created_at (no filesort)
30
+ -- Supports: WHERE status = ? AND user_id = ?
31
+ CREATE INDEX idx_orders_status_user_created ON orders (status, user_id, created_at);
32
+ ```
33
+
34
+ ## Covering indexes
35
+
36
+ Include all selected columns in the index to avoid the extra clustered-index (primary key) row lookup (index-only read; shown as `Using index` in `EXPLAIN`).
37
+
38
+ ```sql
39
+ -- Query: SELECT status, total FROM orders WHERE user_id = 42
40
+ CREATE INDEX idx_orders_user_covering ON orders (user_id, status, total);
41
+ -- MySQL has no INCLUDE clause; cover extra columns by appending them to the composite key
42
+ ```
43
+
44
+ Range predicates in the key stop the index from being used to filter or sort on subsequent columns — put range columns last in the key, or use a separate index for those.
45
+
46
+ ## Seek (cursor) pagination — avoid OFFSET
47
+
48
+ `OFFSET N` forces MySQL to scan and discard N rows. On large tables this is catastrophically slow.
49
+
50
+ ```sql
51
+ -- BAD: OFFSET pagination
52
+ SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 10000;
53
+
54
+ -- GOOD: Seek pagination using last seen ID
55
+ SELECT * FROM orders WHERE id > :last_seen_id ORDER BY id LIMIT 20;
56
+
57
+ -- For composite sort keys
58
+ SELECT * FROM orders
59
+ WHERE (created_at, id) < (:last_created_at, :last_id)
60
+ ORDER BY created_at DESC, id DESC
61
+ LIMIT 20;
62
+ ```
63
+
64
+ ## Function calls on indexed columns break index usage
65
+
66
+ ```sql
67
+ -- BAD: function call prevents index use
68
+ SELECT * FROM users WHERE YEAR(created_at) = 2025;
69
+
70
+ -- GOOD: range predicate preserves index
71
+ SELECT * FROM users WHERE created_at >= '2025-01-01' AND created_at < '2026-01-01';
72
+ ```
73
+
74
+ ## Index maintenance
75
+
76
+ ```sql
77
+ -- Find unused indexes (after collecting stats for a while)
78
+ SELECT object_schema, object_name, index_name, count_read
79
+ FROM performance_schema.table_io_waits_summary_by_index_usage
80
+ WHERE count_read = 0 AND object_schema NOT IN ('mysql', 'sys', 'information_schema')
81
+ ORDER BY object_schema, object_name;
82
+
83
+ -- Check index sizes
84
+ SELECT table_name, index_name, stat_value * @@innodb_page_size / 1024 / 1024 AS size_mb
85
+ FROM mysql.innodb_index_stats
86
+ WHERE stat_name = 'size' AND database_name = DATABASE()
87
+ ORDER BY size_mb DESC;
88
+ ```
89
+
90
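+ - Drop secondary indexes with `count_read = 0` once the counters cover a full workload cycle (they reset on server restart) — every index adds write and lock overhead. Never drop `PRIMARY` or unique-constraint indexes on this basis.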
91
+ - Use `ALTER TABLE ... ADD INDEX ..., ALGORITHM=INPLACE, LOCK=NONE` for online index changes.
92
+
93
+ ## Key guardrails
94
+
95
+ - Avoid leading `%` in LIKE (`LIKE '%foo'`) — can't use a B-tree index.
96
+ - Avoid `OR` across different indexed columns — use `UNION` (or `UNION ALL` when the branches are mutually exclusive) instead.
97
+ - Avoid implicit type coercions in `WHERE` (e.g., `WHERE varchar_col = 123`) — breaks index usage.
98
+ - Batch large INSERTs (500–5000 rows per statement) to reduce per-statement overhead.
99
+
100
+ ## Sources
101
+ - Using EXPLAIN: https://dev.mysql.com/doc/refman/8.4/en/using-explain.html
102
+ - Optimization and indexes: https://dev.mysql.com/doc/refman/8.4/en/optimization-indexes.html
103
+ - LIMIT/OFFSET optimization: https://dev.mysql.com/doc/refman/8.4/en/limit-optimization.html
104
+ - performance_schema index stats: https://dev.mysql.com/doc/refman/8.4/en/table-io-waits-summary-by-index-usage-table.html
@@ -0,0 +1,142 @@
1
+ # MySQL — Replication
2
+
3
+ ## Replication basics
4
+
5
+ MySQL replication streams changes from a **source** (primary) to one or more **replicas** (secondaries) using the **binary log (binlog)**.
6
+
7
+ Common setups:
8
+ - **Single primary + read replicas**: route writes to primary, reads to replicas.
9
+ - **Group Replication / InnoDB Cluster**: single-primary (default) or multi-primary mode with automatic failover.
10
+ - **Semi-sync**: primary waits for at least one replica to acknowledge before commit.
11
+
12
+ ## Binary log formats
13
+
14
+ | Format | What it logs | Use for |
15
+ | --- | --- | --- |
16
+ | `ROW` (recommended) | Actual changed rows | Best consistency — exact row deltas |
17
+ | `STATEMENT` | SQL statements | Smaller binlog size, but non-deterministic functions (`NOW()`, `UUID()`) unsafe |
18
+ | `MIXED` | Statement by default, row for unsafe statements | Compromise |
19
+
20
+ ```sql
21
+ -- Check current format
22
+ SHOW VARIABLES LIKE 'binlog_format';
23
+
24
+ -- Set to ROW (recommended for most setups)
25
+ SET GLOBAL binlog_format = 'ROW';
26
+ ```
27
+
28
+ ## Check replication health
29
+
30
+ ```sql
31
+ -- On replica
32
+ SHOW REPLICA STATUS\G
33
+
34
+ -- Key fields to monitor:
35
+ -- Seconds_Behind_Source → replication lag in seconds (0 = caught up)
36
+ -- Replica_SQL_Running → YES (must be YES)
37
+ -- Replica_IO_Running → YES (must be YES)
38
+ -- Last_SQL_Error → empty = no error
39
+ -- Last_IO_Error → empty = no error
40
+ ```
41
+
42
+ Alert when `Seconds_Behind_Source > N` where N depends on your acceptable staleness (typically < 30s for OLTP).
43
+
44
+ ## GTID-based replication (MySQL 5.6+, recommended)
45
+
46
+ GTID (Global Transaction Identifier) gives every committed transaction a unique ID. Enables:
47
+ - Automatic failover without manually computing binlog coordinates.
48
+ - Easier replica promotion.
49
+
50
+ ```ini
51
+ # my.cnf on source and all replicas
52
+ gtid_mode = ON
53
+ enforce_gtid_consistency = ON
54
+ ```
55
+
56
+ ```sql
57
+ -- Check GTID executed set on replica
58
+ SHOW GLOBAL VARIABLES LIKE 'gtid_executed';
59
+ -- Should match source's gtid_executed when fully caught up
60
+ ```
61
+
62
+ ## Read replica routing
63
+
64
+ Route reads to replicas only for **eventually consistent** reads — data on the replica may be seconds behind the source.
65
+
66
+ ```ts
67
+ // Example: separate pools per role
68
+ const writePool = createPool({ host: PRIMARY_HOST });
69
+ const readPool = createPool({ host: REPLICA_HOST });
70
+
71
+ // Writes always go to primary
72
+ await writePool.query('INSERT INTO orders ...');
73
+
74
+ // Reads that can tolerate slight lag
75
+ const orders = await readPool.query('SELECT * FROM orders WHERE ...');
76
+ ```
77
+
78
+ **Never** route reads to replica for:
79
+ - Reading immediately after a write in the same request ("read your own writes").
80
+ - Writes that depend on current state (check-and-set patterns).
81
+
82
+ ## Replication lag and DDL impact
83
+
84
+ DDL is replicated as a statement: a `COPY`-algorithm `ALTER` is re-executed on each replica, which rebuilds the full table there. This causes massive lag on large tables.
85
+
86
+ Best practices:
87
+ - Use `ALGORITHM=INPLACE, LOCK=NONE` for all DDL when possible.
88
+ - Schedule large `COPY`-algorithm DDL during off-peak.
89
+ - Monitor `Seconds_Behind_Source` during DDL and pause if lag grows.
90
+ - Consider gh-ost or pt-online-schema-change for large tables — both copy rows in throttled chunks and can pause when replica lag grows.
91
+
92
+ ## Binlog retention
93
+
94
+ ```sql
95
+ -- How long binlogs are kept (seconds in MySQL 8.0+, days in 5.7)
96
+ SHOW VARIABLES LIKE 'binlog_expire_logs_seconds'; -- MySQL 8.0
97
+ SHOW VARIABLES LIKE 'expire_logs_days'; -- MySQL 5.7
98
+
99
+ -- Set retention (in seconds, MySQL 8.0)
100
+ SET GLOBAL binlog_expire_logs_seconds = 604800; -- 7 days
101
+ ```
102
+
103
+ Keep binlogs long enough to:
104
+ - Recover from a replica rebuild without a full dump.
105
+ - Support point-in-time recovery.
106
+ - Feed CDC (change data capture) consumers.
107
+
108
+ ## Semi-synchronous replication
109
+
110
+ Reduces the risk of data loss on primary crash at the cost of slightly higher write latency.
111
+
112
+ ```sql
113
+ -- Install and enable on source
114
+ INSTALL PLUGIN rpl_semi_sync_source SONAME 'semisync_source.so';
115
+ SET GLOBAL rpl_semi_sync_source_enabled = 1;
116
+
117
+ -- Install and enable on replica
118
+ INSTALL PLUGIN rpl_semi_sync_replica SONAME 'semisync_replica.so';
119
+ SET GLOBAL rpl_semi_sync_replica_enabled = 1;
120
+ ```
121
+
122
+ With semi-sync: source waits for at least one replica ACK per commit. If no replica ACKs within `rpl_semi_sync_source_timeout` ms, falls back to async automatically.
123
+
124
+ ## Monitoring replication in production
125
+
126
+ ```sql
127
+ -- Source: check active replica connections
128
+ SHOW PROCESSLIST; -- look for "Waiting for semi-sync ACK" or "Binlog Dump"
129
+
130
+ -- Replica: continuous lag check
131
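+ SELECT lag.worker_id, TIMESTAMPDIFF(MICROSECOND, lag.last_applied_transaction_original_commit_timestamp, lag.last_applied_transaction_end_apply_timestamp) / 1000000 AS last_apply_lag_seconds
132
+ FROM performance_schema.replication_applier_status_by_worker lag;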
133
+
134
+ -- Source binlog position
135
+ SHOW BINARY LOG STATUS\G -- MySQL 8.2+; use SHOW MASTER STATUS on older versions
136
+ -- File, Position — use for replica setup without GTID
137
+ ```
138
+
139
+ ## Sources
140
+ - Replication overview: https://dev.mysql.com/doc/refman/8.4/en/replication.html
141
+ - GTID-based replication: https://dev.mysql.com/doc/refman/8.4/en/replication-gtids.html
142
+ - Semi-sync replication: https://dev.mysql.com/doc/refman/8.4/en/replication-semisync.html
@@ -1,15 +1,26 @@
1
1
  ---
2
2
  name: neki
3
- description: Neki-oriented guidance for sharded Postgres planning, placement, and operational constraints.
3
+ description: Neki planning guidance for sharded Postgres architecture decisions and operational guardrails.
4
4
  ---
5
5
 
6
6
  # Neki
7
7
 
8
- Load references as needed:
8
+ Neki is currently pre-GA (announced and under active development), so guidance is architecture-first and risk-aware.
9
+
10
+ ## Planning workflow
11
+
12
+ 1. Define shard key, tenant locality, and cross-shard boundaries.
13
+ 2. Map query classes to expected shard-local or cross-shard paths.
14
+ 3. Define migration milestones and fallback checkpoints.
15
+ 4. Preserve compatibility path with current managed Postgres baseline.
16
+
17
+ ## Performance planning focus
18
+
19
+ - Prioritize shard-local access for hot request paths.
20
+ - Plan read/write amplification expectations early.
21
+ - Avoid hard assumptions about undocumented internals.
22
+
23
+ ## References
24
+
9
25
  - `references/architecture.md`
10
26
  - `references/operations.md`
11
-
12
- Key rules:
13
- - Treat shard boundaries as primary architecture decisions.
14
- - Model tenant/data locality early.
15
- - Validate failover and maintenance behavior before production.