codingbuddy-rules 2.4.2 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ai-rules/CHANGELOG.md +122 -0
- package/.ai-rules/agents/README.md +527 -11
- package/.ai-rules/agents/accessibility-specialist.json +0 -1
- package/.ai-rules/agents/act-mode.json +0 -1
- package/.ai-rules/agents/agent-architect.json +0 -1
- package/.ai-rules/agents/ai-ml-engineer.json +0 -1
- package/.ai-rules/agents/architecture-specialist.json +14 -2
- package/.ai-rules/agents/backend-developer.json +14 -2
- package/.ai-rules/agents/code-quality-specialist.json +0 -1
- package/.ai-rules/agents/data-engineer.json +0 -1
- package/.ai-rules/agents/devops-engineer.json +24 -2
- package/.ai-rules/agents/documentation-specialist.json +0 -1
- package/.ai-rules/agents/eval-mode.json +0 -1
- package/.ai-rules/agents/event-architecture-specialist.json +719 -0
- package/.ai-rules/agents/frontend-developer.json +14 -2
- package/.ai-rules/agents/i18n-specialist.json +0 -1
- package/.ai-rules/agents/integration-specialist.json +11 -1
- package/.ai-rules/agents/migration-specialist.json +676 -0
- package/.ai-rules/agents/mobile-developer.json +0 -1
- package/.ai-rules/agents/observability-specialist.json +747 -0
- package/.ai-rules/agents/performance-specialist.json +24 -2
- package/.ai-rules/agents/plan-mode.json +0 -1
- package/.ai-rules/agents/platform-engineer.json +0 -1
- package/.ai-rules/agents/security-specialist.json +27 -16
- package/.ai-rules/agents/seo-specialist.json +0 -1
- package/.ai-rules/agents/solution-architect.json +0 -1
- package/.ai-rules/agents/technical-planner.json +0 -1
- package/.ai-rules/agents/test-strategy-specialist.json +14 -2
- package/.ai-rules/agents/ui-ux-designer.json +0 -1
- package/.ai-rules/rules/core.md +25 -0
- package/.ai-rules/skills/README.md +35 -0
- package/.ai-rules/skills/database-migration/SKILL.md +531 -0
- package/.ai-rules/skills/database-migration/expand-contract-patterns.md +314 -0
- package/.ai-rules/skills/database-migration/large-scale-migration.md +414 -0
- package/.ai-rules/skills/database-migration/rollback-strategies.md +359 -0
- package/.ai-rules/skills/database-migration/validation-procedures.md +428 -0
- package/.ai-rules/skills/dependency-management/SKILL.md +381 -0
- package/.ai-rules/skills/dependency-management/license-compliance.md +282 -0
- package/.ai-rules/skills/dependency-management/lock-file-management.md +437 -0
- package/.ai-rules/skills/dependency-management/major-upgrade-guide.md +292 -0
- package/.ai-rules/skills/dependency-management/security-vulnerability-response.md +230 -0
- package/.ai-rules/skills/incident-response/SKILL.md +373 -0
- package/.ai-rules/skills/incident-response/communication-templates.md +322 -0
- package/.ai-rules/skills/incident-response/escalation-matrix.md +347 -0
- package/.ai-rules/skills/incident-response/postmortem-template.md +351 -0
- package/.ai-rules/skills/incident-response/severity-classification.md +256 -0
- package/.ai-rules/skills/performance-optimization/CREATION-LOG.md +87 -0
- package/.ai-rules/skills/performance-optimization/SKILL.md +76 -0
- package/.ai-rules/skills/performance-optimization/documentation-template.md +70 -0
- package/.ai-rules/skills/pr-review/SKILL.md +768 -0
- package/.ai-rules/skills/refactoring/SKILL.md +192 -0
- package/.ai-rules/skills/refactoring/refactoring-catalog.md +1377 -0
- package/package.json +1 -1
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# Expand-Contract Patterns
|
|
2
|
+
|
|
3
|
+
Zero-downtime migration techniques for production database changes.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The Expand-Contract pattern (also called Parallel Change) enables schema changes without downtime by:
|
|
8
|
+
1. **Expand**: Add new structure alongside old
|
|
9
|
+
2. **Migrate**: Move data gradually
|
|
10
|
+
3. **Contract**: Remove old structure after verification
|
|
11
|
+
|
|
12
|
+
## When to Use Expand-Contract
|
|
13
|
+
|
|
14
|
+
| Scenario | Direct Change | Expand-Contract |
|
|
15
|
+
|----------|---------------|-----------------|
|
|
16
|
+
| Add nullable column | OK | Overkill |
|
|
17
|
+
| Add NOT NULL column | Risky | Recommended |
|
|
18
|
+
| Rename column | Impossible | Required |
|
|
19
|
+
| Change column type | Risky | Required |
|
|
20
|
+
| Split table | Impossible | Required |
|
|
21
|
+
| Remove column | OK (if unused) | Recommended |
|
|
22
|
+
|
|
23
|
+
## Pattern 1: Adding NOT NULL Column
|
|
24
|
+
|
|
25
|
+
**Problem:** Adding NOT NULL column requires default or backfill, which locks table.
|
|
26
|
+
|
|
27
|
+
**Solution: Three-Phase Approach**
|
|
28
|
+
|
|
29
|
+
### Phase 1: Expand (Add Nullable)
|
|
30
|
+
```sql
|
|
31
|
+
-- Add as nullable first (instant, no lock)
|
|
32
|
+
ALTER TABLE users ADD COLUMN email_verified BOOLEAN;
|
|
33
|
+
|
|
34
|
+
-- Application: Write to new column, read from both
|
|
35
|
+
-- Code: user.email_verified ?? user.legacy_verified
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Phase 2: Migrate (Backfill)
|
|
39
|
+
```sql
|
|
40
|
+
-- Backfill in batches (no lock)
|
|
41
|
+
UPDATE users
|
|
42
|
+
SET email_verified = COALESCE(legacy_verified, false)
|
|
43
|
+
WHERE email_verified IS NULL
|
|
44
|
+
AND id BETWEEN :start AND :end;
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Phase 3: Contract (Add Constraint)
|
|
48
|
+
```sql
|
|
49
|
+
-- After backfill complete, add NOT NULL
|
|
50
|
+
ALTER TABLE users ALTER COLUMN email_verified SET NOT NULL;
|
|
51
|
+
|
|
52
|
+
-- Remove old column (if replacing)
|
|
53
|
+
ALTER TABLE users DROP COLUMN legacy_verified;
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Application Changes:**
|
|
57
|
+
1. Deploy: Write to both columns
|
|
58
|
+
2. After Phase 2: Switch reads to new column
|
|
59
|
+
3. After Phase 3: Remove old column references
|
|
60
|
+
|
|
61
|
+
## Pattern 2: Renaming Column
|
|
62
|
+
|
|
63
|
+
**Problem:** Renaming column breaks existing queries instantly.
|
|
64
|
+
|
|
65
|
+
**Solution: Shadow Column**
|
|
66
|
+
|
|
67
|
+
### Phase 1: Add New Column
|
|
68
|
+
```sql
|
|
69
|
+
ALTER TABLE products ADD COLUMN product_name VARCHAR(255);
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Phase 2: Dual-Write Trigger
|
|
73
|
+
```sql
|
|
74
|
+
-- Trigger to sync old -> new
|
|
75
|
+
CREATE TRIGGER sync_product_name
|
|
76
|
+
BEFORE INSERT OR UPDATE ON products
|
|
77
|
+
FOR EACH ROW
|
|
78
|
+
EXECUTE FUNCTION sync_columns('name', 'product_name');
|
|
79
|
+
|
|
80
|
+
-- Backfill existing data
|
|
81
|
+
UPDATE products SET product_name = name WHERE product_name IS NULL;
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Phase 3: Application Migration
|
|
85
|
+
```
|
|
86
|
+
1. Deploy: Read from new, write to both
|
|
87
|
+
2. Verify: All reads using new column
|
|
88
|
+
3. Remove trigger and old column
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Phase 4: Contract
|
|
92
|
+
```sql
|
|
93
|
+
DROP TRIGGER sync_product_name ON products;
|
|
94
|
+
ALTER TABLE products DROP COLUMN name;
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Pattern 3: Changing Column Type
|
|
98
|
+
|
|
99
|
+
**Problem:** Type change may require table rewrite and data transformation.
|
|
100
|
+
|
|
101
|
+
**Solution: Shadow Column with Transform**
|
|
102
|
+
|
|
103
|
+
### Example: VARCHAR(50) to TEXT with validation
|
|
104
|
+
|
|
105
|
+
### Phase 1: Add New Column
|
|
106
|
+
```sql
|
|
107
|
+
ALTER TABLE comments ADD COLUMN content_v2 TEXT;
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Phase 2: Backfill with Transform
|
|
111
|
+
```sql
|
|
112
|
+
-- Batch update with transformation
|
|
113
|
+
UPDATE comments
|
|
114
|
+
SET content_v2 = TRIM(content)
|
|
115
|
+
WHERE content_v2 IS NULL
|
|
116
|
+
AND id BETWEEN :start AND :end;
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Phase 3: Verify and Switch
|
|
120
|
+
```sql
|
|
121
|
+
-- Verify all data migrated
|
|
122
|
+
SELECT COUNT(*) FROM comments WHERE content_v2 IS NULL AND content IS NOT NULL;
|
|
123
|
+
|
|
124
|
+
-- Application: Switch to new column
|
|
125
|
+
-- Rename in next migration
|
|
126
|
+
ALTER TABLE comments DROP COLUMN content;
|
|
127
|
+
ALTER TABLE comments RENAME COLUMN content_v2 TO content;
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Pattern 4: Splitting Tables
|
|
131
|
+
|
|
132
|
+
**Problem:** Monolithic table needs to be split for performance or normalization.
|
|
133
|
+
|
|
134
|
+
**Solution: Gradual Extraction**
|
|
135
|
+
|
|
136
|
+
### Example: Extract `user_preferences` from `users`
|
|
137
|
+
|
|
138
|
+
### Phase 1: Create New Table
|
|
139
|
+
```sql
|
|
140
|
+
CREATE TABLE user_preferences (
|
|
141
|
+
user_id BIGINT PRIMARY KEY REFERENCES users(id),
|
|
142
|
+
theme VARCHAR(50),
|
|
143
|
+
notifications_enabled BOOLEAN,
|
|
144
|
+
language VARCHAR(10)
|
|
145
|
+
);
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Phase 2: Dual-Write
|
|
149
|
+
```sql
|
|
150
|
+
-- Trigger on users to sync to new table
|
|
151
|
+
CREATE TRIGGER sync_user_preferences
|
|
152
|
+
AFTER INSERT OR UPDATE ON users
|
|
153
|
+
FOR EACH ROW
|
|
154
|
+
EXECUTE FUNCTION sync_to_preferences();
|
|
155
|
+
|
|
156
|
+
-- Backfill existing data
|
|
157
|
+
INSERT INTO user_preferences (user_id, theme, notifications_enabled, language)
|
|
158
|
+
SELECT id, theme, notifications_enabled, language
|
|
159
|
+
FROM users
|
|
160
|
+
ON CONFLICT (user_id) DO NOTHING;
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Phase 3: Application Migration
|
|
164
|
+
```
|
|
165
|
+
1. Read from user_preferences (with fallback to users)
|
|
166
|
+
2. Write to both tables
|
|
167
|
+
3. Remove fallback after verification
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Phase 4: Contract
|
|
171
|
+
```sql
|
|
172
|
+
DROP TRIGGER sync_user_preferences ON users;
|
|
173
|
+
ALTER TABLE users DROP COLUMN theme;
|
|
174
|
+
ALTER TABLE users DROP COLUMN notifications_enabled;
|
|
175
|
+
ALTER TABLE users DROP COLUMN language;
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Pattern 5: Online Index Creation
|
|
179
|
+
|
|
180
|
+
**Problem:** CREATE INDEX locks table for writes.
|
|
181
|
+
|
|
182
|
+
**Solution: CONCURRENTLY option (PostgreSQL)**
|
|
183
|
+
|
|
184
|
+
```sql
|
|
185
|
+
-- Non-blocking index creation
|
|
186
|
+
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
|
|
187
|
+
|
|
188
|
+
-- Note: Takes longer, but no locks
|
|
189
|
+
-- Note: Can fail, leaving invalid index
|
|
190
|
+
-- Always verify:
|
|
191
|
+
SELECT c.relname, i.indisvalid
FROM pg_class c
JOIN pg_index i ON i.indexrelid = c.oid
WHERE c.relname = 'idx_users_email';
-- indisvalid = false means the CONCURRENTLY build failed; DROP INDEX and retry
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**MySQL Online DDL:**
|
|
195
|
+
```sql
|
|
196
|
+
ALTER TABLE users ADD INDEX idx_email (email), ALGORITHM=INPLACE, LOCK=NONE;
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Pattern 6: Foreign Key Addition
|
|
200
|
+
|
|
201
|
+
**Problem:** Adding FK requires validation scan, blocking writes.
|
|
202
|
+
|
|
203
|
+
**Solution: Add with NOT VALID, then VALIDATE CONSTRAINT separately**
|
|
204
|
+
|
|
205
|
+
### PostgreSQL
|
|
206
|
+
```sql
|
|
207
|
+
-- Add FK without validation (instant)
|
|
208
|
+
ALTER TABLE orders
|
|
209
|
+
ADD CONSTRAINT fk_orders_user
|
|
210
|
+
FOREIGN KEY (user_id) REFERENCES users(id)
|
|
211
|
+
NOT VALID;
|
|
212
|
+
|
|
213
|
+
-- Validate in background (non-blocking)
|
|
214
|
+
ALTER TABLE orders VALIDATE CONSTRAINT fk_orders_user;
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### MySQL
|
|
218
|
+
```sql
|
|
219
|
+
-- Disable FK checks temporarily (CAUTION: MySQL will NOT retroactively validate
-- existing rows when checks are off, so the manual integrity check below is mandatory)
|
|
220
|
+
SET FOREIGN_KEY_CHECKS = 0;
|
|
221
|
+
|
|
222
|
+
ALTER TABLE orders
|
|
223
|
+
ADD CONSTRAINT fk_orders_user
|
|
224
|
+
FOREIGN KEY (user_id) REFERENCES users(id);
|
|
225
|
+
|
|
226
|
+
SET FOREIGN_KEY_CHECKS = 1;
|
|
227
|
+
|
|
228
|
+
-- Run integrity check manually
|
|
229
|
+
SELECT o.id, o.user_id
|
|
230
|
+
FROM orders o
|
|
231
|
+
LEFT JOIN users u ON o.user_id = u.id
|
|
232
|
+
WHERE u.id IS NULL;
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## Duration Guidelines
|
|
236
|
+
|
|
237
|
+
| Table Size | Expand Phase | Migrate Phase | Contract Phase |
|
|
238
|
+
|------------|--------------|---------------|----------------|
|
|
239
|
+
| <100K rows | Instant | <1 min | Instant |
|
|
240
|
+
| 100K-1M | Instant | 5-15 min | <1 min |
|
|
241
|
+
| 1M-10M | Instant | 1-4 hours | 5-15 min |
|
|
242
|
+
| 10M-100M | Instant | 4-24 hours | 1-4 hours |
|
|
243
|
+
| >100M | Instant | Days (batched) | 4-24 hours |
|
|
244
|
+
|
|
245
|
+
## Application Coordination
|
|
246
|
+
|
|
247
|
+
### Feature Flags for Migration
|
|
248
|
+
```typescript
|
|
249
|
+
// Read with fallback
|
|
250
|
+
function getUserEmail(user: User): string {
|
|
251
|
+
if (featureFlags.useNewEmailColumn) {
|
|
252
|
+
return user.email_normalized ?? user.email;
|
|
253
|
+
}
|
|
254
|
+
return user.email;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// Write to both
|
|
258
|
+
async function updateUserEmail(userId: string, email: string) {
|
|
259
|
+
await db.user.update({
|
|
260
|
+
where: { id: userId },
|
|
261
|
+
data: {
|
|
262
|
+
email: email,
|
|
263
|
+
email_normalized: normalizeEmail(email),
|
|
264
|
+
},
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Rollout Strategy
|
|
270
|
+
1. **10% traffic**: Test new column reads
|
|
271
|
+
2. **50% traffic**: Verify performance
|
|
272
|
+
3. **100% traffic**: Full cutover
|
|
273
|
+
4. **Cleanup**: Remove old column
|
|
274
|
+
|
|
275
|
+
## Common Mistakes
|
|
276
|
+
|
|
277
|
+
| Mistake | Consequence | Prevention |
|
|
278
|
+
|---------|-------------|------------|
|
|
279
|
+
| Skip dual-write | Data inconsistency | Always write to both during migration |
|
|
280
|
+
| Contract too early | Application errors | Verify all reads switched before contract |
|
|
281
|
+
| No feature flag | Risky rollback | Use flags to control migration phases |
|
|
282
|
+
| Ignore failed index | Wasted space, slow queries | Check index validity after CONCURRENTLY |
|
|
283
|
+
| Rush timeline | Incomplete migration | Plan realistic durations per phase |
|
|
284
|
+
|
|
285
|
+
## Verification Queries
|
|
286
|
+
|
|
287
|
+
### Check Migration Progress
|
|
288
|
+
```sql
|
|
289
|
+
-- Percentage complete
|
|
290
|
+
SELECT
|
|
291
|
+
COUNT(*) FILTER (WHERE new_column IS NOT NULL) as migrated,
|
|
292
|
+
COUNT(*) as total,
|
|
293
|
+
ROUND(100.0 * COUNT(*) FILTER (WHERE new_column IS NOT NULL) / COUNT(*), 2) as percent
|
|
294
|
+
FROM target_table;
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Verify Data Consistency
|
|
298
|
+
```sql
|
|
299
|
+
-- Compare old vs new
|
|
300
|
+
SELECT COUNT(*)
|
|
301
|
+
FROM target_table
|
|
302
|
+
WHERE old_column != new_column
|
|
303
|
+
AND old_column IS NOT NULL
|
|
304
|
+
AND new_column IS NOT NULL;
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
### Check for Orphaned References
|
|
308
|
+
```sql
|
|
309
|
+
-- After FK migration
|
|
310
|
+
SELECT COUNT(*)
|
|
311
|
+
FROM child_table c
|
|
312
|
+
LEFT JOIN parent_table p ON c.new_parent_id = p.id
|
|
313
|
+
WHERE c.new_parent_id IS NOT NULL AND p.id IS NULL;
|
|
314
|
+
```
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
# Large-Scale Migration Guide
|
|
2
|
+
|
|
3
|
+
Strategies for migrating tables with millions or billions of rows.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Large tables require special handling to avoid:
|
|
8
|
+
- Lock contention causing application timeouts
|
|
9
|
+
- Replication lag affecting read replicas
|
|
10
|
+
- Memory exhaustion from large transactions
|
|
11
|
+
- Blocking other database operations
|
|
12
|
+
|
|
13
|
+
## Batch Size Guidelines
|
|
14
|
+
|
|
15
|
+
| Row Count | Recommended Batch | Pause Between | Est. Duration |
|
|
16
|
+
|-----------|-------------------|---------------|---------------|
|
|
17
|
+
| 1M-10M | 10,000 | 100ms | 10-60 min |
|
|
18
|
+
| 10M-100M | 5,000 | 200ms | 1-8 hours |
|
|
19
|
+
| 100M-1B | 1,000 | 500ms | 8-48 hours |
|
|
20
|
+
| >1B | 500 | 1s | Days |
|
|
21
|
+
|
|
22
|
+
**Factors affecting batch size:**
|
|
23
|
+
- Row width (bytes per row)
|
|
24
|
+
- Index count (more indexes = slower writes)
|
|
25
|
+
- Available memory
|
|
26
|
+
- Replication topology
|
|
27
|
+
- Peak vs off-peak traffic
|
|
28
|
+
|
|
29
|
+
## Lock Minimization Strategies
|
|
30
|
+
|
|
31
|
+
### Strategy 1: Primary Key Chunking
|
|
32
|
+
|
|
33
|
+
```sql
|
|
34
|
+
-- Find chunk boundaries
|
|
35
|
+
SELECT id FROM target_table
|
|
36
|
+
ORDER BY id
|
|
37
|
+
OFFSET 10000 ROWS FETCH NEXT 1 ROWS ONLY;
|
|
38
|
+
|
|
39
|
+
-- Process chunk
|
|
40
|
+
UPDATE target_table
|
|
41
|
+
SET column = transform(column)
|
|
42
|
+
WHERE id >= :chunk_start AND id < :chunk_end;
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**Benefits:**
|
|
46
|
+
- Predictable chunk sizes
|
|
47
|
+
- Uses primary key index efficiently
|
|
48
|
+
- Easy progress tracking
|
|
49
|
+
|
|
50
|
+
### Strategy 2: Modulo Partitioning
|
|
51
|
+
|
|
52
|
+
```sql
|
|
53
|
+
-- Process rows where id % 100 = 0, then 1, then 2, etc.
|
|
54
|
+
UPDATE target_table
|
|
55
|
+
SET column = transform(column)
|
|
56
|
+
WHERE id % 100 = :partition
|
|
57
|
+
AND column IS NULL;
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Benefits:**
|
|
61
|
+
- Spreads load across table
|
|
62
|
+
- Reduces hot spots
|
|
63
|
+
- Good for random access patterns
|
|
64
|
+
|
|
65
|
+
### Strategy 3: Range-Based Processing
|
|
66
|
+
|
|
67
|
+
```sql
|
|
68
|
+
-- For timestamp-based tables
|
|
69
|
+
UPDATE target_table
|
|
70
|
+
SET column = transform(column)
|
|
71
|
+
WHERE created_at >= :range_start
|
|
72
|
+
AND created_at < :range_end;
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Benefits:**
|
|
76
|
+
- Aligns with data partitioning
|
|
77
|
+
- Good for time-series data
|
|
78
|
+
- Predictable lock scope
|
|
79
|
+
|
|
80
|
+
## Progress Monitoring
|
|
81
|
+
|
|
82
|
+
### Progress Table Pattern
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
CREATE TABLE migration_progress (
|
|
86
|
+
migration_name VARCHAR(100) PRIMARY KEY,
|
|
87
|
+
last_processed_id BIGINT,
|
|
88
|
+
total_processed BIGINT DEFAULT 0,
|
|
89
|
+
started_at TIMESTAMP DEFAULT NOW(),
|
|
90
|
+
last_updated_at TIMESTAMP DEFAULT NOW(),
|
|
91
|
+
estimated_total BIGINT,
|
|
92
|
+
status VARCHAR(20) DEFAULT 'running'
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
-- Update progress after each batch
|
|
96
|
+
UPDATE migration_progress
|
|
97
|
+
SET last_processed_id = :current_id,
|
|
98
|
+
total_processed = total_processed + :batch_count,
|
|
99
|
+
last_updated_at = NOW()
|
|
100
|
+
WHERE migration_name = :name;
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### ETA Calculation
|
|
104
|
+
|
|
105
|
+
```sql
|
|
106
|
+
SELECT
|
|
107
|
+
migration_name,
|
|
108
|
+
total_processed,
|
|
109
|
+
estimated_total,
|
|
110
|
+
ROUND(100.0 * total_processed / estimated_total, 2) as percent_complete,
|
|
111
|
+
last_updated_at - started_at as elapsed,
|
|
112
|
+
(last_updated_at - started_at) * (estimated_total - total_processed) / NULLIF(total_processed, 0) as eta_remaining
|
|
113
|
+
FROM migration_progress
|
|
114
|
+
WHERE migration_name = :name;
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Real-Time Monitoring Query
|
|
118
|
+
|
|
119
|
+
```sql
|
|
120
|
+
-- PostgreSQL: Monitor active migration
|
|
121
|
+
SELECT
|
|
122
|
+
pid,
|
|
123
|
+
now() - query_start as duration,
|
|
124
|
+
state,
|
|
125
|
+
wait_event_type,
|
|
126
|
+
wait_event,
|
|
127
|
+
LEFT(query, 100) as query_preview
|
|
128
|
+
FROM pg_stat_activity
|
|
129
|
+
WHERE query LIKE '%migration%'
|
|
130
|
+
AND state != 'idle';
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Peak Traffic Avoidance
|
|
134
|
+
|
|
135
|
+
### Traffic Pattern Analysis
|
|
136
|
+
|
|
137
|
+
```sql
|
|
138
|
+
-- Find low-traffic windows (PostgreSQL)
|
|
139
|
+
SELECT
|
|
140
|
+
date_trunc('hour', created_at) as hour,
|
|
141
|
+
COUNT(*) as transactions
|
|
142
|
+
FROM audit_log
|
|
143
|
+
WHERE created_at > NOW() - INTERVAL '7 days'
|
|
144
|
+
GROUP BY 1
|
|
145
|
+
ORDER BY 2 ASC
|
|
146
|
+
LIMIT 10;
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Adaptive Throttling
|
|
150
|
+
|
|
151
|
+
```sql
|
|
152
|
+
-- Pseudocode for adaptive batch processing
|
|
153
|
+
DO $$
|
|
154
|
+
DECLARE
|
|
155
|
+
batch_size INT := 10000;
|
|
156
|
+
min_batch INT := 1000;
|
|
157
|
+
max_batch INT := 50000;
|
|
158
|
+
target_lag_ms INT := 5000;
|
|
159
|
+
current_lag_ms INT;
|
|
160
|
+
BEGIN
|
|
161
|
+
LOOP
|
|
162
|
+
-- Process batch
|
|
163
|
+
PERFORM process_batch(batch_size);
|
|
164
|
+
|
|
165
|
+
-- Check replication lag
|
|
166
|
+
SELECT EXTRACT(EPOCH FROM replay_lag) * 1000 INTO current_lag_ms
-- EPOCH gives the interval's total seconds; MILLISECONDS would return only the sub-second component
|
|
167
|
+
FROM pg_stat_replication;
|
|
168
|
+
|
|
169
|
+
-- Adjust batch size based on lag
|
|
170
|
+
IF current_lag_ms > target_lag_ms THEN
|
|
171
|
+
batch_size := GREATEST(min_batch, batch_size * 0.8);
|
|
172
|
+
PERFORM pg_sleep(1); -- Extra pause
|
|
173
|
+
ELSIF current_lag_ms < target_lag_ms * 0.5 THEN
|
|
174
|
+
batch_size := LEAST(max_batch, batch_size * 1.2);
|
|
175
|
+
END IF;
|
|
176
|
+
|
|
177
|
+
EXIT WHEN migration_complete();
|
|
178
|
+
END LOOP;
|
|
179
|
+
END $$;
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Scheduled Execution
|
|
183
|
+
|
|
184
|
+
```yaml
|
|
185
|
+
# Example: Kubernetes CronJob for off-peak migration
|
|
186
|
+
apiVersion: batch/v1
|
|
187
|
+
kind: CronJob
|
|
188
|
+
metadata:
|
|
189
|
+
name: database-migration
|
|
190
|
+
spec:
|
|
191
|
+
schedule: "0 2 * * *" # 2 AM daily
|
|
192
|
+
jobTemplate:
|
|
193
|
+
spec:
|
|
194
|
+
template:
|
|
195
|
+
spec:
|
|
196
|
+
containers:
|
|
197
|
+
- name: migration
|
|
198
|
+
image: migration-runner
|
|
199
|
+
env:
|
|
200
|
+
- name: MAX_DURATION_HOURS
|
|
201
|
+
value: "4"
|
|
202
|
+
- name: STOP_BEFORE_PEAK
|
|
203
|
+
value: "06:00"
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Memory Management
|
|
207
|
+
|
|
208
|
+
### Cursor-Based Processing
|
|
209
|
+
|
|
210
|
+
```sql
|
|
211
|
+
-- PostgreSQL: Server-side cursor
|
|
212
|
+
DECLARE migration_cursor CURSOR FOR
|
|
213
|
+
SELECT id, column FROM target_table
|
|
214
|
+
WHERE needs_migration = true
|
|
215
|
+
ORDER BY id;
|
|
216
|
+
|
|
217
|
+
-- Fetch in batches
|
|
218
|
+
FETCH 1000 FROM migration_cursor;
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Streaming Updates (Application)
|
|
222
|
+
|
|
223
|
+
```typescript
|
|
224
|
+
// Node.js example with cursor streaming
|
|
225
|
+
async function* streamRows(db: Database, query: string) {
|
|
226
|
+
const cursor = db.query(query).cursor();
|
|
227
|
+
for await (const row of cursor) {
|
|
228
|
+
yield row;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
async function migrateBatched(db: Database) {
|
|
233
|
+
let batch: Row[] = [];
|
|
234
|
+
const BATCH_SIZE = 1000;
|
|
235
|
+
|
|
236
|
+
for await (const row of streamRows(db, 'SELECT * FROM table')) {
|
|
237
|
+
batch.push(transformRow(row));
|
|
238
|
+
|
|
239
|
+
if (batch.length >= BATCH_SIZE) {
|
|
240
|
+
await db.batchUpdate(batch);
|
|
241
|
+
batch = [];
|
|
242
|
+
await sleep(100); // Pause between batches
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
if (batch.length > 0) {
|
|
247
|
+
await db.batchUpdate(batch);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Replication Lag Management
|
|
253
|
+
|
|
254
|
+
### Monitor Lag
|
|
255
|
+
|
|
256
|
+
```sql
|
|
257
|
+
-- PostgreSQL
|
|
258
|
+
SELECT client_addr, state, sent_lsn, replay_lsn,
|
|
259
|
+
(sent_lsn - replay_lsn) as lag_bytes,
|
|
260
|
+
replay_lag
|
|
261
|
+
FROM pg_stat_replication;
|
|
262
|
+
|
|
263
|
+
-- MySQL
|
|
264
|
+
SHOW REPLICA STATUS\G  -- MySQL 8.0.22+; use SHOW SLAVE STATUS on older versions
|
|
265
|
+
-- Look for: Seconds_Behind_Source (named Seconds_Behind_Master before MySQL 8.0.22)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Lag-Aware Processing
|
|
269
|
+
|
|
270
|
+
```python
|
|
271
|
+
# Python example
|
|
272
|
+
def migrate_with_lag_check(db, max_lag_seconds=10):
|
|
273
|
+
while True:
|
|
274
|
+
lag = db.get_replication_lag()
|
|
275
|
+
|
|
276
|
+
if lag > max_lag_seconds:
|
|
277
|
+
print(f"Lag {lag}s > threshold, pausing...")
|
|
278
|
+
time.sleep(lag) # Wait for lag to catch up
|
|
279
|
+
continue
|
|
280
|
+
|
|
281
|
+
rows_updated = db.process_batch(1000)
|
|
282
|
+
|
|
283
|
+
if rows_updated == 0:
|
|
284
|
+
break
|
|
285
|
+
|
|
286
|
+
time.sleep(0.1) # Brief pause
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
## Abort and Resume
|
|
290
|
+
|
|
291
|
+
### Checkpoint Pattern
|
|
292
|
+
|
|
293
|
+
```sql
|
|
294
|
+
-- Create checkpoint table
|
|
295
|
+
CREATE TABLE migration_checkpoints (
|
|
296
|
+
checkpoint_id SERIAL PRIMARY KEY,
|
|
297
|
+
migration_name VARCHAR(100),
|
|
298
|
+
last_id BIGINT,
|
|
299
|
+
batch_count INT,
|
|
300
|
+
created_at TIMESTAMP DEFAULT NOW()
|
|
301
|
+
);
|
|
302
|
+
|
|
303
|
+
-- Save checkpoint after each batch
|
|
304
|
+
INSERT INTO migration_checkpoints (migration_name, last_id, batch_count)
|
|
305
|
+
VALUES (:name, :last_id, :count);
|
|
306
|
+
|
|
307
|
+
-- Resume from checkpoint
|
|
308
|
+
SELECT last_id FROM migration_checkpoints
|
|
309
|
+
WHERE migration_name = :name
|
|
310
|
+
ORDER BY checkpoint_id DESC
|
|
311
|
+
LIMIT 1;
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Abort Triggers
|
|
315
|
+
|
|
316
|
+
| Condition | Threshold | Action |
|
|
317
|
+
|-----------|-----------|--------|
|
|
318
|
+
| Replication lag | >30s | Pause |
|
|
319
|
+
| Replication lag | >60s | Abort |
|
|
320
|
+
| Lock wait | >30s | Reduce batch |
|
|
321
|
+
| Lock wait | >60s | Abort |
|
|
322
|
+
| Error rate | >1% | Pause |
|
|
323
|
+
| Error rate | >5% | Abort |
|
|
324
|
+
| Memory usage | >80% | Reduce batch |
|
|
325
|
+
| Memory usage | >95% | Abort |
|
|
326
|
+
|
|
327
|
+
### Graceful Shutdown
|
|
328
|
+
|
|
329
|
+
```python
|
|
330
|
+
import signal
|
|
331
|
+
import sys
|
|
332
|
+
|
|
333
|
+
running = True
|
|
334
|
+
|
|
335
|
+
def handle_signal(signum, frame):
|
|
336
|
+
global running
|
|
337
|
+
print("Received shutdown signal, finishing current batch...")
|
|
338
|
+
running = False
|
|
339
|
+
|
|
340
|
+
signal.signal(signal.SIGTERM, handle_signal)
|
|
341
|
+
signal.signal(signal.SIGINT, handle_signal)
|
|
342
|
+
|
|
343
|
+
while running:
|
|
344
|
+
process_batch()
|
|
345
|
+
save_checkpoint()
|
|
346
|
+
|
|
347
|
+
print("Migration paused, can resume from checkpoint")
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## Parallel Processing
|
|
351
|
+
|
|
352
|
+
### Multi-Worker Pattern
|
|
353
|
+
|
|
354
|
+
```
|
|
355
|
+
Worker 1: id % 4 = 0
|
|
356
|
+
Worker 2: id % 4 = 1
|
|
357
|
+
Worker 3: id % 4 = 2
|
|
358
|
+
Worker 4: id % 4 = 3
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
### Coordination Table
|
|
362
|
+
|
|
363
|
+
```sql
|
|
364
|
+
CREATE TABLE migration_workers (
|
|
365
|
+
worker_id INT PRIMARY KEY,
|
|
366
|
+
range_start BIGINT,
|
|
367
|
+
range_end BIGINT,
|
|
368
|
+
status VARCHAR(20),
|
|
369
|
+
last_processed BIGINT,
|
|
370
|
+
started_at TIMESTAMP,
|
|
371
|
+
completed_at TIMESTAMP
|
|
372
|
+
);
|
|
373
|
+
|
|
374
|
+
-- Claim work range
|
|
375
|
+
UPDATE migration_workers
|
|
376
|
+
SET status = 'running', started_at = NOW()
|
|
377
|
+
WHERE worker_id = :id AND status = 'pending'
|
|
378
|
+
RETURNING range_start, range_end;
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
## Validation During Migration
|
|
382
|
+
|
|
383
|
+
### Continuous Validation
|
|
384
|
+
|
|
385
|
+
```sql
|
|
386
|
+
-- Run periodically during migration
|
|
387
|
+
SELECT
|
|
388
|
+
(SELECT COUNT(*) FROM source_table) as source_count,
|
|
389
|
+
(SELECT COUNT(*) FROM target_table) as target_count,
|
|
390
|
+
(SELECT COUNT(*) FROM source_table WHERE migrated_at IS NOT NULL) as migrated_count;
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
### Sampling Validation
|
|
394
|
+
|
|
395
|
+
```sql
|
|
396
|
+
-- Verify random sample
|
|
397
|
+
SELECT s.id, s.value, t.transformed_value,
|
|
398
|
+
expected_transform(s.value) as expected
|
|
399
|
+
FROM source_table s
|
|
400
|
+
JOIN target_table t ON s.id = t.id
|
|
401
|
+
WHERE random() < 0.001 -- 0.1% sample
|
|
402
|
+
AND t.transformed_value != expected_transform(s.value);
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
## Common Pitfalls
|
|
406
|
+
|
|
407
|
+
| Pitfall | Impact | Prevention |
|
|
408
|
+
|---------|--------|------------|
|
|
409
|
+
| No checkpoints | Lost progress on failure | Save checkpoint every batch |
|
|
410
|
+
| Fixed batch size | Inefficient or overwhelming | Adaptive sizing based on lag |
|
|
411
|
+
| Process all rows | Memory exhaustion | Use streaming/cursors |
|
|
412
|
+
| Single transaction | Lock entire table | Commit per batch |
|
|
413
|
+
| No pause between batches | Replication lag | Add configurable sleep |
|
|
414
|
+
| Ignore errors | Silent data loss | Log and retry or abort |
|