@patricio0312rev/skillset 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/LICENSE +21 -0
- package/README.md +176 -0
- package/bin/cli.js +37 -0
- package/package.json +55 -0
- package/src/commands/init.js +301 -0
- package/src/index.js +168 -0
- package/src/lib/config.js +200 -0
- package/src/lib/generator.js +166 -0
- package/src/utils/display.js +95 -0
- package/src/utils/readme.js +196 -0
- package/src/utils/tool-specific.js +233 -0
- package/templates/ai-engineering/agent-orchestration-planner/ SKILL.md +266 -0
- package/templates/ai-engineering/cost-latency-optimizer/ SKILL.md +270 -0
- package/templates/ai-engineering/doc-to-vector-dataset-generator/ SKILL.md +239 -0
- package/templates/ai-engineering/evaluation-harness/ SKILL.md +219 -0
- package/templates/ai-engineering/guardrails-safety-filter-builder/ SKILL.md +226 -0
- package/templates/ai-engineering/llm-debugger/ SKILL.md +283 -0
- package/templates/ai-engineering/prompt-regression-tester/ SKILL.md +216 -0
- package/templates/ai-engineering/prompt-template-builder/ SKILL.md +393 -0
- package/templates/ai-engineering/rag-pipeline-builder/ SKILL.md +244 -0
- package/templates/ai-engineering/tool-function-schema-designer/ SKILL.md +219 -0
- package/templates/architecture/adr-writer/ SKILL.md +250 -0
- package/templates/architecture/api-versioning-deprecation-planner/ SKILL.md +331 -0
- package/templates/architecture/domain-model-boundaries-mapper/ SKILL.md +300 -0
- package/templates/architecture/migration-planner/ SKILL.md +376 -0
- package/templates/architecture/performance-budget-setter/ SKILL.md +318 -0
- package/templates/architecture/reliability-strategy-builder/ SKILL.md +286 -0
- package/templates/architecture/rfc-generator/ SKILL.md +362 -0
- package/templates/architecture/scalability-playbook/ SKILL.md +279 -0
- package/templates/architecture/system-design-generator/ SKILL.md +339 -0
- package/templates/architecture/tech-debt-prioritizer/ SKILL.md +329 -0
- package/templates/backend/api-contract-normalizer/ SKILL.md +487 -0
- package/templates/backend/api-endpoint-generator/ SKILL.md +415 -0
- package/templates/backend/auth-module-builder/ SKILL.md +99 -0
- package/templates/backend/background-jobs-designer/ SKILL.md +166 -0
- package/templates/backend/caching-strategist/ SKILL.md +190 -0
- package/templates/backend/error-handling-standardizer/ SKILL.md +174 -0
- package/templates/backend/rate-limiting-abuse-protection/ SKILL.md +147 -0
- package/templates/backend/rbac-permissions-builder/ SKILL.md +158 -0
- package/templates/backend/service-layer-extractor/ SKILL.md +269 -0
- package/templates/backend/webhook-receiver-hardener/ SKILL.md +211 -0
- package/templates/ci-cd/artifact-sbom-publisher/ SKILL.md +236 -0
- package/templates/ci-cd/caching-strategy-optimizer/ SKILL.md +195 -0
- package/templates/ci-cd/deployment-checklist-generator/ SKILL.md +381 -0
- package/templates/ci-cd/github-actions-pipeline-creator/ SKILL.md +348 -0
- package/templates/ci-cd/monorepo-ci-optimizer/ SKILL.md +298 -0
- package/templates/ci-cd/preview-environments-builder/ SKILL.md +187 -0
- package/templates/ci-cd/quality-gates-enforcer/ SKILL.md +342 -0
- package/templates/ci-cd/release-automation-builder/ SKILL.md +281 -0
- package/templates/ci-cd/rollback-workflow-builder/ SKILL.md +372 -0
- package/templates/ci-cd/secrets-env-manager/ SKILL.md +242 -0
- package/templates/db-management/backup-restore-runbook-generator/ SKILL.md +505 -0
- package/templates/db-management/data-integrity-auditor/ SKILL.md +505 -0
- package/templates/db-management/data-retention-archiving-planner/ SKILL.md +430 -0
- package/templates/db-management/data-seeding-fixtures-builder/ SKILL.md +375 -0
- package/templates/db-management/db-performance-watchlist/ SKILL.md +425 -0
- package/templates/db-management/etl-sync-job-builder/ SKILL.md +457 -0
- package/templates/db-management/multi-tenant-safety-checker/ SKILL.md +398 -0
- package/templates/db-management/prisma-migration-assistant/ SKILL.md +379 -0
- package/templates/db-management/schema-consistency-checker/ SKILL.md +440 -0
- package/templates/db-management/sql-query-optimizer/ SKILL.md +324 -0
- package/templates/foundation/changelog-writer/ SKILL.md +431 -0
- package/templates/foundation/code-formatter-installer/ SKILL.md +320 -0
- package/templates/foundation/codebase-summarizer/ SKILL.md +360 -0
- package/templates/foundation/dependency-doctor/ SKILL.md +163 -0
- package/templates/foundation/dev-environment-bootstrapper/ SKILL.md +259 -0
- package/templates/foundation/dev-onboarding-builder/ SKILL.md +556 -0
- package/templates/foundation/docs-starter-kit/ SKILL.md +574 -0
- package/templates/foundation/explaining-code/SKILL.md +13 -0
- package/templates/foundation/git-hygiene-enforcer/ SKILL.md +455 -0
- package/templates/foundation/project-scaffolder/ SKILL.md +65 -0
- package/templates/foundation/project-scaffolder/references/templates.md +126 -0
- package/templates/foundation/repo-structure-linter/ SKILL.md +0 -0
- package/templates/foundation/repo-structure-linter/references/conventions.md +98 -0
- package/templates/frontend/animation-micro-interaction-pack/ SKILL.md +41 -0
- package/templates/frontend/component-scaffold-generator/ SKILL.md +562 -0
- package/templates/frontend/design-to-component-translator/ SKILL.md +547 -0
- package/templates/frontend/form-wizard-builder/ SKILL.md +553 -0
- package/templates/frontend/frontend-refactor-planner/ SKILL.md +37 -0
- package/templates/frontend/i18n-frontend-implementer/ SKILL.md +44 -0
- package/templates/frontend/modal-drawer-system/ SKILL.md +377 -0
- package/templates/frontend/page-layout-builder/ SKILL.md +630 -0
- package/templates/frontend/state-ux-flow-builder/ SKILL.md +23 -0
- package/templates/frontend/table-builder/ SKILL.md +350 -0
- package/templates/performance/alerting-dashboard-builder/ SKILL.md +162 -0
- package/templates/performance/backend-latency-profiler-helper/ SKILL.md +108 -0
- package/templates/performance/caching-cdn-strategy-planner/ SKILL.md +150 -0
- package/templates/performance/capacity-planning-helper/ SKILL.md +242 -0
- package/templates/performance/core-web-vitals-tuner/ SKILL.md +126 -0
- package/templates/performance/incident-runbook-generator/ SKILL.md +162 -0
- package/templates/performance/load-test-scenario-builder/ SKILL.md +256 -0
- package/templates/performance/observability-setup/ SKILL.md +232 -0
- package/templates/performance/postmortem-writer/ SKILL.md +203 -0
- package/templates/performance/structured-logging-standardizer/ SKILL.md +122 -0
- package/templates/security/auth-security-reviewer/ SKILL.md +428 -0
- package/templates/security/dependency-vulnerability-triage/ SKILL.md +495 -0
- package/templates/security/input-validation-sanitization-auditor/ SKILL.md +76 -0
- package/templates/security/pii-redaction-logging-policy-builder/ SKILL.md +65 -0
- package/templates/security/rbac-policy-tester/ SKILL.md +80 -0
- package/templates/security/secrets-scanner/ SKILL.md +462 -0
- package/templates/security/secure-headers-csp-builder/ SKILL.md +404 -0
- package/templates/security/security-incident-playbook-generator/ SKILL.md +76 -0
- package/templates/security/security-pr-checklist-skill/ SKILL.md +62 -0
- package/templates/security/threat-model-generator/ SKILL.md +394 -0
- package/templates/testing/contract-testing-builder/ SKILL.md +492 -0
- package/templates/testing/coverage-strategist/ SKILL.md +436 -0
- package/templates/testing/e2e-test-builder/ SKILL.md +382 -0
- package/templates/testing/flaky-test-detective/ SKILL.md +416 -0
- package/templates/testing/integration-test-builder/ SKILL.md +525 -0
- package/templates/testing/mocking-assistant/ SKILL.md +383 -0
- package/templates/testing/snapshot-test-refactorer/ SKILL.md +375 -0
- package/templates/testing/test-data-factory-builder/ SKILL.md +449 -0
- package/templates/testing/test-reporting-triage-skill/ SKILL.md +469 -0
- package/templates/testing/unit-test-generator/ SKILL.md +548 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: etl-sync-job-builder
|
|
3
|
+
description: Designs reliable ETL and data synchronization jobs with incremental updates, idempotency guarantees, watermark tracking, error handling, and retry logic. Use for "ETL jobs", "data sync", "incremental sync", or "data pipeline".
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# ETL/Sync Job Builder
|
|
7
|
+
|
|
8
|
+
Build reliable, incremental data synchronization pipelines.
|
|
9
|
+
|
|
10
|
+
## ETL Job Pattern
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
// jobs/sync-users.ts
|
|
14
|
+
/** Declarative description of a sync job. */
interface SyncJob {
  /** Human-readable job name. */
  name: string;
  /** Kind of system the data is read from. */
  source: "database" | "api" | "file";
  /** Kind of system the data is written to. */
  destination: "database" | "warehouse" | "s3";
  /** When the job runs — presumably a cron expression; confirm against the scheduler in use. */
  schedule: string;
}

/**
 * Base class for an incremental extract → transform → load job.
 *
 * Progress is tracked in the `syncWatermark` table under `watermarkKey`.
 * Subclasses override `extract`, `transform` and `load`; the defaults here
 * extract nothing, pass data through unchanged, and load nothing.
 */
export class ETLJob {
  constructor(private name: string, private watermarkKey: string) {}

  /**
   * Runs one full ETL cycle and advances the watermark on success.
   *
   * @throws Re-throws any error from the cycle after logging it. The
   *         watermark is not advanced when a step fails.
   */
  async run() {
    console.log(`🔄 Starting ${this.name}...`);

    try {
      // 1. Get last watermark
      const lastSync = await this.getWatermark();
      console.log(` Last sync: ${lastSync}`);

      // Capture the cutoff BEFORE extracting. Storing "now" after the load
      // would permanently skip rows modified while this run was in progress.
      // Rows changed after this instant are simply picked up (again) by the
      // next run, which is safe as long as load() is idempotent.
      // NOTE(review): this uses the application clock; if source timestamps
      // come from a database clock, confirm the two are reasonably in sync.
      const runStartedAt = new Date();

      // 2. Extract data
      const data = await this.extract(lastSync);
      console.log(` Extracted ${data.length} records`);

      // 3. Transform data
      const transformed = await this.transform(data);

      // 4. Load data
      await this.load(transformed);

      // 5. Update watermark
      await this.updateWatermark(runStartedAt);

      console.log(`✅ ${this.name} complete`);
    } catch (error) {
      console.error(`❌ ${this.name} failed:`, error);
      throw error;
    }
  }

  // The three hooks below are `protected` (not `private`) so that subclasses
  // are allowed to override them.

  /** Returns the records changed since `since`. Default: no records. */
  protected async extract(since: Date): Promise<any[]> {
    // Extract logic
    return [];
  }

  /** Maps extracted records to the destination shape. Default: identity. */
  protected async transform(data: any[]): Promise<any[]> {
    // Transform logic
    return data;
  }

  /** Writes transformed records to the destination. Default: no-op. */
  protected async load(data: any[]): Promise<void> {
    // Load logic
  }

  /** Reads the stored watermark, falling back to the Unix epoch when none exists. */
  private async getWatermark(): Promise<Date> {
    const watermark = await prisma.syncWatermark.findUnique({
      where: { key: this.watermarkKey },
    });
    return watermark?.lastSync ?? new Date(0);
  }

  /** Creates or updates the watermark row for this job. */
  private async updateWatermark(timestamp: Date) {
    await prisma.syncWatermark.upsert({
      where: { key: this.watermarkKey },
      create: { key: this.watermarkKey, lastSync: timestamp },
      update: { lastSync: timestamp },
    });
  }
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Watermark Strategy
|
|
84
|
+
|
|
85
|
+
```prisma
|
|
86
|
+
// Track sync progress
|
|
87
|
+
model SyncWatermark {
|
|
88
|
+
key String @id
|
|
89
|
+
lastSync DateTime
|
|
90
|
+
metadata Json?
|
|
91
|
+
|
|
92
|
+
@@index([lastSync])
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
// Incremental sync using watermark
|
|
98
|
+
/**
 * Copies orders created or updated since the last run from `sourceDb` to
 * `destinationDb`, then advances the "orders_sync" watermark to the
 * `updated_at` of the newest order processed.
 *
 * Safe to re-run: every order is written with an upsert.
 */
async function syncOrdersIncremental() {
  // Get last sync time
  const watermark = await prisma.syncWatermark.findUnique({
    where: { key: "orders_sync" },
  });

  const lastSync = watermark?.lastSync ?? new Date(0);

  // Fetch only new/updated records, oldest first so the last element
  // carries the highest timestamp.
  const changedOrders = await sourceDb.order.findMany({
    where: {
      updated_at: { gt: lastSync },
    },
    orderBy: { updated_at: "asc" },
  });

  console.log(`📦 Syncing ${changedOrders.length} orders...`);

  // Process in batches
  const BATCH_SIZE = 100;
  for (let i = 0; i < changedOrders.length; i += BATCH_SIZE) {
    const batch = changedOrders.slice(i, i + BATCH_SIZE);

    // Upsert instead of createMany + skipDuplicates: skipDuplicates silently
    // drops rows that already exist, so *updated* orders would never reach
    // the destination even though the watermark moves past them.
    // NOTE(review): assumes `id` is the unique key of the order model — confirm.
    await destinationDb.$transaction(
      batch.map(({ id, ...fields }) =>
        destinationDb.order.upsert({
          where: { id },
          create: { id, ...fields },
          update: fields,
        })
      )
    );
  }

  // Update watermark to latest record
  if (changedOrders.length > 0) {
    const latestTimestamp = changedOrders[changedOrders.length - 1].updated_at;

    await prisma.syncWatermark.upsert({
      where: { key: "orders_sync" },
      create: { key: "orders_sync", lastSync: latestTimestamp },
      update: { lastSync: latestTimestamp },
    });
  }

  console.log(`✅ Sync complete`);
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Idempotent Upsert Pattern
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
// Idempotent sync - safe to run multiple times
|
|
145
|
+
/**
 * Writes each user with an upsert keyed on `id`, one at a time and in order.
 * Existing rows get their email, name and updated_at refreshed; missing rows
 * are created from the full user object.
 */
async function syncUsersIdempotent(users: User[]) {
  for (const incoming of users) {
    const { id, email, name, updated_at } = incoming;

    await prisma.user.upsert({
      where: { id },
      create: incoming,
      update: { email, name, updated_at },
    });
  }
}
|
|
158
|
+
|
|
159
|
+
// Batch upsert for better performance
|
|
160
|
+
/**
 * Upserts all users in a single PostgreSQL statement.
 *
 * Each column is sent as one array parameter and expanded row-wise with
 * UNNEST; ON CONFLICT overwrites an existing row only when the incoming
 * `updated_at` is newer than the stored one.
 */
async function syncUsersBatch(users: User[]) {
  // One array per column, all in the same row order.
  const ids = users.map((u) => u.id);
  const emails = users.map((u) => u.email);
  const names = users.map((u) => u.name);
  const updatedAts = users.map((u) => u.updated_at);

  // PostgreSQL: Use ON CONFLICT
  await prisma.$executeRaw`
    INSERT INTO users (id, email, name, updated_at)
    SELECT * FROM UNNEST(
      ${ids}::bigint[],
      ${emails}::text[],
      ${names}::text[],
      ${updatedAts}::timestamp[]
    )
    ON CONFLICT (id) DO UPDATE SET
      email = EXCLUDED.email,
      name = EXCLUDED.name,
      updated_at = EXCLUDED.updated_at
    WHERE users.updated_at < EXCLUDED.updated_at
  `;
}
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Retry Logic with Exponential Backoff
|
|
180
|
+
|
|
181
|
+
```typescript
|
|
182
|
+
/**
 * Runs `operation`, retrying with exponential backoff when it rejects.
 *
 * @param operation  Async work to attempt.
 * @param maxRetries Number of retries after the first attempt (default 3),
 *                   so the operation runs at most `maxRetries + 1` times.
 * @param baseDelay  Delay before the first retry in milliseconds
 *                   (default 1000); doubles on each subsequent retry.
 * @returns The value the first successful attempt resolves with.
 * @throws The error from the final attempt once retries are exhausted.
 */
async function syncWithRetry<T>(
  operation: () => Promise<T>,
  maxRetries: number = 3,
  baseDelay: number = 1000
): Promise<T> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await operation();
    } catch (error) {
      if (attempt === maxRetries) throw error;

      const delay = baseDelay * Math.pow(2, attempt);
      console.log(` Retry ${attempt + 1}/${maxRetries} after ${delay}ms`);
      // Inline timer: no `sleep` helper is defined or imported here.
      await new Promise<void>((resolve) => setTimeout(resolve, delay));
    }
  }

  // Only reachable when maxRetries is negative and the loop never runs.
  throw new Error("Max retries exceeded");
}
|
|
201
|
+
|
|
202
|
+
// Usage
|
|
203
|
+
// Retry the incremental orders sync up to 3 times, starting at a 1s delay.
await syncWithRetry(() => syncOrdersIncremental(), 3, 1000);
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Change Data Capture (CDC)
|
|
213
|
+
|
|
214
|
+
```typescript
|
|
215
|
+
// Listen to database changes
|
|
216
|
+
import { PrismaClient } from "@prisma/client";
|
|
217
|
+
|
|
218
|
+
const prisma = new PrismaClient();
|
|
219
|
+
|
|
220
|
+
// PostgreSQL: create a logical-replication publication, then react to LISTEN/NOTIFY events
|
|
221
|
+
/**
 * Prepares change capture for the `orders` table and starts reacting to
 * `orders_changed` notifications.
 *
 * Two separate PostgreSQL mechanisms are involved:
 *  - a publication (`orders_publication`), which logical-replication
 *    subscribers can consume;
 *  - LISTEN/NOTIFY on the `orders_changed` channel, which drives the
 *    handler below.
 *
 * NOTE(review): nothing in this function emits notifications. Something —
 * presumably a trigger calling pg_notify('orders_changed', ...) with a JSON
 * payload of the form { operation, data } — must exist for the handler to
 * fire. Confirm against the database setup.
 */
async function setupCDC() {
  // CREATE PUBLICATION has no IF NOT EXISTS form, so check the catalog first;
  // otherwise every run after the first would fail.
  const existing = await prisma.$queryRaw<unknown[]>`
    SELECT 1 FROM pg_publication WHERE pubname = 'orders_publication'
  `;
  if (existing.length === 0) {
    await prisma.$executeRaw`
      CREATE PUBLICATION orders_publication FOR TABLE orders;
    `;
  }

  // Subscribe to changes (using pg library)
  const client = await pg.connect();

  // Attach the handler before issuing LISTEN so no notification is missed.
  client.on("notification", async (msg) => {
    // Errors are caught here because a rejection inside an event handler
    // would otherwise surface as an unhandled promise rejection.
    try {
      const change = JSON.parse(msg.payload);

      if (change.operation === "INSERT" || change.operation === "UPDATE") {
        await syncOrder(change.data);
      }
    } catch (error) {
      console.error("Failed to apply orders_changed notification:", error);
    }
  });

  await client.query("LISTEN orders_changed;");
}
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Conflict Resolution
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
/** Names the rule used to pick a winner when source and destination disagree. */
interface ConflictResolution {
  strategy: "source-wins" | "dest-wins" | "latest-wins" | "merge";
}

/**
 * Resolves a conflict between two versions of the same record.
 *
 * - "source-wins": returns the source record.
 * - "dest-wins":   returns the destination record.
 * - "latest-wins": returns the source record only when its `updated_at` is
 *                  strictly greater; ties go to the destination.
 * - "merge":       destination fields overlaid with every source field that
 *                  is neither null nor undefined.
 *
 * Any other strategy value resolves to `undefined`.
 */
async function syncWithConflictResolution(
  sourceRecord: any,
  destRecord: any,
  strategy: ConflictResolution["strategy"]
) {
  switch (strategy) {
    case "source-wins":
      return sourceRecord;

    case "dest-wins":
      return destRecord;

    case "latest-wins": {
      const sourceIsNewer = sourceRecord.updated_at > destRecord.updated_at;
      if (sourceIsNewer) {
        return sourceRecord;
      }
      return destRecord;
    }

    case "merge": {
      // Merge non-null fields
      const presentSourceFields = Object.entries(sourceRecord).filter(
        ([, value]) => value != null
      );
      return {
        ...destRecord,
        ...Object.fromEntries(presentSourceFields),
      };
    }
  }
}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
## Monitoring & Observability
|
|
280
|
+
|
|
281
|
+
```typescript
|
|
282
|
+
// Track sync job metrics
|
|
283
|
+
/** Outcome counters and timing for one run of a sync job. */
interface SyncMetrics {
  jobName: string;
  startTime: Date;
  endTime: Date;
  recordsProcessed: number;
  recordsInserted: number;
  recordsUpdated: number;
  recordsSkipped: number;
  errors: number;
  durationMs: number;
}

/**
 * Persists one run's metrics as a `syncMetric` row and prints a summary.
 *
 * @param metrics Counters and timing collected by the caller.
 */
async function logSyncMetrics(metrics: SyncMetrics) {
  await prisma.syncMetric.create({
    data: metrics,
  });

  // The summary includes every counter the interface tracks, including
  // skipped records.
  console.log(`
📊 Sync Metrics
Job: ${metrics.jobName}
Records: ${metrics.recordsProcessed}
Inserted: ${metrics.recordsInserted}
Updated: ${metrics.recordsUpdated}
Skipped: ${metrics.recordsSkipped}
Errors: ${metrics.errors}
Duration: ${metrics.durationMs}ms
`);
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Full ETL Job Example
|
|
313
|
+
|
|
314
|
+
```typescript
|
|
315
|
+
// jobs/sync-orders-to-warehouse.ts
|
|
316
|
+
/**
 * ETL job that copies orders (with their items and user) into the warehouse
 * table `orders_fact`, tracked under the "orders_warehouse_sync" watermark.
 */
export class OrdersETLJob extends ETLJob {
  constructor() {
    super("orders-etl", "orders_warehouse_sync");
  }

  /** Fetches orders updated after `since`, oldest first, with items and user loaded. */
  async extract(since: Date): Promise<Order[]> {
    return prisma.order.findMany({
      where: {
        updated_at: { gt: since },
      },
      include: {
        items: true,
        user: true,
      },
      orderBy: { updated_at: "asc" },
    });
  }

  /** Flattens each order into one warehouse fact row, stamped with the sync time. */
  async transform(orders: Order[]): Promise<WarehouseOrder[]> {
    return orders.map((order) => ({
      order_id: order.id,
      user_email: order.user.email,
      total_amount: order.total,
      item_count: order.items.length,
      status: order.status,
      order_date: order.created_at,
      synced_at: new Date(),
    }));
  }

  /**
   * Upserts the fact rows in batches of 100.
   *
   * Every value travels as a bind parameter: each column is sent as one
   * array and expanded row-wise with UNNEST. Building a `VALUES (...)`
   * string and interpolating it into the tagged template does not work —
   * the whole string is sent as a single parameter — and hand-quoting
   * values is open to SQL injection.
   */
  async load(data: WarehouseOrder[]): Promise<void> {
    const batchSize = 100;

    for (let i = 0; i < data.length; i += batchSize) {
      const batch = data.slice(i, i + batchSize);

      // NOTE(review): the array casts below assume the orders_fact column
      // types (bigint / text / numeric / int / timestamp) — confirm against
      // the warehouse schema.
      await warehouseDb.$executeRaw`
        INSERT INTO orders_fact (
          order_id, user_email, total_amount, item_count,
          status, order_date, synced_at
        )
        SELECT * FROM UNNEST(
          ${batch.map((o) => o.order_id)}::bigint[],
          ${batch.map((o) => o.user_email)}::text[],
          ${batch.map((o) => o.total_amount)}::numeric[],
          ${batch.map((o) => o.item_count)}::int[],
          ${batch.map((o) => o.status)}::text[],
          ${batch.map((o) => o.order_date)}::timestamp[],
          ${batch.map((o) => o.synced_at)}::timestamp[]
        )
        ON CONFLICT (order_id) DO UPDATE SET
          total_amount = EXCLUDED.total_amount,
          status = EXCLUDED.status,
          synced_at = EXCLUDED.synced_at
      `;
    }
  }
}
|
|
374
|
+
|
|
375
|
+
// Run job
|
|
376
|
+
new OrdersETLJob().run().catch(() => {
  // run() has already logged the failure; flag the process as failed
  // instead of leaving an unhandled promise rejection.
  process.exitCode = 1;
});
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
## Scheduling
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
// Schedule ETL jobs
|
|
383
|
+
import cron from "node-cron";
|
|
384
|
+
|
|
385
|
+
/**
 * Runs one scheduled job and contains its failure: a rejection inside a cron
 * callback would otherwise become an unhandled promise rejection, which can
 * terminate the scheduler process.
 */
async function runScheduled(jobName: string, job: () => Promise<unknown>) {
  try {
    await job();
  } catch (error) {
    console.error(`❌ Scheduled job "${jobName}" failed:`, error);
  }
}

// Run every hour
cron.schedule("0 * * * *", () =>
  runScheduled("orders-etl", () => new OrdersETLJob().run())
);

// Run every 15 minutes
cron.schedule("*/15 * * * *", () =>
  runScheduled("users-incremental-sync", () => syncUsersIncremental())
);

// Run nightly at 2 AM
cron.schedule("0 2 * * *", () =>
  runScheduled("full-data-sync", () => fullDataSync())
);
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
## Error Handling & Recovery
|
|
402
|
+
|
|
403
|
+
```typescript
|
|
404
|
+
/**
 * Processes records starting from the stored checkpoint.
 *
 * A failure on a single record (including a failure to save the periodic
 * checkpoint) is logged, recorded via logFailedRecord, and does not stop the
 * run. A failure outside the per-record handling is logged and re-thrown.
 * The checkpoint is saved after every 100th successfully processed record
 * and set to "completed" once the loop finishes.
 */
async function syncWithErrorHandling() {
  const CHECKPOINT_INTERVAL = 100;

  const resumeFrom = await getCheckpoint();
  let successCount = 0;

  try {
    const pending = await fetchRecords(resumeFrom);

    for (const item of pending) {
      try {
        await processRecord(item);
        successCount += 1;

        // Persist progress periodically rather than after every record.
        const atInterval = successCount % CHECKPOINT_INTERVAL === 0;
        if (atInterval) {
          await saveCheckpoint(item.id);
        }
      } catch (recordError) {
        // Record-level failure: note it and move on to the next record.
        console.error(`Failed to process record ${item.id}:`, recordError);
        await logFailedRecord(item.id, recordError);
      }
    }

    await saveCheckpoint("completed");
  } catch (jobError) {
    // Job-level failure: surface it so the caller can retry from the checkpoint.
    console.error("Job failed:", jobError);
    throw jobError;
  }
}
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
## Best Practices
|
|
437
|
+
|
|
438
|
+
1. **Incremental sync**: Use watermarks, don't full-scan
|
|
439
|
+
2. **Idempotent operations**: Safe to retry
|
|
440
|
+
3. **Batch processing**: Process 100-1000 records at a time
|
|
441
|
+
4. **Checkpointing**: Resume from failure point
|
|
442
|
+
5. **Retry with backoff**: Handle transient failures
|
|
443
|
+
6. **Monitor metrics**: Track job health
|
|
444
|
+
7. **Test thoroughly**: Including failure scenarios
|
|
445
|
+
|
|
446
|
+
## Output Checklist
|
|
447
|
+
|
|
448
|
+
- [ ] ETL job class created
|
|
449
|
+
- [ ] Watermark tracking implemented
|
|
450
|
+
- [ ] Incremental sync logic
|
|
451
|
+
- [ ] Idempotent upsert operations
|
|
452
|
+
- [ ] Retry logic with backoff
|
|
453
|
+
- [ ] Conflict resolution strategy
|
|
454
|
+
- [ ] Monitoring and metrics
|
|
455
|
+
- [ ] Error handling and recovery
|
|
456
|
+
- [ ] Job scheduling configured
|
|
457
|
+
- [ ] Testing including failure cases
|