payment-kit 1.25.8 → 1.25.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,729 @@
1
+ /* eslint-disable no-continue */
2
+ /* eslint-disable no-await-in-loop */
3
+ import crypto from 'crypto';
4
+ import fs from 'fs';
5
+
6
+ import { nanoid } from 'nanoid';
7
+ import { Op, Sequelize, ModelStatic, Model } from 'sequelize';
8
+
9
+ import dayjs from '../dayjs';
10
+ import logger from '../logger';
11
+ import {
12
+ ArchiveMetadata,
13
+ CreditGrant,
14
+ CreditTransaction,
15
+ Customer,
16
+ Discount,
17
+ Event,
18
+ Invoice,
19
+ InvoiceItem,
20
+ Job,
21
+ MeterEvent,
22
+ PaymentIntent,
23
+ Payout,
24
+ Price,
25
+ PriceQuote,
26
+ Product,
27
+ Refund,
28
+ SetupIntent,
29
+ Subscription,
30
+ SubscriptionItem,
31
+ SubscriptionSchedule,
32
+ CheckoutSession,
33
+ UsageRecord,
34
+ WebhookAttempt,
35
+ Coupon,
36
+ } from '../../store/models';
37
+ import { sequelize as mainSequelize } from '../../store/sequelize';
38
+ import { getRetentionConfig, TableRetentionPolicy } from './config';
39
+ import { acquireArchiveLock, releaseArchiveLock } from './lock';
40
+ import { buildArchiveQueryPlan } from './policy';
41
+ import { createRevenueSnapshotsForArchive } from './snapshot';
42
+ import {
43
+ cleanupOldArchiveFiles,
44
+ ensureArchiveTable,
45
+ getArchiveDir,
46
+ getArchiveFilePath,
47
+ getArchiveFilePathForYear,
48
+ getFileSize,
49
+ getRecordYear,
50
+ openArchiveSequelize,
51
+ } from './store';
52
+
53
/** Per-table outcome of a single archive run. */
type TableMigrationResult = {
  /**
   * Count of records that went through the complete archive-then-delete cycle.
   *
   * This is deliberately NOT the number of rows physically written to the archive DB:
   * on a retry, INSERT OR IGNORE skips rows that were already archived, yet those rows are
   * still deleted from the main DB and are therefore still counted here. The value added is
   * the main-DB batch size (toArchive.length), and it is added only once destroy has
   * succeeded, so an idempotent re-run keeps archived_count === deleted_count.
   */
  archived_count: number;
  /** Rows removed from the main DB, as reported by destroy. */
  deleted_count: number;
  /** Number of records whose archive insert failed. */
  failed_count: number;
  /** Ids of the records counted in failed_count. */
  failed_ids: string[];
  /** Smallest record timestamp seen for this table, in epoch milliseconds. */
  oldest_record?: number;
  /** Largest record timestamp seen for this table, in epoch milliseconds. */
  newest_record?: number;
};
68
+
69
/** Options accepted by runArchiveJob. */
type ArchiveJobOptions = {
  /** Restrict the run to these tables; when omitted, every table key in the retention config is considered. */
  tables?: string[];
  /** When true, only counts are produced: no archive inserts and no main-DB deletes are performed. */
  dryRun?: boolean;
  /** What started the run; stored on the archive metadata row as triggered_by. */
  triggeredBy: 'cron' | 'manual';
  /** Stored on the archive metadata row as triggered_by_user_id. */
  triggeredByUserId?: string;
};
75
+
76
/**
 * Subscription statuses that count as "active" when deciding whether a record is still
 * linked to a live subscription (see the hasActiveSubscription exclude condition).
 */
const ACTIVE_SUBSCRIPTION_STATUSES = [
  'active',
  'past_due',
  'trialing',
  'paused',
  'incomplete',
];
77
+
78
/**
 * Lookup from table name to its Sequelize model.
 *
 * The keys are the table names used as keys of the retention config (`config.tables`) and of
 * a policy's `cascadeRelations`. A table name without an entry here is skipped with a warning
 * by the archive job.
 */
const TABLE_MODELS: Record<string, ModelStatic<Model>> = {
  meter_events: MeterEvent,
  credit_transactions: CreditTransaction,
  events: Event,
  webhook_attempts: WebhookAttempt,
  jobs: Job,
  payment_intents: PaymentIntent,
  invoices: Invoice,
  invoice_items: InvoiceItem,
  refunds: Refund,
  payouts: Payout,
  subscriptions: Subscription,
  subscription_items: SubscriptionItem,
  subscription_schedules: SubscriptionSchedule,
  checkout_sessions: CheckoutSession,
  credit_grants: CreditGrant,
  products: Product,
  prices: Price,
  coupons: Coupon,
  customers: Customer,
  discounts: Discount,
  usage_records: UsageRecord,
  price_quotes: PriceQuote,
  setup_intents: SetupIntent,
};
103
+
104
/**
 * Null-safe dynamic property read.
 *
 * @param record Any value; may be null or undefined.
 * @param field Property name to read.
 * @returns `record[field]`, or undefined when `record` itself is null/undefined.
 */
function getRecordField(record: any, field: string) {
  if (record === null || record === undefined) {
    return undefined;
  }
  return record[field];
}
107
+
108
/**
 * Reads the subscription id a record refers to.
 *
 * For `meter_events` the id is read from `record.payload.subscription_id`; for every other
 * table it is read from `record.subscription_id`.
 *
 * @returns The id, or undefined when the record (or its payload) is missing or has no id.
 */
function extractSubscriptionId(tableName: string, record: any): string | undefined {
  const holder = tableName === 'meter_events' ? record?.payload : record;
  return holder?.subscription_id;
}
114
+
115
/**
 * Reads the customer id a record refers to.
 *
 * For `meter_events` the id is read from `record.payload.customer_id`; for every other
 * table it is read from `record.customer_id`.
 *
 * @returns The id, or undefined when the record (or its payload) is missing or has no id.
 */
function extractCustomerId(tableName: string, record: any): string | undefined {
  const holder = tableName === 'meter_events' ? record?.payload : record;
  return holder?.customer_id;
}
121
+
122
/** Id sets derived from the subscriptions whose status is in ACTIVE_SUBSCRIPTION_STATUSES. */
type ActiveSubscriptionSets = {
  /** Ids of the active subscriptions themselves. */
  subscriptionIds: Set<string>;
  /** Ids of customers owning at least one active subscription. */
  customerIds: Set<string>;
};
126
+
127
/**
 * Loads the ids of all subscriptions whose status is in ACTIVE_SUBSCRIPTION_STATUSES,
 * together with the ids of the customers that own them.
 *
 * Only `id` and `customer_id` are selected, and rows are fetched raw (plain objects).
 *
 * @returns The two id sets; a subscription without a customer_id contributes only its own id.
 */
async function loadActiveSubscriptionSets(): Promise<ActiveSubscriptionSets> {
  const rows = (await Subscription.findAll({
    where: { status: { [Op.in]: ACTIVE_SUBSCRIPTION_STATUSES } },
    attributes: ['id', 'customer_id'],
    raw: true,
  })) as any[];

  const sets: ActiveSubscriptionSets = {
    subscriptionIds: new Set<string>(),
    customerIds: new Set<string>(),
  };
  for (const row of rows) {
    sets.subscriptionIds.add(row.id);
    const owner = row.customer_id;
    if (owner) {
      sets.customerIds.add(owner);
    }
  }
  return sets;
}
144
+
145
/**
 * Tells whether a record is still tied to an active subscription.
 *
 * A record counts as linked when the subscription id it references is in
 * `activeSubs.subscriptionIds`, or when the customer id it references is in
 * `activeSubs.customerIds` (i.e. that customer owns some active subscription).
 *
 * @param tableName Table the record came from; decides where the ids are read from.
 * @param record Plain record data.
 * @param activeSubs Pre-loaded active id sets.
 */
function hasActiveLinkedSubscription(tableName: string, record: any, activeSubs: ActiveSubscriptionSets): boolean {
  const linkedSubscriptionId = extractSubscriptionId(tableName, record);
  if (linkedSubscriptionId && activeSubs.subscriptionIds.has(linkedSubscriptionId)) {
    return true;
  }

  const ownerCustomerId = extractCustomerId(tableName, record);
  return Boolean(ownerCustomerId) && activeSubs.customerIds.has(ownerCustomerId as string);
}
158
+
159
/**
 * Converts a date-like value to epoch milliseconds.
 *
 * @param value A Date, or anything `new Date(value)` accepts (date string or millisecond number).
 * @returns Epoch milliseconds, or undefined when the value is falsy (undefined, '', 0) or
 *   does not represent a valid date. An invalid Date instance also yields undefined, so the
 *   result is never NaN.
 */
function getDateTimestamp(value?: Date | string | number): number | undefined {
  if (!value) return undefined;
  const millis = value instanceof Date ? value.getTime() : new Date(value).getTime();
  // getTime() is NaN for an invalid date, whether it came in as a Date or was parsed here.
  return Number.isNaN(millis) ? undefined : millis;
}
165
+
166
/**
 * Widens `result.oldest_record` / `result.newest_record` so they include `timestamp`.
 *
 * A falsy timestamp (undefined, 0, NaN) is ignored and leaves `result` untouched.
 *
 * @param result Accumulator that is mutated in place.
 * @param timestamp Record time in epoch milliseconds.
 */
function updateMinMax(result: TableMigrationResult, timestamp?: number) {
  if (!timestamp) {
    return;
  }
  const { oldest_record: oldest, newest_record: newest } = result;
  if (oldest === undefined || timestamp < oldest) {
    result.oldest_record = timestamp;
  }
  if (newest === undefined || timestamp > newest) {
    result.newest_record = timestamp;
  }
}
175
+
176
/**
 * Prepares a record for QueryInterface.bulkInsert.
 *
 * Unlike Model operations, bulkInsert does not serialize JSON columns on its own, so every
 * object-typed value (plain objects and arrays) is turned into its JSON string here.
 * Dates, null, undefined and primitives are copied through unchanged.
 *
 * @param record Plain record data.
 * @returns A new object with the same keys; the input is not modified.
 */
function serializeJsonFields(record: Record<string, any>): Record<string, any> {
  const serialized: Record<string, any> = {};
  Object.keys(record).forEach((column) => {
    const raw = record[column];
    // typeof null is 'object', so null has to be excluded explicitly.
    const isJsonLike = typeof raw === 'object' && raw !== null && !(raw instanceof Date);
    serialized[column] = isJsonLike ? JSON.stringify(raw) : raw;
  });
  return serialized;
}
197
+
198
/**
 * Fails fast when the partition holding the archive directory is running out of space.
 *
 * Does nothing when `fs.statfsSync` is not available in the running Node version.
 *
 * @param minFreeDiskMB Minimum free space required, in megabytes.
 * @throws Error when the space available is below `minFreeDiskMB`.
 */
function checkDiskSpace(minFreeDiskMB: number) {
  if (typeof fs.statfsSync === 'function') {
    // Measure the partition where archive files are stored.
    const { bavail, bsize } = fs.statfsSync(getArchiveDir());
    const freeMB = Math.floor((bavail * bsize) / (1024 * 1024));
    if (freeMB < minFreeDiskMB) {
      throw new Error(`Not enough free disk space: ${freeMB}MB < ${minFreeDiskMB}MB`);
    }
  }
}
211
+
212
/** Shared state handed to archiveCascadeRelations by the archive job. */
type CascadeArchiveContext = {
  /** Open archive connections, keyed by the data year they store. */
  archiveConnections: Map<number, Sequelize>;
  /** Main-DB connection; passed to ensureArchiveTable when an archive table is prepared. */
  mainSequelize: Sequelize;
  /** Per-table results of the running job; entries for child tables are created on demand. */
  results: Record<string, TableMigrationResult>;
  /** When true, child records are only counted, never inserted or deleted. */
  dryRun: boolean;
  /** `${table}_${year}` keys for which the archive table has already been ensured. */
  ensuredTables: Set<string>;
};
219
+
220
/**
 * Returns the archive Sequelize connection for a given year, opening it on first use.
 *
 * A newly opened connection is stored in `connections` so later calls for the same year
 * reuse it.
 *
 * @param year Data year the archive file belongs to.
 * @param connections Cache of already opened connections, keyed by year; mutated on a miss.
 */
function getOrCreateArchiveConnection(year: number, connections: Map<number, Sequelize>): Sequelize {
  const cached = connections.get(year);
  if (cached) {
    return cached;
  }
  const opened = openArchiveSequelize(getArchiveFilePathForYear(year));
  connections.set(year, opened);
  return opened;
}
233
+
234
/**
 * Buckets records by the year getRecordYear reports for each of them.
 *
 * @param records Plain record objects.
 * @returns A map from year to the records of that year, each bucket in input order.
 */
function groupRecordsByYear(records: any[]): Map<number, any[]> {
  const byYear = new Map<number, any[]>();
  for (let index = 0; index < records.length; index += 1) {
    const record = records[index];
    const year = getRecordYear(record);
    const bucket = byYear.get(year);
    if (!bucket) {
      byYear.set(year, [record]);
    } else {
      bucket.push(record);
    }
  }
  return byYear;
}
250
+
251
/**
 * Archives (and then deletes) the child rows that belong to a batch of parent records.
 *
 * For every `childTable -> foreignKey` pair in `policy.cascadeRelations`, the child rows whose
 * foreign key is one of `parentIds` are loaded, inserted into the archive DB of their own
 * year, and afterwards deleted from the main DB. In dry-run mode the rows are only counted.
 * Child tables without a known model are skipped with a warning.
 *
 * @param policy Retention policy of the parent table.
 * @param parentIds Ids of the parent records being archived.
 * @param ctx Shared job state; `ctx.results`, `ctx.archiveConnections` and `ctx.ensuredTables`
 *   are updated in place.
 * @returns true when every relation was processed (or there was nothing to do); false as soon
 *   as an archive insert or a main-DB delete fails, in which case remaining relations are not
 *   processed. Errors from other steps (queries, table preparation) propagate to the caller.
 */
async function archiveCascadeRelations(
  policy: TableRetentionPolicy,
  parentIds: string[],
  ctx: CascadeArchiveContext
): Promise<boolean> {
  const relations = policy.cascadeRelations;
  if (!relations || parentIds.length === 0) {
    return true;
  }

  for (const [childTable, foreignKey] of Object.entries(relations)) {
    const childModel = TABLE_MODELS[childTable];
    if (!childModel) {
      logger.warn('cascade child model not found', { childTable });
      continue;
    }

    const children = await childModel.findAll({
      where: { [foreignKey]: { [Op.in]: parentIds } },
    });
    if (children.length === 0) {
      continue;
    }

    // Child tables get their result entry lazily, the first time rows are found for them.
    let childResult = ctx.results[childTable];
    if (!childResult) {
      childResult = {
        archived_count: 0,
        deleted_count: 0,
        failed_count: 0,
        failed_ids: [],
      };
      ctx.results[childTable] = childResult;
    }

    // Children are filed under the year of their own record, not the parent's.
    const childRows = children.map((child) => child.toJSON());
    const rowsByYear = groupRecordsByYear(childRows);

    if (ctx.dryRun) {
      childResult.archived_count += childRows.length;
      continue;
    }

    // One insert per year-specific archive DB.
    for (const [year, rowsForYear] of rowsByYear) {
      const archiveDb = await getOrCreateArchiveConnection(year, ctx.archiveConnections);
      const ensuredKey = `${childTable}_${year}`;

      // Make sure the table exists in this year's archive DB (once per table+year).
      if (!ctx.ensuredTables.has(ensuredKey)) {
        await ensureArchiveTable(childTable, ctx.mainSequelize, archiveDb);
        ctx.ensuredTables.add(ensuredKey);
      }

      const rowsToInsert = rowsForYear.map((row) => serializeJsonFields({ ...row, archived_at: new Date() }));

      try {
        const queryInterface = archiveDb.getQueryInterface();
        await queryInterface.bulkInsert(childTable, rowsToInsert, { ignoreDuplicates: true } as any);
      } catch (insertError: any) {
        const failedIds = rowsToInsert.map((row) => row.id).filter(Boolean);
        childResult.failed_count += failedIds.length;
        childResult.failed_ids.push(...failedIds);
        logger.error('cascade archive insert failed', { childTable, year, error: insertError });
        return false;
      }
    }

    // Only after every year's insert succeeded are the children removed from the main DB.
    const childIds = childRows.map((row) => row.id).filter(Boolean);
    try {
      const deletedCount = await childModel.destroy({ where: { id: { [Op.in]: childIds } } });
      // Counted after insert + delete both succeeded. The batch size (not the number of rows
      // the INSERT affected) is used on purpose: INSERT OR IGNORE skips duplicates on a retry,
      // but those rows are still deleted here and must still be counted.
      childResult.archived_count += childRows.length;
      childResult.deleted_count += deletedCount;
    } catch (deleteError: any) {
      logger.error('cascade archive delete failed', { childTable, error: deleteError });
      return false;
    }
  }

  return true;
}
337
+
338
/**
 * Summarizes what an archive run would pick up for one table, without changing anything.
 *
 * @param tableName Key into TABLE_MODELS.
 * @param policyRetentionDays Age in days beyond which records qualify; used to compute the cutoff date.
 * @param policy Retention policy, forwarded to buildArchiveQueryPlan.
 * @returns null when the table has no known model; otherwise the number of matching records
 *   and the oldest/newest value of the plan's date field as ISO strings (undefined when the
 *   corresponding min/max is falsy, e.g. no rows match).
 */
async function getTablePreview(tableName: string, policyRetentionDays: number, policy: any) {
  const model = TABLE_MODELS[tableName];
  if (!model) {
    return null;
  }

  const cutoffDate = dayjs().subtract(policyRetentionDays, 'day').toDate();
  const plan = buildArchiveQueryPlan(model, policy, cutoffDate);
  const filter = { where: plan.where };
  const toIso = (value: unknown) => (value ? new Date(value as any).toISOString() : undefined);

  // The three aggregate queries are independent, so they run concurrently.
  const [count, oldest, newest] = await Promise.all([
    model.count(filter),
    model.min(plan.dateField, filter),
    model.max(plan.dateField, filter),
  ]);

  return {
    count,
    oldestRecord: toIso(oldest),
    newestRecord: toIso(newest),
  };
}
354
+
355
/**
 * Cleans up after an archive job that was interrupted mid-run.
 *
 * Every ArchiveMetadata row still marked `in_progress` is flipped to `failed` (with the time
 * elapsed since its creation as duration), and the `archive_job` lock row is cleared.
 * When no such row exists nothing is touched, including the lock.
 *
 * Never throws: any error is logged and swallowed.
 */
export async function recoverFromCrash(): Promise<void> {
  try {
    const staleJobs = await ArchiveMetadata.findAll({ where: { status: 'in_progress' } });
    if (staleJobs.length > 0) {
      for (const staleJob of staleJobs) {
        logger.warn('recovering stale archive job', { id: staleJob.id, archive_file: staleJob.archive_file });
        const elapsedMs = Date.now() - new Date(staleJob.created_at).getTime();
        await staleJob.update({
          status: 'failed',
          error: 'Process crashed or restarted during archive job',
          duration_ms: elapsedMs,
        });
      }

      // Clear any stale locks
      const { ArchiveLock } = await import('../../store/models/archive-lock');
      const released = { locked_by: null, locked_at: null, expires_at: null };
      await ArchiveLock.update(released, { where: { id: 'archive_job' } });

      logger.info('archive crash recovery completed', { recoveredJobs: staleJobs.length });
    }
  } catch (error) {
    logger.error('archive crash recovery failed', { error });
  }
}
380
+
381
/**
 * Builds a read-only preview of what an archive run would process.
 *
 * @param options.tables Tables to inspect; defaults to every table key in the retention config.
 * @returns An object keyed by table name holding that table's preview. Tables whose policy is
 *   missing, disabled, or has `retentionDays <= 0`, and tables without a known model, are
 *   left out.
 */
export async function previewArchive(options: { tables?: string[] }) {
  const { tables: policies } = getRetentionConfig();
  const requested = options.tables || Object.keys(policies);
  const preview: Record<string, any> = {};

  for (const tableName of requested) {
    const policy = policies[tableName];
    if (!policy?.enabled || policy.retentionDays <= 0) {
      continue;
    }

    const tablePreview = await getTablePreview(tableName, policy.retentionDays, policy);
    if (tablePreview) {
      preview[tableName] = tablePreview;
    }
  }

  return preview;
}
397
+
398
/**
 * Runs one archive pass: moves records older than each table's retention cutoff from the main
 * DB into per-year archive DB files, then deletes them from the main DB.
 *
 * Flow:
 *  1. Bail out when archiving is disabled; otherwise take the archive lock.
 *  2. Check free disk space and create an `in_progress` ArchiveMetadata row.
 *  3. Unless this is a dry run, create revenue snapshots (skipped when no table policy is enabled).
 *  4. For every enabled table, page through matching records with a (dateField, id) cursor;
 *     per batch: insert into the archive DB of each record's year, archive cascade children,
 *     then delete the batch from the main DB.
 *  5. Verify archived_count === deleted_count for every table, clean up old archive files,
 *     compute checksums/sizes and mark the metadata row `completed`.
 *
 * The archive connections are always closed and the lock always released, on success or failure.
 *
 * @param options See ArchiveJobOptions. With `dryRun`, records are only counted.
 * @returns `{ status: 'disabled' }` when archiving is turned off,
 *   `{ status: 'dry_run_complete', tables }` for a dry run, or
 *   `{ status: 'completed', tables, total_records, archive_files }`.
 * @throws Error('Archive job already running') when the lock cannot be acquired; any error
 *   raised while the job runs (low disk space, failed consistency check, query errors) is
 *   rethrown after the metadata row has been marked `failed`.
 */
export async function runArchiveJob(options: ArchiveJobOptions) {
  const config = getRetentionConfig();
  if (!config.enabled) {
    return { status: 'disabled' };
  }

  const instanceId = nanoid();
  const lockAcquired = await acquireArchiveLock(instanceId);
  if (!lockAcquired) {
    throw new Error('Archive job already running');
  }

  const startAt = Date.now();
  // Use Map to manage archive connections by year (e.g., 2024 -> archive-2024.db)
  const archiveConnections = new Map<number, Sequelize>();
  const ensuredTables = new Set<string>(); // Track table+year combinations
  let metadata: ArchiveMetadata | null = null;

  // Short pause taken after every batch.
  const pauseBetweenBatches = () =>
    new Promise<void>((resolve) => {
      setTimeout(resolve, 100);
    });

  try {
    checkDiskSpace(config.storage.minFreeDiskMB);

    // Archive files are organized by data year, not execution time.
    // metadata.archive_file stores a comma-separated list of the files touched.
    metadata = await ArchiveMetadata.create({
      id: nanoid(),
      archive_file: '', // Will be updated with actual files at the end
      date_range_start: 0,
      date_range_end: 0,
      tables: {},
      total_records: 0,
      status: 'in_progress',
      triggered_by: options.triggeredBy,
      triggered_by_user_id: options.triggeredByUserId,
      query_count: 0,
    });

    const results: Record<string, TableMigrationResult> = {};
    let globalMin: number | undefined;
    let globalMax: number | undefined;

    // Pre-load active subscription sets once for all tables
    const activeSubs = await loadActiveSubscriptionSets();

    // Create revenue snapshots for months that will be archived.
    // Math.min() of an empty list is Infinity, which would produce an invalid cutoff, so the
    // snapshot step only runs when at least one table policy is enabled.
    const enabledRetentionDays = Object.values(config.tables)
      .filter((p) => p.enabled && p.retentionDays > 0)
      .map((p) => p.retentionDays);
    if (!options.dryRun && enabledRetentionDays.length > 0) {
      const minRetentionDays = Math.min(...enabledRetentionDays);
      const earliestCutoff = dayjs().subtract(minRetentionDays, 'day').unix();
      const snapshotCount = await createRevenueSnapshotsForArchive(earliestCutoff, metadata.id);
      logger.info('Revenue snapshots created before archive', { snapshotCount });
    }

    const tableNames = options.tables || Object.keys(config.tables);
    for (const tableName of tableNames) {
      const policy = config.tables[tableName];
      if (!policy?.enabled || policy.retentionDays <= 0) {
        continue;
      }

      const model = TABLE_MODELS[tableName];
      if (!model) {
        logger.warn('archive table skipped (model not found)', { tableName });
        continue;
      }

      const cutoffDate = dayjs().subtract(policy.retentionDays, 'day').toDate();
      const { where, dateField } = buildArchiveQueryPlan(model, policy, cutoffDate);

      const result: TableMigrationResult = {
        archived_count: 0,
        deleted_count: 0,
        failed_count: 0,
        failed_ids: [],
      };

      results[tableName] = result;

      // Table schema will be ensured per-year when we encounter data for that year

      // Cursor-based pagination: track last processed (dateField, id) to avoid offset drift
      let cursorDate: any = null;
      let cursorId: string | null = null;

      // eslint-disable-next-line no-constant-condition
      while (true) {
        // Build cursor condition on top of the base where clause
        const cursorConditions =
          cursorId && cursorDate !== null
            ? [
                {
                  [Op.or]: [
                    { [dateField]: { [Op.gt]: cursorDate } },
                    { [Op.and]: [{ [dateField]: cursorDate }, { id: { [Op.gt]: cursorId } }] },
                  ],
                },
              ]
            : [];
        const existingAnd: any[] = (where as any)[Op.and] || [];
        const batchWhere: any = {
          ...where,
          [Op.and]: [...existingAnd, ...cursorConditions],
        };

        const records = await model.findAll({
          where: batchWhere,
          order: [
            [dateField, 'ASC'],
            ['id', 'ASC'],
          ],
          limit: config.defaults.batchSize,
        });

        if (records.length === 0) {
          break;
        }

        // Advance cursor to the last record in this batch
        const lastRecord = records[records.length - 1]!.toJSON();
        cursorDate = getRecordField(lastRecord, dateField);
        cursorId = lastRecord.id;

        // Separate archivable records from skipped ones
        const toArchive: any[] = [];
        for (const record of records) {
          const data = record.toJSON();
          const recordTime = getDateTimestamp(getRecordField(data, dateField));
          updateMinMax(result, recordTime);
          if (recordTime) {
            globalMin = globalMin === undefined ? recordTime : Math.min(globalMin, recordTime);
            globalMax = globalMax === undefined ? recordTime : Math.max(globalMax, recordTime);
          }

          if (policy.excludeConditions?.hasActiveSubscription) {
            if (hasActiveLinkedSubscription(tableName, data, activeSubs)) {
              continue;
            }
          }

          toArchive.push(data);
        }

        if (toArchive.length === 0) {
          // Whole batch was excluded; the cursor has already moved past it.
          await pauseBetweenBatches();
          continue;
        }

        if (options.dryRun) {
          result.archived_count += toArchive.length;
          await archiveCascadeRelations(policy, toArchive.map((d) => d.id).filter(Boolean), {
            archiveConnections,
            mainSequelize,
            results,
            dryRun: true,
            ensuredTables,
          });
          await pauseBetweenBatches();
          continue;
        }

        // Batch insert into archive DB, grouped by data year
        // ignoreDuplicates: true → Sequelize v6 SQLite dialect generates INSERT OR IGNORE INTO ...
        // On primary key conflict (retry after crash), conflicting rows are silently skipped.
        // Note: serializeJsonFields is required because bulkInsert doesn't auto-serialize JSON fields.
        const groupedByYear = groupRecordsByYear(toArchive);
        let insertFailed = false;

        for (const [year, yearRecords] of groupedByYear) {
          const archiveConn = await getOrCreateArchiveConnection(year, archiveConnections);
          const tableYearKey = `${tableName}_${year}`;

          // Ensure table exists in this year's archive db
          if (!ensuredTables.has(tableYearKey)) {
            await ensureArchiveTable(tableName, mainSequelize, archiveConn);
            ensuredTables.add(tableYearKey);
          }

          const archiveRows = yearRecords.map((data) =>
            serializeJsonFields({
              ...data,
              archived_at: new Date(),
            })
          );

          try {
            await archiveConn.getQueryInterface().bulkInsert(tableName, archiveRows, { ignoreDuplicates: true } as any);
          } catch (insertError: any) {
            // Insert failed for this year group; the records are still in the main DB
            const failedIds = yearRecords.map((d) => d.id).filter(Boolean);
            result.failed_count += failedIds.length;
            result.failed_ids.push(...failedIds);
            logger.error('archive batch insert failed', {
              tableName,
              year,
              count: failedIds.length,
              error: insertError,
            });
            insertFailed = true;
            break;
          }
        }

        if (insertFailed) {
          break; // Stop this table, proceed to next
        }

        // Archive cascade relations (child tables) before deleting parent.
        // Execution order: INSERT parent → INSERT child → DELETE child → DELETE parent.
        // Each intermediate crash state is meant to be recoverable on retry via INSERT OR IGNORE:
        // - Crash before DELETE child: retry re-inserts (ignored) then deletes normally
        // - Crash after DELETE child but before DELETE parent: retry finds 0 children
        //   in main DB (already deleted), skips cascade, then deletes parent. Archive
        //   already has both parent and child from the previous run.
        const parentIds = toArchive.map((d) => d.id).filter(Boolean);
        const cascadeSuccess = await archiveCascadeRelations(policy, parentIds, {
          archiveConnections,
          mainSequelize,
          results,
          dryRun: false,
          ensuredTables,
        });
        if (!cascadeSuccess) {
          logger.error('cascade archive failed, stopping table', { tableName });
          break;
        }

        // Batch delete from main DB (only after successful insert + cascade)
        const idsToDelete = toArchive.map((data) => data.id).filter(Boolean);
        try {
          const deletedCount = await model.destroy({ where: { id: { [Op.in]: idsToDelete } } });
          // IMPORTANT: Count after insert+cascade+delete all succeed. Uses toArchive.length (not INSERT
          // affected rows) because INSERT OR IGNORE skips duplicates on retry but we still count them.
          // This ensures archived_count === deleted_count for consistency check.
          // DO NOT move this before destroy — cascade/delete failure must not inflate archived_count.
          result.archived_count += toArchive.length;
          result.deleted_count += deletedCount;
        } catch (deleteError: any) {
          // The batch now exists in both the archive and the main DB; it is not counted as archived.
          logger.error('archive batch delete failed after successful insert', {
            tableName,
            count: idsToDelete.length,
            error: deleteError,
          });
          break; // Stop this table
        }

        checkDiskSpace(config.storage.minFreeDiskMB);
        await pauseBetweenBatches();
      }
    }

    if (options.dryRun) {
      await metadata.update({ status: 'completed' });
      return { status: 'dry_run_complete', tables: results };
    }

    // Consistency gate: archived_count tracks records that completed the full "archive + delete"
    // cycle, NOT the number of rows physically inserted into archive DB (which may differ on
    // retry due to INSERT OR IGNORE). This is intentional for idempotent retry semantics.
    for (const [tableName, result] of Object.entries(results)) {
      if (result.archived_count !== result.deleted_count) {
        throw new Error(
          `Data consistency check failed for ${tableName}: archived=${result.archived_count}, deleted=${result.deleted_count}, failed=${result.failed_count}`
        );
      }
    }

    // Cleanup old archive files if exceeding max limit
    const removedFiles = cleanupOldArchiveFiles(config.storage.maxArchiveFiles);
    if (removedFiles.length > 0) {
      logger.info('cleaned up old archive files', { removedFiles });
    }

    // Calculate checksum and file size for all touched archive files.
    // Explicit numeric comparator: the default sort compares elements as strings.
    const touchedYears = Array.from(archiveConnections.keys()).sort((a, b) => a - b);
    const archiveFileNames = touchedYears.map((year) => `archive-${year}.db`);
    let totalFileSize = 0;
    const checksums: string[] = [];

    for (const fileName of archiveFileNames) {
      const filePath = getArchiveFilePath(fileName);
      totalFileSize += getFileSize(filePath);
      try {
        const fileChecksum = crypto.createHash('sha256').update(fs.readFileSync(filePath)).digest('hex');
        checksums.push(`${fileName}:${fileChecksum.substring(0, 8)}`);
      } catch {
        // Best effort: an unreadable file is recorded as such instead of failing the job.
        checksums.push(`${fileName}:error`);
      }
    }

    const totalRecords = Object.values(results).reduce((sum, item) => sum + item.archived_count, 0);
    const duration = Date.now() - startAt;

    await metadata.update({
      tables: results,
      total_records: totalRecords,
      status: 'completed',
      archive_file: archiveFileNames.join(','), // Store all touched files
      checksum: checksums.join(';'),
      file_size: totalFileSize,
      duration_ms: duration,
      // Record timestamps are tracked in milliseconds; the metadata stores seconds.
      date_range_start: globalMin ? Math.floor(globalMin / 1000) : 0,
      date_range_end: globalMax ? Math.floor(globalMax / 1000) : 0,
    });

    return { status: 'completed', tables: results, total_records: totalRecords, archive_files: archiveFileNames };
  } catch (error: any) {
    if (metadata) {
      // Marking the job as failed must not replace the error that actually broke the job.
      try {
        await metadata.update({ status: 'failed', error: error?.message || String(error) });
      } catch (updateError) {
        logger.error('failed to mark archive job as failed', { id: metadata.id, error: updateError });
      }
    }
    throw error;
  } finally {
    // Close all archive connections
    for (const conn of archiveConnections.values()) {
      try {
        await conn.close();
      } catch (closeError) {
        logger.warn('failed to close archive connection', { error: closeError });
      }
    }
    await releaseArchiveLock(instanceId);
  }
}