npm - payment-kit - Versions diffs - 1.25.8 → 1.25.10 - Mend

payment-kit 1.25.8 → 1.25.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/api/src/crons/index.ts +24 -0
package/api/src/libs/archive/config.ts +254 -0
package/api/src/libs/archive/executor.ts +729 -0
package/api/src/libs/archive/index.ts +7 -0
package/api/src/libs/archive/lock.ts +50 -0
package/api/src/libs/archive/policy.ts +55 -0
package/api/src/libs/archive/query.ts +136 -0
package/api/src/libs/archive/snapshot.ts +291 -0
package/api/src/libs/archive/store.ts +200 -0
package/api/src/queues/archive.ts +32 -0
package/api/src/routes/archive.ts +176 -0
package/api/src/routes/index.ts +2 -0
package/api/src/routes/payment-stats.ts +167 -20
package/api/src/store/migrations/20260203-archive.ts +12 -0
package/api/src/store/migrations/20260204-revenue-snapshot.ts +19 -0
package/api/src/store/models/archive-lock.ts +55 -0
package/api/src/store/models/archive-metadata.ts +132 -0
package/api/src/store/models/index.ts +9 -0
package/api/src/store/models/revenue-snapshot.ts +110 -0
package/api/tests/libs/archive-config.spec.ts +185 -0
package/api/tests/libs/archive-executor.spec.ts +678 -0
package/api/tests/libs/archive-lock.spec.ts +130 -0
package/api/tests/libs/archive-policy.spec.ts +255 -0
package/api/tests/libs/archive-query.spec.ts +267 -0
package/api/tests/libs/archive-store.spec.ts +159 -0
package/blocklet.prefs.json +187 -0
package/blocklet.yml +1 -1
package/package.json +10 -10
package/src/locales/en.tsx +4 -0
package/src/locales/zh.tsx +4 -0
package/src/pages/admin/overview.tsx +2 -0
package/vite.config.ts +1 -0

package/api/src/libs/archive/executor.ts ADDED Viewed

@@ -0,0 +1,729 @@
+/* eslint-disable no-continue */
+/* eslint-disable no-await-in-loop */
+import crypto from 'crypto';
+import fs from 'fs';
+import { nanoid } from 'nanoid';
+import { Op, Sequelize, ModelStatic, Model } from 'sequelize';
+import dayjs from '../dayjs';
+import logger from '../logger';
+import {
+  ArchiveMetadata,
+  CreditGrant,
+  CreditTransaction,
+  Customer,
+  Discount,
+  Event,
+  Invoice,
+  InvoiceItem,
+  Job,
+  MeterEvent,
+  PaymentIntent,
+  Payout,
+  Price,
+  PriceQuote,
+  Product,
+  Refund,
+  SetupIntent,
+  Subscription,
+  SubscriptionItem,
+  SubscriptionSchedule,
+  CheckoutSession,
+  UsageRecord,
+  WebhookAttempt,
+  Coupon,
+} from '../../store/models';
+import { sequelize as mainSequelize } from '../../store/sequelize';
+import { getRetentionConfig, TableRetentionPolicy } from './config';
+import { acquireArchiveLock, releaseArchiveLock } from './lock';
+import { buildArchiveQueryPlan } from './policy';
+import { createRevenueSnapshotsForArchive } from './snapshot';
+import {
+  cleanupOldArchiveFiles,
+  ensureArchiveTable,
+  getArchiveDir,
+  getArchiveFilePath,
+  getArchiveFilePathForYear,
+  getFileSize,
+  getRecordYear,
+  openArchiveSequelize,
+} from './store';
+/** Per-table counters collected while one archive job processes a table. */
+type TableMigrationResult = {
+  /**
+   * Number of records that completed the full archive+delete cycle.
+   * NOT the number of rows inserted into archive DB (which may differ on retry due to INSERT OR IGNORE).
+   * Uses toArchive.length (batch size from main DB query), accumulated only after destroy succeeds.
+   * This design supports idempotent retry: on re-run, already-archived rows are ignored but still
+   * counted here because they will be deleted from main DB, keeping archived_count === deleted_count.
+   */
+  archived_count: number;
+  /** Sum of the row counts returned by `destroy()` on the main DB for this table. */
+  deleted_count: number;
+  /** Number of record ids collected from batches whose archive insert threw. */
+  failed_count: number;
+  /** Ids of the records counted in `failed_count`. */
+  failed_ids: string[];
+  /** Smallest date-field value seen for this table, in epoch milliseconds. */
+  oldest_record?: number;
+  /** Largest date-field value seen for this table, in epoch milliseconds. */
+  newest_record?: number;
+};
+/** Options accepted by `runArchiveJob`. */
+type ArchiveJobOptions = {
+  /** Table names to process; when omitted, every key of the retention config's `tables` is used. */
+  tables?: string[];
+  /** When true, candidate rows are only counted: no archive insert, no delete, no revenue snapshots. */
+  dryRun?: boolean;
+  /** Stored on the ArchiveMetadata row as `triggered_by`. */
+  triggeredBy: 'cron' | 'manual';
+  /** Stored on the ArchiveMetadata row as `triggered_by_user_id`. */
+  triggeredByUserId?: string;
+};
+// Subscription statuses that count as "active" when deciding whether linked records must be kept.
+const ACTIVE_SUBSCRIPTION_STATUSES = ['active', 'past_due', 'trialing', 'paused', 'incomplete'];
+// Maps an archivable table name to its Sequelize model.
+// Tables (or cascade children) without an entry here are skipped with a warning.
+const TABLE_MODELS: Record<string, ModelStatic<Model>> = {
+  meter_events: MeterEvent,
+  credit_transactions: CreditTransaction,
+  events: Event,
+  webhook_attempts: WebhookAttempt,
+  jobs: Job,
+  payment_intents: PaymentIntent,
+  invoices: Invoice,
+  invoice_items: InvoiceItem,
+  refunds: Refund,
+  payouts: Payout,
+  subscriptions: Subscription,
+  subscription_items: SubscriptionItem,
+  subscription_schedules: SubscriptionSchedule,
+  checkout_sessions: CheckoutSession,
+  credit_grants: CreditGrant,
+  products: Product,
+  prices: Price,
+  coupons: Coupon,
+  customers: Customer,
+  discounts: Discount,
+  usage_records: UsageRecord,
+  price_quotes: PriceQuote,
+  setup_intents: SetupIntent,
+};
+/** Read `field` from `record`; yields `undefined` when the record itself is null or undefined. */
+function getRecordField(record: any, field: string) {
+  if (record === null || record === undefined) {
+    return undefined;
+  }
+  return record[field];
+}
+/**
+ * Find the subscription id a record points at.
+ * `meter_events` rows carry it inside `payload`; every other table keeps it on the row itself.
+ */
+function extractSubscriptionId(tableName: string, record: any): string | undefined {
+  const holder = tableName === 'meter_events' ? record?.payload : record;
+  return holder?.subscription_id;
+}
+/**
+ * Find the customer id a record points at.
+ * `meter_events` rows carry it inside `payload`; every other table keeps it on the row itself.
+ */
+function extractCustomerId(tableName: string, record: any): string | undefined {
+  const holder = tableName === 'meter_events' ? record?.payload : record;
+  return holder?.customer_id;
+}
+/** Lookup sets built from subscriptions whose status is in ACTIVE_SUBSCRIPTION_STATUSES. */
+type ActiveSubscriptionSets = {
+  /** Ids of the active subscriptions. */
+  subscriptionIds: Set<string>;
+  /** Customer ids referenced by those subscriptions (rows without a customer_id contribute nothing). */
+  customerIds: Set<string>;
+};
+/**
+ * Query every subscription in an active status and collect its id and customer id into sets,
+ * so later per-record checks are in-memory lookups rather than extra queries.
+ */
+async function loadActiveSubscriptionSets(): Promise<ActiveSubscriptionSets> {
+  const rows = (await Subscription.findAll({
+    where: { status: { [Op.in]: ACTIVE_SUBSCRIPTION_STATUSES } },
+    attributes: ['id', 'customer_id'],
+    raw: true,
+  })) as any[];
+  const subscriptionIds = new Set<string>(rows.map((row) => row.id));
+  // Rows with an empty customer_id are left out of the customer set.
+  const customerIds = new Set<string>(rows.map((row) => row.customer_id).filter(Boolean));
+  return { subscriptionIds, customerIds };
+}
+/**
+ * Tell whether a record is tied to an active subscription, either directly through its
+ * subscription id or indirectly through a customer that owns an active subscription.
+ */
+function hasActiveLinkedSubscription(tableName: string, record: any, activeSubs: ActiveSubscriptionSets): boolean {
+  const isKnown = (id: string | undefined, activeIds: Set<string>): boolean =>
+    Boolean(id) && activeIds.has(id as string);
+  return (
+    isKnown(extractSubscriptionId(tableName, record), activeSubs.subscriptionIds) ||
+    isKnown(extractCustomerId(tableName, record), activeSubs.customerIds)
+  );
+}
+/**
+ * Convert a date-like value to epoch milliseconds.
+ * Falsy input (undefined, 0, empty string) and unparseable strings/numbers yield `undefined`.
+ * A `Date` instance is returned via `getTime()` as-is.
+ */
+function getDateTimestamp(value?: Date | string | number): number | undefined {
+  if (!value) {
+    return undefined;
+  }
+  if (value instanceof Date) {
+    return value.getTime();
+  }
+  const millis = new Date(value).getTime();
+  if (Number.isNaN(millis)) {
+    return undefined;
+  }
+  return millis;
+}
+/**
+ * Widen `result.oldest_record` / `result.newest_record` so they include `timestamp`.
+ * A missing or zero timestamp leaves the result untouched.
+ */
+function updateMinMax(result: TableMigrationResult, timestamp?: number) {
+  if (!timestamp) {
+    return;
+  }
+  const { oldest_record: oldest, newest_record: newest } = result;
+  if (oldest === undefined || timestamp < oldest) {
+    result.oldest_record = timestamp;
+  }
+  if (newest === undefined || timestamp > newest) {
+    result.newest_record = timestamp;
+  }
+}
+/**
+ * Prepare a record for QueryInterface.bulkInsert, which does not serialize JSON columns
+ * the way Model operations do.
+ * Arrays and plain objects are turned into JSON strings; Date instances, null, undefined
+ * and primitives are copied through unchanged.
+ */
+function serializeJsonFields(record: Record<string, any>): Record<string, any> {
+  const serialized: Record<string, any> = {};
+  Object.keys(record).forEach((column) => {
+    const value = record[column];
+    // typeof null is 'object' too, so null has to be excluded explicitly.
+    const needsStringify = typeof value === 'object' && value !== null && !(value instanceof Date);
+    serialized[column] = needsStringify ? JSON.stringify(value) : value;
+  });
+  return serialized;
+}
+/**
+ * Throw when the partition holding the archive directory has less than `minFreeDiskMB`
+ * megabytes available. The check is skipped when the runtime does not provide fs.statfsSync.
+ */
+function checkDiskSpace(minFreeDiskMB: number) {
+  if (typeof fs.statfsSync === 'function') {
+    // Measure the partition where archive files are stored.
+    const { bavail, bsize } = fs.statfsSync(getArchiveDir());
+    const freeMB = Math.floor((bavail * bsize) / (1024 * 1024));
+    if (freeMB < minFreeDiskMB) {
+      throw new Error(`Not enough free disk space: ${freeMB}MB < ${minFreeDiskMB}MB`);
+    }
+  }
+}
+/** Shared state handed to `archiveCascadeRelations` for each parent batch. */
+type CascadeArchiveContext = {
+  /** Open archive connections keyed by data year; new years are added on demand. */
+  archiveConnections: Map<number, Sequelize>;
+  /** Main DB connection, passed to `ensureArchiveTable` together with the archive connection. */
+  mainSequelize: Sequelize;
+  /** Per-table results of the running job; child tables get an entry the first time they have rows. */
+  results: Record<string, TableMigrationResult>;
+  /** When true, child rows are only counted: nothing is inserted or deleted. */
+  dryRun: boolean;
+  ensuredTables: Set<string>; // Track which table+year combinations have been ensured
+};
+/**
+ * Return the archive connection cached for `year`, opening and caching it on first use.
+ */
+function getOrCreateArchiveConnection(year: number, connections: Map<number, Sequelize>): Sequelize {
+  const cached = connections.get(year);
+  if (cached) {
+    return cached;
+  }
+  const opened = openArchiveSequelize(getArchiveFilePathForYear(year));
+  connections.set(year, opened);
+  return opened;
+}
+/**
+ * Bucket records by the year `getRecordYear` reports for each of them,
+ * preserving the input order inside every bucket.
+ */
+function groupRecordsByYear(records: any[]): Map<number, any[]> {
+  return records.reduce((byYear, record) => {
+    const year = getRecordYear(record);
+    const bucket = byYear.get(year);
+    if (bucket) {
+      bucket.push(record);
+    } else {
+      byYear.set(year, [record]);
+    }
+    return byYear;
+  }, new Map<number, any[]>());
+}
+/**
+ * Archive, then delete, the child rows that reference a batch of parent ids.
+ *
+ * For every `childTable -> foreignKey` pair in `policy.cascadeRelations`, the matching child rows
+ * are loaded from the main DB, inserted into the archive DB of their own year and finally removed
+ * from the main DB. In dry-run mode the rows are only counted.
+ *
+ * @param policy Retention policy of the parent table.
+ * @param parentIds Ids of the parent rows in the current batch.
+ * @param ctx Shared connections, per-table results and dry-run flag.
+ * @returns `true` when every relation was processed (or there was nothing to do), `false` as soon
+ *   as an archive insert or a main-DB delete fails; the caller is expected to stop on `false`.
+ */
+async function archiveCascadeRelations(
+  policy: TableRetentionPolicy,
+  parentIds: string[],
+  ctx: CascadeArchiveContext
+): Promise<boolean> {
+  const relations = policy.cascadeRelations;
+  if (!relations || parentIds.length === 0) {
+    return true;
+  }
+  for (const [childTable, foreignKey] of Object.entries(relations)) {
+    const childModel = TABLE_MODELS[childTable];
+    if (!childModel) {
+      logger.warn('cascade child model not found', { childTable });
+      continue;
+    }
+    const children = await childModel.findAll({
+      where: { [foreignKey]: { [Op.in]: parentIds } },
+    });
+    if (children.length === 0) {
+      continue;
+    }
+    // Lazily create the result bucket for this child table.
+    const childResult =
+      ctx.results[childTable] ??
+      (ctx.results[childTable] = {
+        archived_count: 0,
+        deleted_count: 0,
+        failed_count: 0,
+        failed_ids: [],
+      });
+    // Children are routed to archive files by their own year, not the parent's.
+    const childRows = children.map((child) => child.toJSON());
+    const rowsByYear = groupRecordsByYear(childRows);
+    if (ctx.dryRun) {
+      childResult.archived_count += childRows.length;
+      continue;
+    }
+    // Insert one year group at a time into that year's archive DB.
+    for (const [year, rowsForYear] of rowsByYear) {
+      const archiveConn = await getOrCreateArchiveConnection(year, ctx.archiveConnections);
+      const ensuredKey = `${childTable}_${year}`;
+      // Create the table in this year's archive DB the first time it is needed.
+      if (!ctx.ensuredTables.has(ensuredKey)) {
+        await ensureArchiveTable(childTable, ctx.mainSequelize, archiveConn);
+        ctx.ensuredTables.add(ensuredKey);
+      }
+      const rowsToInsert = rowsForYear.map((row) => serializeJsonFields({ ...row, archived_at: new Date() }));
+      try {
+        await archiveConn
+          .getQueryInterface()
+          .bulkInsert(childTable, rowsToInsert, { ignoreDuplicates: true } as any);
+      } catch (insertError: any) {
+        const failedIds = rowsToInsert.map((row) => row.id).filter(Boolean);
+        childResult.failed_count += failedIds.length;
+        childResult.failed_ids.push(...failedIds);
+        logger.error('cascade archive insert failed', { childTable, year, error: insertError });
+        return false;
+      }
+    }
+    // Every year group is archived; only now is it safe to remove the children from the main DB.
+    const idsToRemove = childRows.map((row) => row.id).filter(Boolean);
+    try {
+      const removed = await childModel.destroy({ where: { id: { [Op.in]: idsToRemove } } });
+      // Counted only after insert and delete both succeeded. The batch length is used instead of
+      // the INSERT row count because INSERT OR IGNORE skips duplicates on retry, yet those rows
+      // still complete the archive+delete cycle here.
+      childResult.archived_count += childRows.length;
+      childResult.deleted_count += removed;
+    } catch (deleteError: any) {
+      logger.error('cascade archive delete failed', { childTable, error: deleteError });
+      return false;
+    }
+  }
+  return true;
+}
+/**
+ * Count the rows of `tableName` that the given policy would archive right now and report the
+ * oldest and newest value of the policy's date field among them (as ISO strings).
+ * Returns `null` when no model is registered for the table.
+ */
+async function getTablePreview(tableName: string, policyRetentionDays: number, policy: any) {
+  const model = TABLE_MODELS[tableName];
+  if (!model) {
+    return null;
+  }
+  const cutoffDate = dayjs().subtract(policyRetentionDays, 'day').toDate();
+  const { where, dateField } = buildArchiveQueryPlan(model, policy, cutoffDate);
+  const toIso = (value: unknown) => (value ? new Date(value as any).toISOString() : undefined);
+  // The three aggregates are independent, so they run concurrently.
+  const [count, oldest, newest] = await Promise.all([
+    model.count({ where }),
+    model.min(dateField, { where }),
+    model.max(dateField, { where }),
+  ]);
+  return { count, oldestRecord: toIso(oldest), newestRecord: toIso(newest) };
+}
+/**
+ * Mark archive jobs that are still `in_progress` as failed and clear the `archive_job` lock row.
+ * Does nothing when no such job exists. Never throws: any error is logged and swallowed.
+ */
+export async function recoverFromCrash(): Promise<void> {
+  try {
+    const interruptedJobs = await ArchiveMetadata.findAll({ where: { status: 'in_progress' } });
+    if (interruptedJobs.length > 0) {
+      for (const job of interruptedJobs) {
+        logger.warn('recovering stale archive job', { id: job.id, archive_file: job.archive_file });
+        await job.update({
+          status: 'failed',
+          error: 'Process crashed or restarted during archive job',
+          duration_ms: Date.now() - new Date(job.created_at).getTime(),
+        });
+      }
+      // Release the lock that the interrupted run may still be holding.
+      const { ArchiveLock } = await import('../../store/models/archive-lock');
+      await ArchiveLock.update(
+        { locked_by: null, locked_at: null, expires_at: null },
+        { where: { id: 'archive_job' } }
+      );
+      logger.info('archive crash recovery completed', { recoveredJobs: interruptedJobs.length });
+    }
+  } catch (error) {
+    logger.error('archive crash recovery failed', { error });
+  }
+}
+/**
+ * Build a per-table preview (row count, oldest and newest record) of what an archive run would
+ * pick up. Tables that are disabled, have a non-positive retention, or have no registered model
+ * are left out of the result.
+ *
+ * @param options.tables Table names to preview; defaults to every table in the retention config.
+ */
+export async function previewArchive(options: { tables?: string[] }) {
+  const { tables: policies } = getRetentionConfig();
+  const tableNames = options.tables || Object.keys(policies);
+  const preview: Record<string, any> = {};
+  for (const tableName of tableNames) {
+    const policy = policies[tableName];
+    // Written as a negated `<= 0` so the set of accepted retention values stays exactly the same.
+    if (policy?.enabled && !(policy.retentionDays <= 0)) {
+      const data = await getTablePreview(tableName, policy.retentionDays, policy);
+      if (data) {
+        preview[tableName] = data;
+      }
+    }
+  }
+  return preview;
+}
+/**
+ * Run one archive job: move rows older than each table's retention window from the main DB
+ * into per-year archive databases, then delete them from the main DB.
+ *
+ * Flow: acquire the archive lock, check free disk space, create an `in_progress` ArchiveMetadata
+ * row, create revenue snapshots (real runs only), then for every enabled table page through the
+ * matching rows and insert / cascade / delete batch by batch. Afterwards archived_count must equal
+ * deleted_count for every table, old archive files are cleaned up, and checksums, sizes and totals
+ * are stored on the metadata row. Archive connections are closed and the lock is released in
+ * `finally`, whatever the outcome.
+ *
+ * @param options.tables Table names to process; defaults to every key of the retention config.
+ * @param options.dryRun When true, candidate rows are only counted; nothing is inserted into an
+ *   archive or deleted from the main DB (the metadata row is still created).
+ * @param options.triggeredBy Stored on the metadata row.
+ * @param options.triggeredByUserId Stored on the metadata row.
+ * @returns `{ status: 'disabled' }` when archiving is turned off, `{ status: 'dry_run_complete', tables }`
+ *   for a dry run, otherwise `{ status: 'completed', tables, total_records, archive_files }`.
+ * @throws Error when the lock is already held, when free disk space is below the configured
+ *   minimum, or when the per-table consistency check fails. Any other error raised inside the job
+ *   is rethrown after the metadata row has been marked `failed`.
+ */
+export async function runArchiveJob(options: ArchiveJobOptions) {
+  const config = getRetentionConfig();
+  if (!config.enabled) {
+    return { status: 'disabled' };
+  }
+  const instanceId = nanoid();
+  const lockAcquired = await acquireArchiveLock(instanceId);
+  if (!lockAcquired) {
+    throw new Error('Archive job already running');
+  }
+  const startAt = Date.now();
+  // Use Map to manage archive connections by year (e.g., 2024 -> archive-2024.db)
+  const archiveConnections = new Map<number, Sequelize>();
+  const ensuredTables = new Set<string>(); // Track table+year combinations
+  let metadata: ArchiveMetadata | null = null;
+  try {
+    checkDiskSpace(config.storage.minFreeDiskMB);
+    // Archive files are now organized by data year, not execution time
+    // metadata.archive_file will store comma-separated list of files touched
+    metadata = await ArchiveMetadata.create({
+      id: nanoid(),
+      archive_file: '', // Will be updated with actual files at the end
+      date_range_start: 0,
+      date_range_end: 0,
+      tables: {},
+      total_records: 0,
+      status: 'in_progress',
+      triggered_by: options.triggeredBy,
+      triggered_by_user_id: options.triggeredByUserId,
+      query_count: 0,
+    });
+    const results: Record<string, TableMigrationResult> = {};
+    let globalMin: number | undefined;
+    let globalMax: number | undefined;
+    // Pre-load active subscription sets once for all tables
+    const activeSubs = await loadActiveSubscriptionSets();
+    // Create revenue snapshots for months that will be archived.
+    // Math.min() of an empty list is Infinity, which would turn the cutoff into NaN, so the
+    // snapshot step is skipped entirely when no table is archivable.
+    const activeRetentionDays = Object.values(config.tables)
+      .filter((p) => p.enabled && p.retentionDays > 0)
+      .map((p) => p.retentionDays);
+    if (!options.dryRun && activeRetentionDays.length > 0) {
+      const earliestCutoff = dayjs()
+        .subtract(Math.min(...activeRetentionDays), 'day')
+        .unix();
+      const snapshotCount = await createRevenueSnapshotsForArchive(earliestCutoff, metadata.id);
+      logger.info('Revenue snapshots created before archive', { snapshotCount });
+    }
+    const tableNames = options.tables || Object.keys(config.tables);
+    for (const tableName of tableNames) {
+      const policy = config.tables[tableName];
+      if (!policy?.enabled || policy.retentionDays <= 0) {
+        continue;
+      }
+      const model = TABLE_MODELS[tableName];
+      if (!model) {
+        logger.warn('archive table skipped (model not found)', { tableName });
+        continue;
+      }
+      const cutoffDate = dayjs().subtract(policy.retentionDays, 'day').toDate();
+      const { where, dateField } = buildArchiveQueryPlan(model, policy, cutoffDate);
+      const result: TableMigrationResult = {
+        archived_count: 0,
+        deleted_count: 0,
+        failed_count: 0,
+        failed_ids: [],
+      };
+      results[tableName] = result;
+      // Table schema will be ensured per-year when we encounter data for that year
+      // Cursor-based pagination: track last processed (dateField, id) to avoid offset drift
+      let cursorDate: any = null;
+      let cursorId: string | null = null;
+      // eslint-disable-next-line no-constant-condition
+      while (true) {
+        // Build cursor condition on top of the base where clause
+        const cursorConditions =
+          cursorId && cursorDate !== null
+            ? [
+                {
+                  [Op.or]: [
+                    { [dateField]: { [Op.gt]: cursorDate } },
+                    { [Op.and]: [{ [dateField]: cursorDate }, { id: { [Op.gt]: cursorId } }] },
+                  ],
+                },
+              ]
+            : [];
+        const existingAnd: any[] = (where as any)[Op.and] || [];
+        const batchWhere: any = {
+          ...where,
+          [Op.and]: [...existingAnd, ...cursorConditions],
+        };
+        const records = await model.findAll({
+          where: batchWhere,
+          order: [
+            [dateField, 'ASC'],
+            ['id', 'ASC'],
+          ],
+          limit: config.defaults.batchSize,
+        });
+        if (records.length === 0) {
+          break;
+        }
+        // Advance cursor to the last record in this batch
+        const lastRecord = records[records.length - 1]!.toJSON();
+        cursorDate = getRecordField(lastRecord, dateField);
+        cursorId = lastRecord.id;
+        // Separate archivable records from skipped ones
+        const toArchive: any[] = [];
+        for (const record of records) {
+          const data = record.toJSON();
+          const recordTime = getDateTimestamp(getRecordField(data, dateField));
+          updateMinMax(result, recordTime);
+          if (recordTime) {
+            globalMin = globalMin === undefined ? recordTime : Math.min(globalMin, recordTime);
+            globalMax = globalMax === undefined ? recordTime : Math.max(globalMax, recordTime);
+          }
+          if (policy.excludeConditions?.hasActiveSubscription) {
+            if (hasActiveLinkedSubscription(tableName, data, activeSubs)) {
+              continue;
+            }
+          }
+          toArchive.push(data);
+        }
+        if (toArchive.length === 0) {
+          await new Promise((resolve) => {
+            setTimeout(resolve, 100);
+          });
+          continue;
+        }
+        if (options.dryRun) {
+          result.archived_count += toArchive.length;
+          await archiveCascadeRelations(policy, toArchive.map((d) => d.id).filter(Boolean), {
+            archiveConnections,
+            mainSequelize,
+            results,
+            dryRun: true,
+            ensuredTables,
+          });
+          await new Promise((resolve) => {
+            setTimeout(resolve, 100);
+          });
+          continue;
+        }
+        // Batch insert into archive DB, grouped by data year
+        // ignoreDuplicates: true → Sequelize v6 SQLite dialect generates INSERT OR IGNORE INTO ...
+        // On primary key conflict (retry after crash), conflicting rows are silently skipped.
+        // Note: serializeJsonFields is required because bulkInsert doesn't auto-serialize JSON fields.
+        const groupedByYear = groupRecordsByYear(toArchive);
+        let insertFailed = false;
+        for (const [year, yearRecords] of groupedByYear) {
+          const archiveConn = await getOrCreateArchiveConnection(year, archiveConnections);
+          const tableYearKey = `${tableName}_${year}`;
+          // Ensure table exists in this year's archive db
+          if (!ensuredTables.has(tableYearKey)) {
+            await ensureArchiveTable(tableName, mainSequelize, archiveConn);
+            ensuredTables.add(tableYearKey);
+          }
+          const archiveRows = yearRecords.map((data) =>
+            serializeJsonFields({
+              ...data,
+              archived_at: new Date(),
+            })
+          );
+          try {
+            await archiveConn.getQueryInterface().bulkInsert(tableName, archiveRows, { ignoreDuplicates: true } as any);
+          } catch (insertError: any) {
+            // Insert failed for this year group: its records are still in the main DB because the
+            // delete below is never reached. Year groups inserted earlier in this batch may already
+            // be in their archive files; a retry re-inserts them with INSERT OR IGNORE.
+            const failedIds = yearRecords.map((d) => d.id).filter(Boolean);
+            result.failed_count += failedIds.length;
+            result.failed_ids.push(...failedIds);
+            logger.error('archive batch insert failed', {
+              tableName,
+              year,
+              count: failedIds.length,
+              error: insertError,
+            });
+            insertFailed = true;
+            break;
+          }
+        }
+        if (insertFailed) {
+          break; // Stop this table, proceed to next
+        }
+        // Archive cascade relations (child tables) before deleting parent.
+        // Execution order: INSERT parent → INSERT child → DELETE child → DELETE parent.
+        // Crash at any point is recoverable via INSERT OR IGNORE on retry:
+        //   - Crash before DELETE child: retry re-inserts (ignored) then deletes normally
+        //   - Crash after DELETE child but before DELETE parent: retry finds 0 children
+        //     in main DB (already deleted), skips cascade, then deletes parent. Archive
+        //     already has both parent and child from the previous run.
+        // All intermediate crash states have been verified recoverable — no data loss path exists.
+        const parentIds = toArchive.map((d) => d.id).filter(Boolean);
+        const cascadeSuccess = await archiveCascadeRelations(policy, parentIds, {
+          archiveConnections,
+          mainSequelize,
+          results,
+          dryRun: false,
+          ensuredTables,
+        });
+        if (!cascadeSuccess) {
+          logger.error('cascade archive failed, stopping table', { tableName });
+          break;
+        }
+        // Batch delete from main DB (only after successful insert + cascade)
+        const idsToDelete = toArchive.map((data) => data.id).filter(Boolean);
+        try {
+          const deletedCount = await model.destroy({ where: { id: { [Op.in]: idsToDelete } } });
+          // IMPORTANT: Count after insert+cascade+delete all succeed. Uses toArchive.length (not INSERT
+          // affected rows) because INSERT OR IGNORE skips duplicates on retry but we still count them.
+          // This ensures archived_count === deleted_count for consistency check.
+          // DO NOT move this before destroy — cascade/delete failure must not inflate archived_count.
+          result.archived_count += toArchive.length;
+          result.deleted_count += deletedCount;
+        } catch (deleteError: any) {
+          // Data in both places — consistency check will catch this
+          logger.error('archive batch delete failed after successful insert', {
+            tableName,
+            count: idsToDelete.length,
+            error: deleteError,
+          });
+          break; // Stop this table, consistency check handles the rest
+        }
+        checkDiskSpace(config.storage.minFreeDiskMB);
+        // Short pause between batches.
+        await new Promise((resolve) => {
+          setTimeout(resolve, 100);
+        });
+      }
+    }
+    if (options.dryRun) {
+      await metadata.update({ status: 'completed' });
+      return { status: 'dry_run_complete', tables: results };
+    }
+    // Consistency gate: archived_count tracks records that completed the full "archive + delete"
+    // cycle, NOT the number of rows physically inserted into archive DB (which may differ on
+    // retry due to INSERT OR IGNORE). This is intentional for idempotent retry semantics.
+    for (const [tableName, result] of Object.entries(results)) {
+      if (result.archived_count !== result.deleted_count) {
+        throw new Error(
+          `Data consistency check failed for ${tableName}: archived=${result.archived_count}, deleted=${result.deleted_count}, failed=${result.failed_count}`
+        );
+      }
+    }
+    // Cleanup old archive files if exceeding max limit
+    // NOTE(review): this runs while this job's archive connections are still open (they are closed
+    // in `finally`); confirm cleanupOldArchiveFiles can never pick a file touched by this run.
+    const removedFiles = cleanupOldArchiveFiles(config.storage.maxArchiveFiles);
+    if (removedFiles.length > 0) {
+      logger.info('cleaned up old archive files', { removedFiles });
+    }
+    // Calculate checksum and file size for all touched archive files
+    const touchedYears = Array.from(archiveConnections.keys()).sort();
+    const archiveFileNames = touchedYears.map((year) => `archive-${year}.db`);
+    let totalFileSize = 0;
+    const checksums: string[] = [];
+    for (const fileName of archiveFileNames) {
+      const filePath = getArchiveFilePath(fileName);
+      totalFileSize += getFileSize(filePath);
+      try {
+        const fileChecksum = crypto.createHash('sha256').update(fs.readFileSync(filePath)).digest('hex');
+        // Only the first 8 hex characters of the digest are stored per file.
+        checksums.push(`${fileName}:${fileChecksum.substring(0, 8)}`);
+      } catch {
+        checksums.push(`${fileName}:error`);
+      }
+    }
+    const totalRecords = Object.values(results).reduce((sum, item) => sum + item.archived_count, 0);
+    const duration = Date.now() - startAt;
+    await metadata.update({
+      tables: results,
+      total_records: totalRecords,
+      status: 'completed',
+      archive_file: archiveFileNames.join(','), // Store all touched files
+      checksum: checksums.join(';'),
+      file_size: totalFileSize,
+      duration_ms: duration,
+      // Timestamps are tracked in milliseconds and stored in seconds.
+      date_range_start: globalMin ? Math.floor(globalMin / 1000) : 0,
+      date_range_end: globalMax ? Math.floor(globalMax / 1000) : 0,
+    });
+    return { status: 'completed', tables: results, total_records: totalRecords, archive_files: archiveFileNames };
+  } catch (error: any) {
+    if (metadata) {
+      // A failure while recording the failure must not replace the original error.
+      try {
+        await metadata.update({ status: 'failed', error: error?.message || String(error) });
+      } catch (updateError) {
+        logger.error('failed to mark archive job as failed', { id: metadata.id, error: updateError });
+      }
+    }
+    throw error;
+  } finally {
+    // Close all archive connections
+    for (const conn of archiveConnections.values()) {
+      try {
+        await conn.close();
+      } catch (closeError) {
+        logger.warn('failed to close archive connection', { error: closeError });
+      }
+    }
+    await releaseArchiveLock(instanceId);
+  }
+}