resplite 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,9 @@ function sleep(ms) {
37
37
 
38
38
  /**
39
39
  * Run bulk import: SCAN keys from Redis, import into RespLite DB with checkpointing.
40
+ * On SIGINT/SIGTERM, checkpoint progress, set run status to ABORTED, close DB and rethrow.
41
+ * DB is always closed in a finally block (graceful shutdown when process is interrupted).
42
+ *
40
43
  * @param {import('redis').RedisClientType} redisClient
41
44
  * @param {string} dbPath
42
45
  * @param {string} runId
@@ -65,41 +68,47 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
65
68
  } = options;
66
69
 
67
70
  const db = openDb(dbPath, { pragmaTemplate });
68
- const keys = createKeysStorage(db);
69
- const strings = createStringsStorage(db, keys);
70
- const hashes = createHashesStorage(db, keys);
71
- const sets = createSetsStorage(db, keys);
72
- const lists = createListsStorage(db, keys);
73
- const zsets = createZsetsStorage(db, keys);
74
- const storages = { keys, strings, hashes, sets, lists, zsets };
75
-
76
- createRun(db, runId, sourceUri, { scan_count_hint: scan_count });
77
- let run = getRun(db, runId);
78
- if (!run) throw new Error(`Run ${runId} not found`);
79
-
80
- let cursor = resume && run.scan_cursor !== undefined ? parseInt(String(run.scan_cursor), 10) : 0;
81
- let scanned_keys = resume ? (run.scanned_keys || 0) : 0;
82
- let migrated_keys = resume ? (run.migrated_keys || 0) : 0;
83
- let skipped_keys = resume ? (run.skipped_keys || 0) : 0;
84
- let error_keys = resume ? (run.error_keys || 0) : 0;
85
- let migrated_bytes = resume ? (run.migrated_bytes || 0) : 0;
86
-
87
- if (!resume) {
88
- updateBulkProgress(db, runId, { scan_cursor: String(cursor), scanned_keys, migrated_keys, skipped_keys, error_keys, migrated_bytes });
89
- }
90
-
91
- let lastCheckpointTime = Date.now();
92
- let batchScanned = 0;
93
- let batchBytes = 0;
94
- const minIntervalMs = max_rps > 0 ? 1000 / max_rps : 0;
95
- let lastKeyTime = 0;
71
+ let abortRequested = false;
72
+ const onSignal = () => {
73
+ abortRequested = true;
74
+ };
75
+ process.on('SIGINT', onSignal);
76
+ process.on('SIGTERM', onSignal);
96
77
 
97
78
  try {
98
- do {
79
+ const keys = createKeysStorage(db);
80
+ const strings = createStringsStorage(db, keys);
81
+ const hashes = createHashesStorage(db, keys);
82
+ const sets = createSetsStorage(db, keys);
83
+ const lists = createListsStorage(db, keys);
84
+ const zsets = createZsetsStorage(db, keys);
85
+ const storages = { keys, strings, hashes, sets, lists, zsets };
86
+
87
+ createRun(db, runId, sourceUri, { scan_count_hint: scan_count });
88
+ let run = getRun(db, runId);
89
+ if (!run) throw new Error(`Run ${runId} not found`);
90
+
91
+ let cursor = resume && run.scan_cursor !== undefined ? parseInt(String(run.scan_cursor), 10) : 0;
92
+ let scanned_keys = resume ? (run.scanned_keys || 0) : 0;
93
+ let migrated_keys = resume ? (run.migrated_keys || 0) : 0;
94
+ let skipped_keys = resume ? (run.skipped_keys || 0) : 0;
95
+ let error_keys = resume ? (run.error_keys || 0) : 0;
96
+ let migrated_bytes = resume ? (run.migrated_bytes || 0) : 0;
97
+
98
+ if (!resume) {
99
+ updateBulkProgress(db, runId, { scan_cursor: String(cursor), scanned_keys, migrated_keys, skipped_keys, error_keys, migrated_bytes });
100
+ }
101
+
102
+ let lastCheckpointTime = Date.now();
103
+ let batchScanned = 0;
104
+ let batchBytes = 0;
105
+ const minIntervalMs = max_rps > 0 ? 1000 / max_rps : 0;
106
+ let lastKeyTime = 0;
107
+
108
+ outer: do {
99
109
  run = getRun(db, runId);
100
- if (run && run.status === RUN_STATUS.ABORTED) {
101
- break;
102
- }
110
+ if (run && run.status === RUN_STATUS.ABORTED) break;
111
+ if (abortRequested) break;
103
112
  while (run && run.status === RUN_STATUS.PAUSED) {
104
113
  await sleep(2000);
105
114
  run = getRun(db, runId);
@@ -111,8 +120,9 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
111
120
  const keyList = parsed.keys || [];
112
121
 
113
122
  for (const keyName of keyList) {
123
+ if (abortRequested) break outer;
114
124
  run = getRun(db, runId);
115
- if (run && run.status === RUN_STATUS.ABORTED) break;
125
+ if (run && run.status === RUN_STATUS.ABORTED) break outer;
116
126
  while (run && run.status === RUN_STATUS.PAUSED) {
117
127
  await sleep(2000);
118
128
  run = getRun(db, runId);
@@ -162,6 +172,23 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
162
172
  }
163
173
  } while (cursor !== 0);
164
174
 
175
+ if (abortRequested) {
176
+ updateBulkProgress(db, runId, {
177
+ scan_cursor: String(cursor),
178
+ scanned_keys,
179
+ migrated_keys,
180
+ skipped_keys,
181
+ error_keys,
182
+ migrated_bytes,
183
+ });
184
+ setRunStatus(db, runId, RUN_STATUS.ABORTED);
185
+ run = getRun(db, runId);
186
+ if (onProgress && run) onProgress(run);
187
+ const err = new Error('Bulk import interrupted by signal (SIGINT/SIGTERM)');
188
+ err.code = 'BULK_ABORTED';
189
+ throw err;
190
+ }
191
+
165
192
  updateBulkProgress(db, runId, {
166
193
  scan_cursor: '0',
167
194
  scanned_keys,
@@ -173,9 +200,15 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
173
200
  setRunStatus(db, runId, RUN_STATUS.COMPLETED);
174
201
  return getRun(db, runId);
175
202
  } catch (err) {
176
- setRunStatus(db, runId, RUN_STATUS.FAILED);
177
- updateBulkProgress(db, runId, { last_error: err.message });
178
- logError(db, runId, 'bulk', err.message, null);
203
+ if (err.code !== 'BULK_ABORTED') {
204
+ setRunStatus(db, runId, RUN_STATUS.FAILED);
205
+ updateBulkProgress(db, runId, { last_error: err.message });
206
+ logError(db, runId, 'bulk', err.message, null);
207
+ }
179
208
  throw err;
209
+ } finally {
210
+ process.off('SIGINT', onSignal);
211
+ process.off('SIGTERM', onSignal);
212
+ db.close();
180
213
  }
181
214
  }
@@ -24,6 +24,7 @@ import { runPreflight, readKeyspaceEvents, setKeyspaceEvents } from './preflight
24
24
  import { runBulkImport } from './bulk.js';
25
25
  import { runApplyDirty } from './apply-dirty.js';
26
26
  import { runVerify } from './verify.js';
27
+ import { runMigrateSearch } from './migrate-search.js';
27
28
  import { getRun, getDirtyCounts } from './registry.js';
28
29
 
29
30
  /**
@@ -50,6 +51,7 @@ import { getRun, getDirtyCounts } from './registry.js';
50
51
  * status(): { run: object, dirty: object } | null,
51
52
  * applyDirty(opts?: { batchKeys?: number, maxRps?: number }): Promise<object>,
52
53
  * verify(opts?: { samplePct?: number, maxSample?: number }): Promise<object>,
54
+ * migrateSearch(opts?: { onlyIndices?: string[], scanCount?: number, maxRps?: number, batchDocs?: number, maxSuggestions?: number, skipExisting?: boolean, withSuggestions?: boolean, onProgress?: function }): Promise<object>,
53
55
  * close(): Promise<void>,
54
56
  * }}
55
57
  */
@@ -173,6 +175,35 @@ export function createMigration({
173
175
  return runVerify(client, to, { pragmaTemplate, samplePct, maxSample });
174
176
  },
175
177
 
178
+ /**
179
+ * Step 5 — Migrate search indices: copy RediSearch index schemas and documents
180
+ * into RespLite FT.* tables.
181
+ *
182
+ * Requires RediSearch (Redis Stack or redis/search module) on the source.
183
+ * Only HASH-based indices with TEXT/TAG/NUMERIC fields are supported.
184
+ * TAG and NUMERIC fields are mapped to TEXT.
185
+ *
186
+ * @param {{
187
+ * onlyIndices?: string[],
188
+ * scanCount?: number,
189
+ * maxRps?: number,
190
+ * batchDocs?: number,
191
+ * maxSuggestions?: number,
192
+ * skipExisting?: boolean,
193
+ * withSuggestions?: boolean,
194
+ * onProgress?: (result: object) => void
195
+ * }} [opts]
196
+ * @returns {Promise<{ indices: object[], aborted: boolean }>}
197
+ */
198
+ async migrateSearch(opts = {}) {
199
+ const client = await getClient();
200
+ return runMigrateSearch(client, to, {
201
+ pragmaTemplate,
202
+ maxRps,
203
+ ...opts,
204
+ });
205
+ },
206
+
176
207
  /**
177
208
  * Disconnect from Redis. Call when done with all migration operations.
178
209
  */
@@ -185,6 +216,6 @@ export function createMigration({
185
216
  };
186
217
  }
187
218
 
188
- export { runPreflight, readKeyspaceEvents, setKeyspaceEvents, runBulkImport, runApplyDirty, runVerify };
219
+ export { runPreflight, readKeyspaceEvents, setKeyspaceEvents, runBulkImport, runApplyDirty, runVerify, runMigrateSearch };
189
220
  export { startDirtyTracker } from './tracker.js';
190
221
  export { getRun, getDirtyCounts, createRun, setRunStatus, logError } from './registry.js';
@@ -0,0 +1,457 @@
1
+ /**
2
+ * Migrate RediSearch indices to RespLite FT.* search indices (SPEC_F §F.10).
3
+ *
4
+ * For each index in the source Redis:
5
+ * 1. FT._LIST → enumerate index names
6
+ * 2. FT.INFO → read schema (prefix patterns, field attributes)
7
+ * 3. Map RediSearch field types to RespLite TEXT fields
8
+ * 4. FT.CREATE in RespLite (skip if already exists and skipExisting=true)
9
+ * 5. SCAN keys by prefix → HGETALL → addDocument in SQLite batches
10
+ * 6. FT.SUGGET → import suggestions
11
+ *
12
+ * Graceful shutdown: SIGINT/SIGTERM finishes the current document, checkpoints, closes DB.
13
+ */
14
+
15
+ import { openDb } from '../storage/sqlite/db.js';
16
+ import { createIndex, addDocument, suggestionAdd } from '../storage/sqlite/search.js';
17
+
18
+ const INDEX_NAME_RE = /^[A-Za-z][A-Za-z0-9:_-]{0,63}$/;
19
+
20
+ /** RediSearch field types that can be represented as TEXT in RespLite. */
21
+ const TEXT_COMPATIBLE = new Set(['TEXT', 'TAG', 'NUMERIC']);
22
+
23
/** Resolve after `ms` milliseconds (promise-based delay). */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
26
+
27
/**
 * Enumerate all RediSearch index names via FT._LIST.
 * Best-effort: returns [] when the command errors (e.g. the RediSearch
 * module is not loaded on the source server) or returns a non-array reply.
 * @param {import('redis').RedisClientType} redisClient
 * @returns {Promise<string[]>}
 */
async function listSearchIndices(redisClient) {
  let reply;
  try {
    reply = await redisClient.sendCommand(['FT._LIST']);
  } catch (_) {
    // No Search module (or transport error) — treat as "no indices".
    return [];
  }
  if (!Array.isArray(reply)) return [];
  return reply.map((name) => String(name));
}
42
+
43
/**
 * Convert a flat alternating [key, value, key, value, …] Redis reply array
 * into a plain object. Keys are stringified and lower-cased; values are kept
 * untouched (nested arrays stay arrays). A trailing key with no value is
 * ignored; later duplicate keys overwrite earlier ones.
 * @param {unknown} arr
 * @returns {Record<string, unknown>}
 */
function parseFlat(arr) {
  if (!Array.isArray(arr)) return {};
  const result = {};
  let i = 0;
  while (i + 1 < arr.length) {
    result[String(arr[i]).toLowerCase()] = arr[i + 1];
    i += 2;
  }
  return result;
}
57
+
58
/**
 * Read RediSearch index metadata (key type, prefix patterns, field
 * attributes) via FT.INFO.
 *
 * node-redis v4 may return either a plain object (when the Search module is
 * handled natively) or the flat [key, value, …] array form from sendCommand;
 * both shapes are normalised here. Missing pieces fall back to
 * keyType 'HASH' and a single empty prefix (match everything).
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {string} indexName
 * @returns {Promise<{
 *   keyType: string,
 *   prefixes: string[],
 *   attributes: Array<{ identifier: string, attribute: string, type: string }>
 * }>}
 */
async function getSearchIndexInfo(redisClient, indexName) {
  const reply = await redisClient.sendCommand(['FT.INFO', indexName]);

  // Normalise the top-level reply into a lower-cased key → value object.
  let info;
  if (reply && typeof reply === 'object' && !Array.isArray(reply)) {
    info = {};
    for (const key of Object.keys(reply)) info[key.toLowerCase()] = reply[key];
  } else {
    info = parseFlat(reply);
  }

  // ── index_definition → key_type + prefixes ───────────────────────────
  let keyType = 'HASH';
  let prefixes = [''];
  const definition = info['index_definition'] ?? info['indexdefinition'];
  if (Array.isArray(definition)) {
    const def = parseFlat(definition);
    if (def['key_type']) keyType = String(def['key_type']).toUpperCase();
    const p = def['prefixes'];
    if (Array.isArray(p) && p.length > 0) prefixes = p.map(String);
    else if (typeof p === 'string' && p.length > 0) prefixes = [p];
  } else if (definition && typeof definition === 'object') {
    if (definition.key_type) keyType = String(definition.key_type).toUpperCase();
    const p = definition.prefixes;
    if (Array.isArray(p) && p.length > 0) prefixes = p.map(String);
  }

  // ── attributes (newer RediSearch) or fields (older) ──────────────────
  const attributes = [];
  const rawAttrs = info['attributes'] ?? info['fields'] ?? [];
  if (Array.isArray(rawAttrs)) {
    for (const entry of rawAttrs) {
      let identifier;
      let attribute;
      let type;
      if (Array.isArray(entry)) {
        const parsed = parseFlat(entry);
        identifier = String(parsed['identifier'] ?? '');
        attribute = String(parsed['attribute'] ?? parsed['identifier'] ?? '');
        type = String(parsed['type'] ?? 'TEXT').toUpperCase();
      } else if (entry && typeof entry === 'object') {
        identifier = String(entry.identifier ?? '');
        attribute = String(entry.attribute ?? entry.identifier ?? '');
        type = String(entry.type ?? 'TEXT').toUpperCase();
      }
      // Entries with no identifier (or unknown shapes) are dropped.
      if (identifier) attributes.push({ identifier, attribute, type });
    }
  }

  return { keyType, prefixes, attributes };
}
119
+
120
/**
 * Map RediSearch field attributes to RespLite schema fields.
 *
 * - TEXT → TEXT (1:1)
 * - TAG, NUMERIC → TEXT (with warning; values stringified at import time)
 * - GEO, VECTOR, … → skipped with a warning
 * - Fields whose sanitised name is empty or collides with an earlier field
 *   are skipped with a warning (previously they were dropped silently)
 * - Always guarantees a `payload` TEXT field exists (added if absent)
 *
 * @param {Array<{ identifier: string, attribute: string, type: string }>} attributes
 * @returns {{
 *   fields: Array<{ name: string, type: string }>,
 *   fieldMap: Map<string, string>,
 *   warnings: string[]
 * }}
 */
function mapFields(attributes) {
  const warnings = [];
  const fields = [];
  /** identifier (hash field name) → RespLite field name */
  const fieldMap = new Map();
  const usedNames = new Set();

  for (const attr of attributes) {
    if (!TEXT_COMPATIBLE.has(attr.type)) {
      warnings.push(`Skipping field "${attr.attribute}" (type ${attr.type} is not supported)`);
      continue;
    }
    if (attr.type !== 'TEXT') {
      warnings.push(`Field "${attr.attribute}" mapped from ${attr.type} to TEXT`);
    }

    // Sanitize to a valid SQLite column / RespLite field name:
    // replace illegal chars, force a leading letter, cap length at 64.
    let safeName = attr.attribute.replace(/[^A-Za-z0-9:_-]/g, '_');
    if (/^[^A-Za-z]/.test(safeName)) safeName = 'f_' + safeName;
    safeName = safeName.slice(0, 64);

    // Fix: previously these two cases dropped the field with no trace,
    // silently losing data in the migrated index.
    if (!safeName) {
      warnings.push(`Skipping field "${attr.attribute}" (name is empty after sanitising)`);
      continue;
    }
    if (usedNames.has(safeName)) {
      warnings.push(`Skipping field "${attr.attribute}" (sanitised name "${safeName}" collides with an earlier field)`);
      continue;
    }

    usedNames.add(safeName);
    fields.push({ name: safeName, type: 'TEXT' });
    fieldMap.set(attr.identifier, safeName);
  }

  // RespLite requires a `payload` TEXT field
  if (!usedNames.has('payload')) {
    fields.push({ name: 'payload', type: 'TEXT' });
  }

  return { fields, fieldMap, warnings };
}
169
+
170
/**
 * Assemble the fields object for addDocument from a HGETALL result.
 * Mapped fields missing from the hash, and schema fields with no mapping,
 * default to ''. When `payload` ends up falsy it is synthesised as the
 * space-joined concatenation of all other non-empty values.
 *
 * @param {Record<string, string>} hashData
 * @param {Map<string, string>} fieldMap identifier → RespLite field name
 * @param {Array<{ name: string }>} schemaFields
 * @returns {Record<string, string>}
 */
function buildDocFields(hashData, fieldMap, schemaFields) {
  const docFields = {};

  fieldMap.forEach((fieldName, identifier) => {
    const value = hashData[identifier];
    docFields[fieldName] = value === undefined || value === null ? '' : value;
  });

  for (const { name } of schemaFields) {
    if (!(name in docFields)) docFields[name] = '';
  }

  if (!docFields['payload']) {
    const parts = [];
    for (const [key, value] of Object.entries(docFields)) {
      if (key !== 'payload' && value) parts.push(value);
    }
    docFields['payload'] = parts.join(' ');
  }

  return docFields;
}
200
+
201
/**
 * Import autocomplete suggestions from a RediSearch index via
 * FT.SUGGET "" MAX n WITHSCORES.
 * RediSearch has no cursor for FT.SUGGET, so maxSuggestions caps the import.
 * Best-effort throughout: command failures yield 0; individual suggestion
 * insert failures are skipped.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {import('better-sqlite3').Database} db
 * @param {string} indexName
 * @param {number} maxSuggestions
 * @returns {Promise<number>} number of suggestions imported
 */
async function importSuggestions(redisClient, db, indexName, maxSuggestions) {
  try {
    const reply = await redisClient.sendCommand([
      'FT.SUGGET', indexName, '', 'MAX', String(maxSuggestions), 'WITHSCORES',
    ]);
    if (!Array.isArray(reply) || reply.length === 0) return 0;

    // Reply alternates [term, score, term, score, …]; insert in one txn.
    let imported = 0;
    const insertAll = db.transaction(() => {
      for (let i = 0; i + 1 < reply.length; i += 2) {
        const term = String(reply[i]);
        const score = parseFloat(String(reply[i + 1])) || 1.0;
        try {
          suggestionAdd(db, indexName, term, score, false, undefined);
          imported++;
        } catch (_) {
          // skip suggestions the storage layer rejects
        }
      }
    });
    insertAll();
    return imported;
  } catch (_) {
    return 0;
  }
}
236
+
237
/**
 * Migrate all (or selected) RediSearch indices from a Redis source into the RespLite DB.
 *
 * Pipeline per index: validate name → FT.INFO → map schema → FT.CREATE →
 * SCAN/HGETALL documents in batched SQLite transactions → FT.SUGGET suggestions.
 *
 * On SIGINT/SIGTERM: finish the current document, flush the pending batch,
 * then stop gracefully (result carries `aborted: true`).
 * DB is always closed in a finally block.
 *
 * Fix vs. previous revision: the one-by-one fallback after a failed batch
 * now uses the same per-document score (`__score`/`score` hash field) as the
 * batch path, instead of hardcoding 1.0.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {string} dbPath
 * @param {object} [options]
 * @param {string} [options.pragmaTemplate='default']
 * @param {string[]} [options.onlyIndices] - Restrict to these index names.
 * @param {number} [options.scanCount=500] - COUNT hint for SCAN.
 * @param {number} [options.maxRps=0] - Max Redis requests/s (0 = unlimited).
 * @param {number} [options.batchDocs=200] - Docs per SQLite transaction.
 * @param {number} [options.maxSuggestions=10000] - Cap for FT.SUGGET import.
 * @param {boolean} [options.skipExisting=true] - Skip index if already in RespLite.
 * @param {boolean} [options.withSuggestions=true] - Also migrate suggestions.
 * @param {(result: IndexResult) => void} [options.onProgress]
 * @returns {Promise<{ indices: IndexResult[], aborted: boolean }>}
 */
export async function runMigrateSearch(redisClient, dbPath, options = {}) {
  const {
    pragmaTemplate = 'default',
    onlyIndices = null,
    scanCount = 500,
    maxRps = 0,
    batchDocs = 200,
    maxSuggestions = 10000,
    skipExisting = true,
    withSuggestions = true,
    onProgress,
  } = options;

  const db = openDb(dbPath, { pragmaTemplate });
  let abortRequested = false;
  const onSignal = () => { abortRequested = true; };
  process.on('SIGINT', onSignal);
  process.on('SIGTERM', onSignal);

  // Simple rate limiter: enforce a minimum interval between Redis calls.
  const minIntervalMs = maxRps > 0 ? 1000 / maxRps : 0;
  let lastKeyTime = 0;

  async function throttle() {
    if (minIntervalMs <= 0) return;
    const elapsed = Date.now() - lastKeyTime;
    if (elapsed < minIntervalMs) await sleep(minIntervalMs - elapsed);
    lastKeyTime = Date.now();
  }

  /**
   * Document score: honour an explicit `__score`/`score` hash field,
   * falling back to 1.0 (also for unparsable or empty values).
   * Shared by the batch path and the one-by-one fallback so both agree.
   * @param {Record<string, string>} hashData
   * @returns {number}
   */
  const scoreOf = (hashData) => {
    const rawScore = hashData['__score'] ?? hashData['score'];
    return rawScore ? (parseFloat(rawScore) || 1.0) : 1.0;
  };

  try {
    const allNames = await listSearchIndices(redisClient);
    const targets = onlyIndices
      ? allNames.filter((n) => onlyIndices.includes(n))
      : allNames;

    const results = [];

    for (const indexName of targets) {
      if (abortRequested) break;

      // ── Validate name ────────────────────────────────────────────────
      if (!INDEX_NAME_RE.test(indexName)) {
        results.push(errorResult(indexName, `Index name "${indexName}" is not valid in RespLite (must match [A-Za-z][A-Za-z0-9:_-]{0,63})`));
        continue;
      }

      // ── Step 1: FT.INFO ──────────────────────────────────────────────
      let info;
      try {
        info = await getSearchIndexInfo(redisClient, indexName);
      } catch (e) {
        results.push(errorResult(indexName, `FT.INFO failed: ${e.message}`));
        continue;
      }

      if (info.keyType !== 'HASH') {
        results.push(errorResult(indexName, `key_type "${info.keyType}" not supported (only HASH)`));
        continue;
      }

      // ── Step 2: map schema ────────────────────────────────────────────
      const { fields, fieldMap, warnings } = mapFields(info.attributes);

      // ── Step 3: FT.CREATE ─────────────────────────────────────────────
      let created = false;
      let skipped = false;
      try {
        createIndex(db, indexName, fields);
        created = true;
      } catch (e) {
        if (e.message.includes('already exists')) {
          if (skipExisting) {
            skipped = true;
          } else {
            results.push({ ...errorResult(indexName, 'Index already exists in destination'), warnings });
            continue;
          }
        } else {
          results.push({ ...errorResult(indexName, `FT.CREATE failed: ${e.message}`), warnings });
          continue;
        }
      }

      // ── Step 4: import documents ──────────────────────────────────────
      let docsImported = 0;
      let docsSkipped = 0;
      let docErrors = 0;

      // Batch infrastructure: accumulate HGETALL results, flush in SQLite transactions
      const pendingHashData = new Map();
      let pendingKeys = [];

      const batchInsert = db.transaction((keyBatch) => {
        for (const key of keyBatch) {
          const hashData = pendingHashData.get(key);
          if (!hashData) continue;
          const docFields = buildDocFields(hashData, fieldMap, fields);
          addDocument(db, indexName, key, scoreOf(hashData), true, docFields);
        }
      });

      const flushBatch = () => {
        if (pendingKeys.length === 0) return;
        const flushKeys = pendingKeys.splice(0);
        try {
          batchInsert(flushKeys);
          docsImported += flushKeys.length;
        } catch (_) {
          // batch failed — fall back to one-by-one to minimise data loss
          for (const k of flushKeys) {
            try {
              const hd = pendingHashData.get(k);
              if (!hd) continue;
              // Use the same score derivation as the batch path (bug fix:
              // this used to hardcode 1.0, silently dropping __score/score).
              addDocument(db, indexName, k, scoreOf(hd), true, buildDocFields(hd, fieldMap, fields));
              docsImported++;
            } catch (_e) {
              docErrors++;
            }
          }
        }
        pendingHashData.clear();
      };

      for (const prefix of info.prefixes) {
        if (abortRequested) break;
        const matchPattern = prefix ? `${prefix}*` : '*';
        let cursor = 0;

        do {
          if (abortRequested) break;
          await throttle();

          // node-redis may return [cursor, keys] or { cursor, keys }.
          const scanResult = await redisClient.scan(cursor, { MATCH: matchPattern, COUNT: scanCount });
          cursor = Array.isArray(scanResult)
            ? parseInt(String(scanResult[0]), 10)
            : (scanResult?.cursor ?? 0);
          const pageKeys = Array.isArray(scanResult) ? scanResult[1] : (scanResult?.keys ?? []);

          for (const key of pageKeys) {
            if (abortRequested) break;
            await throttle();

            let hashData;
            try {
              hashData = await redisClient.hGetAll(key);
            } catch (_) {
              docErrors++;
              continue;
            }

            // Key vanished or is not a non-empty hash — skip, don't error.
            if (!hashData || typeof hashData !== 'object' || Object.keys(hashData).length === 0) {
              docsSkipped++;
              continue;
            }

            pendingHashData.set(key, hashData);
            pendingKeys.push(key);

            if (pendingKeys.length >= batchDocs) flushBatch();
          }
        } while (cursor !== 0 && !abortRequested);
      }
      flushBatch(); // flush remainder (also on abort: current docs are kept)

      // ── Step 5: suggestions ───────────────────────────────────────────
      let sugsImported = 0;
      if (withSuggestions && !abortRequested) {
        sugsImported = await importSuggestions(redisClient, db, indexName, maxSuggestions);
      }

      const result = {
        name: indexName,
        created,
        skipped,
        docsImported,
        docsSkipped,
        docErrors,
        sugsImported,
        warnings,
      };
      results.push(result);
      if (onProgress) onProgress(result);
    }

    return { indices: results, aborted: abortRequested };
  } finally {
    process.off('SIGINT', onSignal);
    process.off('SIGTERM', onSignal);
    db.close();
  }
}
449
+ }
450
+
451
/**
 * Build a zero-progress per-index result describing a failed migration.
 * @param {string} name
 * @param {string} error
 */
function errorResult(name, error) {
  return {
    name,
    created: false,
    skipped: false,
    docsImported: 0,
    docsSkipped: 0,
    docErrors: 0,
    sugsImported: 0,
    warnings: [],
    error,
  };
}
455
+
456
+ // ── Exported helpers (used by tests) ─────────────────────────────────────────
457
+ export { mapFields, buildDocFields };