resplite 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -98
- package/package.json +1 -1
- package/spec/SPEC_F.md +94 -0
- package/src/cli/resplite-import.js +74 -17
- package/src/migration/bulk.js +69 -36
- package/src/migration/index.js +32 -1
- package/src/migration/migrate-search.js +457 -0
- package/test/unit/migrate-search.test.js +497 -0
package/src/migration/bulk.js
CHANGED
|
@@ -37,6 +37,9 @@ function sleep(ms) {
|
|
|
37
37
|
|
|
38
38
|
/**
|
|
39
39
|
* Run bulk import: SCAN keys from Redis, import into RespLite DB with checkpointing.
|
|
40
|
+
* On SIGINT/SIGTERM, checkpoint progress, set run status to ABORTED, close DB and rethrow.
|
|
41
|
+
* DB is always closed in a finally block (graceful shutdown when process is interrupted).
|
|
42
|
+
*
|
|
40
43
|
* @param {import('redis').RedisClientType} redisClient
|
|
41
44
|
* @param {string} dbPath
|
|
42
45
|
* @param {string} runId
|
|
@@ -65,41 +68,47 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
|
|
|
65
68
|
} = options;
|
|
66
69
|
|
|
67
70
|
const db = openDb(dbPath, { pragmaTemplate });
|
|
68
|
-
|
|
69
|
-
const
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
const storages = { keys, strings, hashes, sets, lists, zsets };
|
|
75
|
-
|
|
76
|
-
createRun(db, runId, sourceUri, { scan_count_hint: scan_count });
|
|
77
|
-
let run = getRun(db, runId);
|
|
78
|
-
if (!run) throw new Error(`Run ${runId} not found`);
|
|
79
|
-
|
|
80
|
-
let cursor = resume && run.scan_cursor !== undefined ? parseInt(String(run.scan_cursor), 10) : 0;
|
|
81
|
-
let scanned_keys = resume ? (run.scanned_keys || 0) : 0;
|
|
82
|
-
let migrated_keys = resume ? (run.migrated_keys || 0) : 0;
|
|
83
|
-
let skipped_keys = resume ? (run.skipped_keys || 0) : 0;
|
|
84
|
-
let error_keys = resume ? (run.error_keys || 0) : 0;
|
|
85
|
-
let migrated_bytes = resume ? (run.migrated_bytes || 0) : 0;
|
|
86
|
-
|
|
87
|
-
if (!resume) {
|
|
88
|
-
updateBulkProgress(db, runId, { scan_cursor: String(cursor), scanned_keys, migrated_keys, skipped_keys, error_keys, migrated_bytes });
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
let lastCheckpointTime = Date.now();
|
|
92
|
-
let batchScanned = 0;
|
|
93
|
-
let batchBytes = 0;
|
|
94
|
-
const minIntervalMs = max_rps > 0 ? 1000 / max_rps : 0;
|
|
95
|
-
let lastKeyTime = 0;
|
|
71
|
+
let abortRequested = false;
|
|
72
|
+
const onSignal = () => {
|
|
73
|
+
abortRequested = true;
|
|
74
|
+
};
|
|
75
|
+
process.on('SIGINT', onSignal);
|
|
76
|
+
process.on('SIGTERM', onSignal);
|
|
96
77
|
|
|
97
78
|
try {
|
|
98
|
-
|
|
79
|
+
const keys = createKeysStorage(db);
|
|
80
|
+
const strings = createStringsStorage(db, keys);
|
|
81
|
+
const hashes = createHashesStorage(db, keys);
|
|
82
|
+
const sets = createSetsStorage(db, keys);
|
|
83
|
+
const lists = createListsStorage(db, keys);
|
|
84
|
+
const zsets = createZsetsStorage(db, keys);
|
|
85
|
+
const storages = { keys, strings, hashes, sets, lists, zsets };
|
|
86
|
+
|
|
87
|
+
createRun(db, runId, sourceUri, { scan_count_hint: scan_count });
|
|
88
|
+
let run = getRun(db, runId);
|
|
89
|
+
if (!run) throw new Error(`Run ${runId} not found`);
|
|
90
|
+
|
|
91
|
+
let cursor = resume && run.scan_cursor !== undefined ? parseInt(String(run.scan_cursor), 10) : 0;
|
|
92
|
+
let scanned_keys = resume ? (run.scanned_keys || 0) : 0;
|
|
93
|
+
let migrated_keys = resume ? (run.migrated_keys || 0) : 0;
|
|
94
|
+
let skipped_keys = resume ? (run.skipped_keys || 0) : 0;
|
|
95
|
+
let error_keys = resume ? (run.error_keys || 0) : 0;
|
|
96
|
+
let migrated_bytes = resume ? (run.migrated_bytes || 0) : 0;
|
|
97
|
+
|
|
98
|
+
if (!resume) {
|
|
99
|
+
updateBulkProgress(db, runId, { scan_cursor: String(cursor), scanned_keys, migrated_keys, skipped_keys, error_keys, migrated_bytes });
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
let lastCheckpointTime = Date.now();
|
|
103
|
+
let batchScanned = 0;
|
|
104
|
+
let batchBytes = 0;
|
|
105
|
+
const minIntervalMs = max_rps > 0 ? 1000 / max_rps : 0;
|
|
106
|
+
let lastKeyTime = 0;
|
|
107
|
+
|
|
108
|
+
outer: do {
|
|
99
109
|
run = getRun(db, runId);
|
|
100
|
-
if (run && run.status === RUN_STATUS.ABORTED)
|
|
101
|
-
|
|
102
|
-
}
|
|
110
|
+
if (run && run.status === RUN_STATUS.ABORTED) break;
|
|
111
|
+
if (abortRequested) break;
|
|
103
112
|
while (run && run.status === RUN_STATUS.PAUSED) {
|
|
104
113
|
await sleep(2000);
|
|
105
114
|
run = getRun(db, runId);
|
|
@@ -111,8 +120,9 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
|
|
|
111
120
|
const keyList = parsed.keys || [];
|
|
112
121
|
|
|
113
122
|
for (const keyName of keyList) {
|
|
123
|
+
if (abortRequested) break outer;
|
|
114
124
|
run = getRun(db, runId);
|
|
115
|
-
if (run && run.status === RUN_STATUS.ABORTED) break;
|
|
125
|
+
if (run && run.status === RUN_STATUS.ABORTED) break outer;
|
|
116
126
|
while (run && run.status === RUN_STATUS.PAUSED) {
|
|
117
127
|
await sleep(2000);
|
|
118
128
|
run = getRun(db, runId);
|
|
@@ -162,6 +172,23 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
|
|
|
162
172
|
}
|
|
163
173
|
} while (cursor !== 0);
|
|
164
174
|
|
|
175
|
+
if (abortRequested) {
|
|
176
|
+
updateBulkProgress(db, runId, {
|
|
177
|
+
scan_cursor: String(cursor),
|
|
178
|
+
scanned_keys,
|
|
179
|
+
migrated_keys,
|
|
180
|
+
skipped_keys,
|
|
181
|
+
error_keys,
|
|
182
|
+
migrated_bytes,
|
|
183
|
+
});
|
|
184
|
+
setRunStatus(db, runId, RUN_STATUS.ABORTED);
|
|
185
|
+
run = getRun(db, runId);
|
|
186
|
+
if (onProgress && run) onProgress(run);
|
|
187
|
+
const err = new Error('Bulk import interrupted by signal (SIGINT/SIGTERM)');
|
|
188
|
+
err.code = 'BULK_ABORTED';
|
|
189
|
+
throw err;
|
|
190
|
+
}
|
|
191
|
+
|
|
165
192
|
updateBulkProgress(db, runId, {
|
|
166
193
|
scan_cursor: '0',
|
|
167
194
|
scanned_keys,
|
|
@@ -173,9 +200,15 @@ export async function runBulkImport(redisClient, dbPath, runId, options = {}) {
|
|
|
173
200
|
setRunStatus(db, runId, RUN_STATUS.COMPLETED);
|
|
174
201
|
return getRun(db, runId);
|
|
175
202
|
} catch (err) {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
203
|
+
if (err.code !== 'BULK_ABORTED') {
|
|
204
|
+
setRunStatus(db, runId, RUN_STATUS.FAILED);
|
|
205
|
+
updateBulkProgress(db, runId, { last_error: err.message });
|
|
206
|
+
logError(db, runId, 'bulk', err.message, null);
|
|
207
|
+
}
|
|
179
208
|
throw err;
|
|
209
|
+
} finally {
|
|
210
|
+
process.off('SIGINT', onSignal);
|
|
211
|
+
process.off('SIGTERM', onSignal);
|
|
212
|
+
db.close();
|
|
180
213
|
}
|
|
181
214
|
}
|
package/src/migration/index.js
CHANGED
|
@@ -24,6 +24,7 @@ import { runPreflight, readKeyspaceEvents, setKeyspaceEvents } from './preflight
|
|
|
24
24
|
import { runBulkImport } from './bulk.js';
|
|
25
25
|
import { runApplyDirty } from './apply-dirty.js';
|
|
26
26
|
import { runVerify } from './verify.js';
|
|
27
|
+
import { runMigrateSearch } from './migrate-search.js';
|
|
27
28
|
import { getRun, getDirtyCounts } from './registry.js';
|
|
28
29
|
|
|
29
30
|
/**
|
|
@@ -50,6 +51,7 @@ import { getRun, getDirtyCounts } from './registry.js';
|
|
|
50
51
|
* status(): { run: object, dirty: object } | null,
|
|
51
52
|
* applyDirty(opts?: { batchKeys?: number, maxRps?: number }): Promise<object>,
|
|
52
53
|
* verify(opts?: { samplePct?: number, maxSample?: number }): Promise<object>,
|
|
54
|
+
* migrateSearch(opts?: { onlyIndices?: string[], scanCount?: number, maxRps?: number, batchDocs?: number, maxSuggestions?: number, skipExisting?: boolean, withSuggestions?: boolean, onProgress?: function }): Promise<object>,
|
|
53
55
|
* close(): Promise<void>,
|
|
54
56
|
* }}
|
|
55
57
|
*/
|
|
@@ -173,6 +175,35 @@ export function createMigration({
|
|
|
173
175
|
return runVerify(client, to, { pragmaTemplate, samplePct, maxSample });
|
|
174
176
|
},
|
|
175
177
|
|
|
178
|
+
/**
|
|
179
|
+
* Step 5 — Migrate search indices: copy RediSearch index schemas and documents
|
|
180
|
+
* into RespLite FT.* tables.
|
|
181
|
+
*
|
|
182
|
+
* Requires RediSearch (Redis Stack or redis/search module) on the source.
|
|
183
|
+
* Only HASH-based indices with TEXT/TAG/NUMERIC fields are supported.
|
|
184
|
+
* TAG and NUMERIC fields are mapped to TEXT.
|
|
185
|
+
*
|
|
186
|
+
* @param {{
|
|
187
|
+
* onlyIndices?: string[],
|
|
188
|
+
* scanCount?: number,
|
|
189
|
+
* maxRps?: number,
|
|
190
|
+
* batchDocs?: number,
|
|
191
|
+
* maxSuggestions?: number,
|
|
192
|
+
* skipExisting?: boolean,
|
|
193
|
+
* withSuggestions?: boolean,
|
|
194
|
+
* onProgress?: (result: object) => void
|
|
195
|
+
* }} [opts]
|
|
196
|
+
* @returns {Promise<{ indices: object[], aborted: boolean }>}
|
|
197
|
+
*/
|
|
198
|
+
async migrateSearch(opts = {}) {
|
|
199
|
+
const client = await getClient();
|
|
200
|
+
return runMigrateSearch(client, to, {
|
|
201
|
+
pragmaTemplate,
|
|
202
|
+
maxRps,
|
|
203
|
+
...opts,
|
|
204
|
+
});
|
|
205
|
+
},
|
|
206
|
+
|
|
176
207
|
/**
|
|
177
208
|
* Disconnect from Redis. Call when done with all migration operations.
|
|
178
209
|
*/
|
|
@@ -185,6 +216,6 @@ export function createMigration({
|
|
|
185
216
|
};
|
|
186
217
|
}
|
|
187
218
|
|
|
188
|
-
export { runPreflight, readKeyspaceEvents, setKeyspaceEvents, runBulkImport, runApplyDirty, runVerify };
|
|
219
|
+
export { runPreflight, readKeyspaceEvents, setKeyspaceEvents, runBulkImport, runApplyDirty, runVerify, runMigrateSearch };
|
|
189
220
|
export { startDirtyTracker } from './tracker.js';
|
|
190
221
|
export { getRun, getDirtyCounts, createRun, setRunStatus, logError } from './registry.js';
|
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migrate RediSearch indices to RespLite FT.* search indices (SPEC_F §F.10).
|
|
3
|
+
*
|
|
4
|
+
* For each index in the source Redis:
|
|
5
|
+
* 1. FT._LIST → enumerate index names
|
|
6
|
+
* 2. FT.INFO → read schema (prefix patterns, field attributes)
|
|
7
|
+
* 3. Map RediSearch field types to RespLite TEXT fields
|
|
8
|
+
* 4. FT.CREATE in RespLite (skip if already exists and skipExisting=true)
|
|
9
|
+
* 5. SCAN keys by prefix → HGETALL → addDocument in SQLite batches
|
|
10
|
+
* 6. FT.SUGGET → import suggestions
|
|
11
|
+
*
|
|
12
|
+
* Graceful shutdown: SIGINT/SIGTERM finishes the current document, checkpoints, closes DB.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { openDb } from '../storage/sqlite/db.js';
|
|
16
|
+
import { createIndex, addDocument, suggestionAdd } from '../storage/sqlite/search.js';
|
|
17
|
+
|
|
18
|
+
const INDEX_NAME_RE = /^[A-Za-z][A-Za-z0-9:_-]{0,63}$/;
|
|
19
|
+
|
|
20
|
+
/** RediSearch field types that can be represented as TEXT in RespLite. */
|
|
21
|
+
const TEXT_COMPATIBLE = new Set(['TEXT', 'TAG', 'NUMERIC']);
|
|
22
|
+
|
|
23
|
+
/**
 * Pause for the given number of milliseconds.
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => setTimeout(done, ms));
}
|
|
26
|
+
|
|
27
|
+
/**
 * Enumerate RediSearch index names on the source via FT._LIST.
 *
 * Best-effort: any failure of the command (e.g. RediSearch module not loaded)
 * and any non-array reply are reported as "no indices" instead of throwing.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @returns {Promise<string[]>} Index names coerced to strings, or [].
 */
async function listSearchIndices(redisClient) {
  try {
    const reply = await redisClient.sendCommand(['FT._LIST']);
    return Array.isArray(reply) ? reply.map((name) => String(name)) : [];
  } catch (_) {
    return [];
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Turn a flat alternating [key, value, key, value, …] reply array into a plain object.
 *
 * Keys are stringified and lower-cased; values (including nested arrays) are
 * stored untouched. A trailing key without a value is ignored. Anything that
 * is not an array yields an empty object.
 *
 * @param {unknown} arr
 * @returns {Record<string, unknown>}
 */
function parseFlat(arr) {
  const result = {};
  if (!Array.isArray(arr)) return result;

  const pairCount = Math.floor(arr.length / 2);
  for (let pair = 0; pair < pairCount; pair++) {
    const name = String(arr[2 * pair]).toLowerCase();
    result[name] = arr[2 * pair + 1];
  }
  return result;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Read a RediSearch index definition via FT.INFO and normalise it to
 * { keyType, prefixes, attributes }.
 *
 * Accepted reply shapes:
 *   - a flat [key, value, …] array, or an already-decoded object (keys are
 *     compared case-insensitively);
 *   - `index_definition` as a flat array or an object, with `prefixes` given
 *     as an array or a single string;
 *   - field descriptors under `attributes`
 *     ([identifier, x, attribute, y, type, T, …]) or under the legacy `fields`
 *     key ([name, 'type', T, …]), or as objects.
 *
 * Defaults when the reply omits them: keyType 'HASH', prefixes [''] (match
 * every key), field type 'TEXT'. Descriptors without a usable identifier are
 * dropped.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {string} indexName
 * @returns {Promise<{
 *   keyType: string,
 *   prefixes: string[],
 *   attributes: Array<{ identifier: string, attribute: string, type: string }>
 * }>}
 * @throws Whatever `redisClient.sendCommand` rejects with (e.g. unknown index).
 */
async function getSearchIndexInfo(redisClient, indexName) {
  const raw = await redisClient.sendCommand(['FT.INFO', indexName]);

  // The reply may be an already-decoded object or a flat array; handle both.
  let info;
  if (raw && !Array.isArray(raw) && typeof raw === 'object') {
    info = {};
    for (const [k, v] of Object.entries(raw)) info[k.toLowerCase()] = v;
  } else {
    info = parseFlat(raw);
  }

  // ── index_definition → key_type + prefixes ───────────────────────────
  let keyType = 'HASH';
  let prefixes = [''];
  const rawDef = info['index_definition'] ?? info['indexdefinition'];
  let def = null;
  if (Array.isArray(rawDef)) def = parseFlat(rawDef);
  else if (rawDef && typeof rawDef === 'object') def = rawDef;

  if (def) {
    if (def.key_type) keyType = String(def.key_type).toUpperCase();
    const p = def.prefixes;
    // Accept a single string prefix in both reply shapes, not only the array form.
    if (Array.isArray(p) && p.length > 0) prefixes = p.map(String);
    else if (typeof p === 'string' && p.length > 0) prefixes = [p];
  }

  // ── attributes (newer) or fields (older RediSearch) ──────────────────
  const rawAttrs = info['attributes'] ?? info['fields'] ?? [];
  const attributes = [];
  if (Array.isArray(rawAttrs)) {
    for (const attr of rawAttrs) {
      let a;
      if (Array.isArray(attr)) {
        a = parseFlat(attr);
        if (a.identifier === undefined && a.attribute === undefined && attr.length > 0) {
          // Legacy layout: [name, 'type', 'TEXT', …]. The field name comes first
          // and is not part of a key/value pair, so re-parse the remainder.
          const name = attr[0];
          a = { ...parseFlat(attr.slice(1)), identifier: name, attribute: name };
        }
      } else if (attr && typeof attr === 'object') {
        a = attr;
      } else {
        continue;
      }

      const identifier = String(a.identifier ?? '');
      const attribute = String(a.attribute ?? a.identifier ?? '');
      const type = String(a.type ?? 'TEXT').toUpperCase();
      if (identifier) attributes.push({ identifier, attribute, type });
    }
  }

  return { keyType, prefixes, attributes };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Map RediSearch field attributes to RespLite schema fields.
 *
 * - TEXT → TEXT (1:1)
 * - TAG, NUMERIC → TEXT (with warning; values stringified at import time)
 * - GEO, VECTOR, … → skipped with a warning
 * - A field whose sanitized name is empty, or collides with a name already
 *   taken by an earlier field, is skipped with a warning (first one wins)
 * - Always guarantees a `payload` TEXT field exists (added if absent)
 *
 * Name sanitization: characters outside [A-Za-z0-9:_-] become `_`, a name not
 * starting with a letter is prefixed with `f_`, and the result is cut to 64
 * characters.
 *
 * @param {Array<{ identifier: string, attribute: string, type: string }>} attributes
 * @returns {{
 *   fields: Array<{ name: string, type: string }>,
 *   fieldMap: Map<string, string>,
 *   warnings: string[]
 * }} `fieldMap` maps identifier (hash field name) → RespLite field name.
 */
function mapFields(attributes) {
  const warnings = [];
  const fields = [];
  /** identifier (hash field name) → RespLite field name */
  const fieldMap = new Map();
  const usedNames = new Set();

  for (const attr of attributes) {
    if (!TEXT_COMPATIBLE.has(attr.type)) {
      warnings.push(`Skipping field "${attr.attribute}" (type ${attr.type} is not supported)`);
      continue;
    }
    if (attr.type !== 'TEXT') {
      warnings.push(`Field "${attr.attribute}" mapped from ${attr.type} to TEXT`);
    }

    // Sanitize to a valid SQLite column / RespLite field name
    let safeName = attr.attribute.replace(/[^A-Za-z0-9:_-]/g, '_');
    if (/^[^A-Za-z]/.test(safeName)) safeName = 'f_' + safeName;
    safeName = safeName.slice(0, 64);

    // Dropping a field changes what gets indexed, so tell the caller about it
    // instead of skipping silently.
    if (!safeName) {
      warnings.push(`Skipping field "${attr.identifier}" (empty field name)`);
      continue;
    }
    if (usedNames.has(safeName)) {
      warnings.push(`Skipping field "${attr.attribute}" (name "${safeName}" is already used by another field)`);
      continue;
    }

    usedNames.add(safeName);
    fields.push({ name: safeName, type: 'TEXT' });
    fieldMap.set(attr.identifier, safeName);
  }

  // RespLite requires a `payload` TEXT field
  if (!usedNames.has('payload')) {
    fields.push({ name: 'payload', type: 'TEXT' });
  }

  return { fields, fieldMap, warnings };
}
|
|
169
|
+
|
|
170
|
+
/**
 * Assemble the field object passed to addDocument from one HGETALL result.
 *
 * Every mapped field receives the hash value stored under its identifier, or
 * '' when the hash has no such entry; schema fields that are not mapped at
 * all also get ''. When the resulting `payload` is empty it is synthesised by
 * joining all other non-empty values with a single space.
 *
 * @param {Record<string, string>} hashData
 * @param {Map<string, string>} fieldMap  identifier → RespLite field name
 * @param {Array<{ name: string }>} schemaFields
 * @returns {Record<string, string>}
 */
function buildDocFields(hashData, fieldMap, schemaFields) {
  const doc = {};

  fieldMap.forEach((targetName, sourceName) => {
    doc[targetName] = hashData[sourceName] ?? '';
  });

  for (const schemaField of schemaFields) {
    if (schemaField.name in doc) continue;
    doc[schemaField.name] = '';
  }

  if (doc.payload) return doc;

  // No explicit payload: build one from whatever other values are present.
  const parts = [];
  for (const [name, value] of Object.entries(doc)) {
    if (name !== 'payload' && value) parts.push(value);
  }
  doc.payload = parts.join(' ');
  return doc;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Copy suggestions for one index using `FT.SUGGET <index> "" MAX n WITHSCORES`.
 *
 * A single request is issued, so `maxSuggestions` caps how many entries can
 * be imported. The reply is read as alternating [term, score, …]; a score that
 * does not parse to a non-zero number falls back to 1.0. All inserts run
 * inside one `db.transaction`. The whole operation is best-effort: a failing
 * individual insert is skipped, and any other failure makes the function
 * return 0.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {import('better-sqlite3').Database} db
 * @param {string} indexName
 * @param {number} maxSuggestions
 * @returns {Promise<number>} Number of suggestions successfully added.
 */
async function importSuggestions(redisClient, db, indexName, maxSuggestions) {
  try {
    const reply = await redisClient.sendCommand([
      'FT.SUGGET', indexName, '', 'MAX', String(maxSuggestions), 'WITHSCORES',
    ]);
    const usable = Array.isArray(reply) && reply.length > 0;
    if (!usable) return 0;

    let imported = 0;
    const writeAll = db.transaction(() => {
      let pos = 0;
      while (pos + 1 < reply.length) {
        const term = String(reply[pos]);
        const parsedScore = parseFloat(String(reply[pos + 1]));
        const score = parsedScore || 1.0;
        pos += 2;
        try {
          suggestionAdd(db, indexName, term, score, false, undefined);
          imported += 1;
        } catch (_) {}
      }
    });
    writeAll();
    return imported;
  } catch (_) {
    return 0;
  }
}
|
|
236
|
+
|
|
237
|
+
/**
 * Migrate all (or selected) RediSearch indices from a Redis source into the RespLite DB.
 *
 * Per index: validate the name, read FT.INFO, map the schema, create the
 * index (an "already exists" error is tolerated when `skipExisting` is true
 * and documents are still imported into the existing index), SCAN every
 * prefix and add each non-empty hash as a document in SQLite batches, then
 * optionally import suggestions. Indices that cannot be migrated are reported
 * as result entries carrying an `error` string; they do not abort the run.
 *
 * On SIGINT/SIGTERM: finish the current document, flush what is pending, then
 * stop gracefully (`aborted: true`). Signal handlers are removed and the DB is
 * closed in a finally block.
 *
 * @param {import('redis').RedisClientType} redisClient
 * @param {string} dbPath
 * @param {object} [options]
 * @param {string} [options.pragmaTemplate='default']
 * @param {string[]} [options.onlyIndices] - Restrict to these index names.
 * @param {number} [options.scanCount=500] - COUNT hint for SCAN.
 * @param {number} [options.maxRps=0] - Max Redis requests/s (0 = unlimited).
 * @param {number} [options.batchDocs=200] - Docs per SQLite transaction.
 * @param {number} [options.maxSuggestions=10000] - Cap for FT.SUGGET import.
 * @param {boolean} [options.skipExisting=true] - Tolerate an index that already exists in RespLite.
 * @param {boolean} [options.withSuggestions=true] - Also migrate suggestions.
 * @param {(result: IndexResult) => void} [options.onProgress] - Called once per index that reached the import stage.
 * @returns {Promise<{ indices: IndexResult[], aborted: boolean }>}
 */
export async function runMigrateSearch(redisClient, dbPath, options = {}) {
  const {
    pragmaTemplate = 'default',
    onlyIndices = null,
    scanCount = 500,
    maxRps = 0,
    batchDocs = 200,
    maxSuggestions = 10000,
    skipExisting = true,
    withSuggestions = true,
    onProgress,
  } = options;

  const db = openDb(dbPath, { pragmaTemplate });
  let abortRequested = false;
  // The handlers only raise a flag; the loops below poll it between documents.
  const onSignal = () => { abortRequested = true; };
  process.on('SIGINT', onSignal);
  process.on('SIGTERM', onSignal);

  const minIntervalMs = maxRps > 0 ? 1000 / maxRps : 0;
  let lastKeyTime = 0;

  // Keep successive Redis requests at least minIntervalMs apart (no-op when unlimited).
  async function throttle() {
    if (minIntervalMs <= 0) return;
    const elapsed = Date.now() - lastKeyTime;
    if (elapsed < minIntervalMs) await sleep(minIntervalMs - elapsed);
    lastKeyTime = Date.now();
  }

  // Document score: the `__score` (or `score`) hash field when it parses to a
  // non-zero number, otherwise 1.0. Shared by the batch and fallback paths so
  // both store the same score.
  const scoreOf = (hashData) => {
    const rawScore = hashData['__score'] ?? hashData['score'];
    return rawScore ? (parseFloat(rawScore) || 1.0) : 1.0;
  };

  try {
    const allNames = await listSearchIndices(redisClient);
    const targets = onlyIndices
      ? allNames.filter((n) => onlyIndices.includes(n))
      : allNames;

    const results = [];

    for (const indexName of targets) {
      if (abortRequested) break;

      // ── Validate name ────────────────────────────────────────────────
      if (!INDEX_NAME_RE.test(indexName)) {
        results.push(errorResult(indexName, `Index name "${indexName}" is not valid in RespLite (must match [A-Za-z][A-Za-z0-9:_-]{0,63})`));
        continue;
      }

      // ── Step 1: FT.INFO ──────────────────────────────────────────────
      let info;
      try {
        info = await getSearchIndexInfo(redisClient, indexName);
      } catch (e) {
        results.push(errorResult(indexName, `FT.INFO failed: ${e.message}`));
        continue;
      }

      if (info.keyType !== 'HASH') {
        results.push(errorResult(indexName, `key_type "${info.keyType}" not supported (only HASH)`));
        continue;
      }

      // ── Step 2: map schema ────────────────────────────────────────────
      const { fields, fieldMap, warnings } = mapFields(info.attributes);

      // ── Step 3: FT.CREATE ─────────────────────────────────────────────
      let created = false;
      let skipped = false;
      try {
        createIndex(db, indexName, fields);
        created = true;
      } catch (e) {
        if (e.message.includes('already exists')) {
          if (skipExisting) {
            // Creation is skipped, but documents are still imported below.
            skipped = true;
          } else {
            results.push({ ...errorResult(indexName, 'Index already exists in destination'), warnings });
            continue;
          }
        } else {
          results.push({ ...errorResult(indexName, `FT.CREATE failed: ${e.message}`), warnings });
          continue;
        }
      }

      // ── Step 4: import documents ──────────────────────────────────────
      let docsImported = 0;
      let docsSkipped = 0;
      let docErrors = 0;

      // Batch infrastructure: accumulate HGETALL results, flush in SQLite transactions
      const pendingHashData = new Map();
      let pendingKeys = [];

      const batchInsert = db.transaction((keyBatch) => {
        for (const key of keyBatch) {
          const hashData = pendingHashData.get(key);
          if (!hashData) continue;
          const docFields = buildDocFields(hashData, fieldMap, fields);
          addDocument(db, indexName, key, scoreOf(hashData), true, docFields);
        }
      });

      const flushBatch = () => {
        if (pendingKeys.length === 0) return;
        const flushKeys = pendingKeys.splice(0);
        try {
          batchInsert(flushKeys);
          docsImported += flushKeys.length;
        } catch (_) {
          // batch failed — fall back to one-by-one to minimise data loss
          for (const k of flushKeys) {
            try {
              const hd = pendingHashData.get(k);
              if (!hd) continue;
              // Keep the document's own score here too (not a fixed 1.0).
              addDocument(db, indexName, k, scoreOf(hd), true, buildDocFields(hd, fieldMap, fields));
              docsImported++;
            } catch (_e) {
              docErrors++;
            }
          }
        }
        pendingHashData.clear();
      };

      for (const prefix of info.prefixes) {
        if (abortRequested) break;
        const matchPattern = prefix ? `${prefix}*` : '*';
        let cursor = 0;

        do {
          if (abortRequested) break;
          await throttle();

          const scanResult = await redisClient.scan(cursor, { MATCH: matchPattern, COUNT: scanCount });
          // Normalise the cursor to a number for both reply shapes; a string
          // '0' would otherwise never satisfy `cursor !== 0` and loop forever.
          const rawCursor = Array.isArray(scanResult) ? scanResult[0] : (scanResult?.cursor ?? 0);
          cursor = parseInt(String(rawCursor), 10);
          const pageKeys = Array.isArray(scanResult) ? scanResult[1] : (scanResult?.keys ?? []);

          for (const key of pageKeys) {
            if (abortRequested) break;
            await throttle();

            let hashData;
            try {
              hashData = await redisClient.hGetAll(key);
            } catch (_) {
              docErrors++;
              continue;
            }

            if (!hashData || typeof hashData !== 'object' || Object.keys(hashData).length === 0) {
              docsSkipped++;
              continue;
            }

            pendingHashData.set(key, hashData);
            pendingKeys.push(key);

            if (pendingKeys.length >= batchDocs) flushBatch();
          }
        } while (cursor !== 0 && !abortRequested);
      }
      flushBatch(); // flush remainder

      // ── Step 5: suggestions ───────────────────────────────────────────
      let sugsImported = 0;
      if (withSuggestions && !abortRequested) {
        sugsImported = await importSuggestions(redisClient, db, indexName, maxSuggestions);
      }

      const result = {
        name: indexName,
        created,
        skipped,
        docsImported,
        docsSkipped,
        docErrors,
        sugsImported,
        warnings,
      };
      results.push(result);
      if (onProgress) onProgress(result);
    }

    return { indices: results, aborted: abortRequested };
  } finally {
    process.off('SIGINT', onSignal);
    process.off('SIGTERM', onSignal);
    db.close();
  }
}
|
|
450
|
+
|
|
451
|
+
/**
 * Build the result entry for an index that could not be migrated: all
 * counters zero, flags false, no warnings, plus the failure message.
 *
 * @param {string} name - Index name.
 * @param {string} error - Human-readable reason.
 */
function errorResult(name, error) {
  const emptyOutcome = {
    created: false,
    skipped: false,
    docsImported: 0,
    docsSkipped: 0,
    docErrors: 0,
    sugsImported: 0,
    warnings: [],
  };
  return { name, ...emptyOutcome, error };
}
|
|
455
|
+
|
|
456
|
+
// ── Exported helpers (used by tests) ─────────────────────────────────────────
|
|
457
|
+
export { mapFields, buildDocFields };
|