resplite 1.4.14 → 1.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "resplite",
3
- "version": "1.4.14",
3
+ "version": "1.4.18",
4
4
  "description": "A RESP2 server with practical Redis compatibility, backed by SQLite",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -72,6 +72,10 @@ CREATE TABLE IF NOT EXISTS search_indices (
72
72
  created_at INTEGER NOT NULL,
73
73
  updated_at INTEGER NOT NULL
74
74
  );
75
+
76
+ CREATE TABLE IF NOT EXISTS search_rowid_allocator (
77
+ id INTEGER PRIMARY KEY AUTOINCREMENT
78
+ );
75
79
  `;
76
80
 
77
81
  /** Type enum: 1=string, 2=hash, 3=set, 4=list, 5=zset */
@@ -28,6 +28,57 @@ function tableName(idx, suffix) {
28
28
  return `search_${suffix}__${idx}`;
29
29
  }
30
30
 
/**
 * Collect the schema's field names and return them sorted, so the FTS
 * column order is stable no matter how the fields were declared.
 * Ordering is the default `Array.prototype.sort` ordering.
 * @param {{ fields: { name: string, type: string }[] }} schema
 * @returns {string[]} A new array of names; `schema.fields` is left untouched.
 */
function getSortedFieldNames(schema) {
  const names = schema.fields.map((field) => field.name);
  names.sort();
  return names;
}
39
+
/**
 * Encode field values in deterministic FTS column order.
 * Missing (or null/undefined) values are normalized to empty string so the
 * encoded row matches what was written at insert time.
 *
 * Only the document's own properties are consulted. A plain property read
 * would walk the prototype chain, so a schema field named e.g. `constructor`
 * or `toString` that is absent from the document would resolve to an
 * inherited function instead of the empty string.
 * @param {string[]} fieldNames Column names, in the order the values must appear.
 * @param {Record<string, string>} fields Document field values keyed by field name.
 * @returns {string[]} One value per entry of `fieldNames`, in the same order.
 */
function encodeFtsFieldValues(fieldNames, fields) {
  return fieldNames.map((name) => {
    if (!Object.prototype.hasOwnProperty.call(fields, name)) return '';
    return fields[name] ?? '';
  });
}
50
+
/**
 * Remove one row from a contentless FTS table by inserting the special
 * 'delete' command row. FTS5 needs the exact values that were previously
 * indexed for every column, so they are re-encoded from `fields` in the
 * same column order as `fieldNames`.
 * @param {import('better-sqlite3').Database} db
 * @param {string} ftsTableName Name of the FTS table; also used as the command column.
 * @param {number} ftsRowid Rowid of the FTS row to remove.
 * @param {string[]} fieldNames Indexed column names, in FTS column order.
 * @param {Record<string, string>} fields Field values the row was indexed with.
 */
function deleteFtsRow(db, ftsTableName, ftsRowid, fieldNames, fields) {
  const priorValues = encodeFtsFieldValues(fieldNames, fields);
  // Column list: the table-named command column, then rowid, then each indexed field.
  const columnList = [ftsTableName, 'rowid', ...fieldNames];
  const bindMarkers = new Array(columnList.length).fill('?').join(', ');
  const sql = `INSERT INTO ${ftsTableName}(${columnList.join(', ')}) VALUES (${bindMarkers})`;
  db.prepare(sql).run('delete', ftsRowid, ...priorValues);
}
70
+
/**
 * Hand out the next FTS rowid by inserting a row into the
 * `search_rowid_allocator` table and returning the id it was given.
 * The allocator's AUTOINCREMENT key keeps ids from being reused after
 * deletes, so stale tokens left under an old rowid cannot become attached
 * to a newly added document.
 * @param {import('better-sqlite3').Database} db
 * @returns {number} The newly allocated rowid, converted to a JS number.
 */
function allocateFtsRowid(db) {
  const { lastInsertRowid } = db
    .prepare('INSERT INTO search_rowid_allocator DEFAULT VALUES')
    .run();
  return Number(lastInsertRowid);
}
81
+
31
82
  /**
32
83
  * Build canonical schema JSON (fields sorted by name). D.12.2
33
84
  * @param {{ name: string, type: string }[]} fields
@@ -138,7 +189,7 @@ export function getIndexMeta(db, name) {
138
189
  */
139
190
  export function addDocument(db, idx, docId, score, replace, fields) {
140
191
  const meta = getIndexMeta(db, idx);
141
- const fieldNames = meta.schema.fields.map((f) => f.name);
192
+ const fieldNames = getSortedFieldNames(meta.schema);
142
193
  for (const k of Object.keys(fields)) {
143
194
  if (!fieldNames.includes(k)) throw new Error('ERR unknown field');
144
195
  }
@@ -154,14 +205,16 @@ export function addDocument(db, idx, docId, score, replace, fields) {
154
205
  let ftsRowid;
155
206
  if (existing) {
156
207
  if (!replace) throw new Error('ERR document exists');
157
- // FTS5 contentless bug: INSERT OR REPLACE doesn't remove old tokens.
158
- // Solution: assign a new fts_rowid to avoid token pollution.
159
- const maxRow = db.prepare(`SELECT COALESCE(MAX(fts_rowid), 0) AS m FROM ${docmapT}`).get();
160
- ftsRowid = maxRow.m + 1;
208
+ const previousRowid = existing.fts_rowid;
209
+ ftsRowid = allocateFtsRowid(db);
161
210
  db.prepare(`UPDATE ${docmapT} SET fts_rowid = ? WHERE doc_id = ?`).run(ftsRowid, docId);
211
+ const oldDoc = db.prepare(`SELECT fields_json FROM ${docsT} WHERE doc_id = ?`).get(docId);
212
+ if (oldDoc?.fields_json) {
213
+ const oldFields = JSON.parse(oldDoc.fields_json);
214
+ deleteFtsRow(db, ftsT, previousRowid, fieldNames, oldFields);
215
+ }
162
216
  } else {
163
- const maxRow = db.prepare(`SELECT COALESCE(MAX(fts_rowid), 0) AS m FROM ${docmapT}`).get();
164
- ftsRowid = maxRow.m + 1;
217
+ ftsRowid = allocateFtsRowid(db);
165
218
  db.prepare(`INSERT INTO ${docmapT}(doc_id, fts_rowid) VALUES (?, ?)`).run(docId, ftsRowid);
166
219
  }
167
220
 
@@ -176,9 +229,8 @@ export function addDocument(db, idx, docId, score, replace, fields) {
176
229
  ).run(docId, score, fieldsJson, now, now);
177
230
  }
178
231
 
179
- // FTS5 contentless: insert with new rowid (old rowid becomes orphaned and won't match via docmap join).
180
- const ftsColumns = ['rowid', ...fieldNames.sort()];
181
- const ftsValues = [ftsRowid, ...fieldNames.sort().map((f) => fields[f] ?? '')];
232
+ const ftsColumns = ['rowid', ...fieldNames];
233
+ const ftsValues = [ftsRowid, ...encodeFtsFieldValues(fieldNames, fields)];
182
234
  const placeholders = ftsValues.map(() => '?').join(', ');
183
235
  const colList = ftsColumns.join(', ');
184
236
  db.prepare(`INSERT INTO ${ftsT}(${colList}) VALUES (${placeholders})`).run(...ftsValues);
@@ -218,7 +270,8 @@ export function getDocumentFields(db, idx, docId) {
218
270
  }
219
271
 
220
272
  export function deleteDocument(db, idx, docId) {
221
- getIndexMeta(db, idx);
273
+ const meta = getIndexMeta(db, idx);
274
+ const fieldNames = getSortedFieldNames(meta.schema);
222
275
  const docsT = tableName(idx, 'docs');
223
276
  const docmapT = tableName(idx, 'docmap');
224
277
  const ftsT = tableName(idx, 'fts');
@@ -226,9 +279,12 @@ export function deleteDocument(db, idx, docId) {
226
279
  const row = db.prepare(`SELECT fts_rowid FROM ${docmapT} WHERE doc_id = ?`).get(docId);
227
280
  if (!row) return 0;
228
281
 
229
- // FTS5 contentless does not support DELETE. Remove from docs and docmap; FTS row becomes orphaned
230
- // (search results join through docmap so orphaned FTS rows are not returned).
231
282
  db.transaction(() => {
283
+ const docRow = db.prepare(`SELECT fields_json FROM ${docsT} WHERE doc_id = ?`).get(docId);
284
+ if (docRow?.fields_json) {
285
+ const fields = JSON.parse(docRow.fields_json);
286
+ deleteFtsRow(db, ftsT, row.fts_rowid, fieldNames, fields);
287
+ }
232
288
  db.prepare(`DELETE FROM ${docsT} WHERE doc_id = ?`).run(docId);
233
289
  db.prepare(`DELETE FROM ${docmapT} WHERE doc_id = ?`).run(docId);
234
290
  })();
@@ -230,6 +230,37 @@ describe('Search integration', () => {
230
230
  const g2 = tryParseValue(gorge2, 0).value;
231
231
  assert.equal(g2[0], 0, 'gorge* should NOT match martan - this is the bug');
232
232
  });
233
+
234
+ it('FT.DEL followed by FT.ADD REPLACE does not keep stale tokens', async () => {
235
+ await sendCommand(port, argv('FT.CREATE', 'del_replace_idx', 'SCHEMA', 'payload', 'TEXT'));
236
+
237
+ const addBicho = await sendCommand(
238
+ port,
239
+ argv('FT.ADD', 'del_replace_idx', 'DY1O2', '1', 'REPLACE', 'FIELDS', 'payload', 'bicho')
240
+ );
241
+ assert.equal(tryParseValue(addBicho, 0).value, 'OK');
242
+
243
+ const del = await sendCommand(port, argv('FT.DEL', 'del_replace_idx', 'DY1O2'));
244
+ assert.equal(tryParseValue(del, 0).value, 1);
245
+
246
+ const addGorrion = await sendCommand(
247
+ port,
248
+ argv('FT.ADD', 'del_replace_idx', 'DY1O2', '1', 'REPLACE', 'FIELDS', 'payload', 'gorrion')
249
+ );
250
+ assert.equal(tryParseValue(addGorrion, 0).value, 'OK');
251
+
252
+ const oldPrefix = await sendCommand(port, argv('FT.SEARCH', 'del_replace_idx', 'bicho*', 'NOCONTENT', 'LIMIT', '0', '10'));
253
+ const oldArr = tryParseValue(oldPrefix, 0).value;
254
+ assert.equal(oldArr[0], 0, 'bicho* should not match after re-adding DY1O2 with gorrion');
255
+
256
+ const newPrefix = await sendCommand(
257
+ port,
258
+ argv('FT.SEARCH', 'del_replace_idx', 'gorrion*', 'NOCONTENT', 'LIMIT', '0', '10')
259
+ );
260
+ const newArr = tryParseValue(newPrefix, 0).value;
261
+ assert.equal(newArr[0], 1);
262
+ assert.equal(newArr[1].toString?.('utf8') ?? newArr[1], 'DY1O2');
263
+ });
233
264
  });
234
265
 
235
266
  describe('Search persistence', () => {
@@ -88,6 +88,31 @@ describe('Search layer', () => {
88
88
  assert.equal(deleteDocument(db, 'names', 'nonexistent'), 0);
89
89
  });
90
90
 
91
+ it('delete + re-add never remaps legacy stale tokens', () => {
92
+ createIndex(db, 'legacy_stale', [{ name: 'payload', type: 'TEXT' }]);
93
+ addDocument(db, 'legacy_stale', 'doc1', 1, true, { payload: 'gorrion' });
94
+
95
+ const mapped = db
96
+ .prepare('SELECT fts_rowid FROM search_docmap__legacy_stale WHERE doc_id = ?')
97
+ .get('doc1');
98
+ const oldRowid = mapped.fts_rowid;
99
+
100
+ // Simulate a legacy-polluted index row where stale term postings exist for the same rowid.
101
+ db.prepare('INSERT INTO search_fts__legacy_stale(rowid, payload) VALUES (?, ?)').run(oldRowid, 'bicho');
102
+
103
+ assert.equal(search(db, 'legacy_stale', 'bicho*', { noContent: true }).total, 1);
104
+ assert.equal(deleteDocument(db, 'legacy_stale', 'doc1'), 1);
105
+
106
+ addDocument(db, 'legacy_stale', 'doc1', 1, true, { payload: 'gorrion' });
107
+ const remapped = db
108
+ .prepare('SELECT fts_rowid FROM search_docmap__legacy_stale WHERE doc_id = ?')
109
+ .get('doc1');
110
+ assert.notEqual(remapped.fts_rowid, oldRowid);
111
+
112
+ assert.equal(search(db, 'legacy_stale', 'gorrion*', { noContent: true }).total, 1);
113
+ assert.equal(search(db, 'legacy_stale', 'bicho*', { noContent: true }).total, 0);
114
+ });
115
+
91
116
  it('search with NOCONTENT returns total and doc ids', () => {
92
117
  const r = search(db, 'names', 'hello', { noContent: true, offset: 0, count: 10 });
93
118
  assert.equal(typeof r.total, 'number');