s3db.js 13.3.0 → 13.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,487 @@
1
+ /**
2
+ * # FullTextPlugin - Full-Text Search for s3db.js
3
+ *
4
+ * ## Overview
5
+ *
6
+ * The FullTextPlugin adds powerful full-text search capabilities to s3db.js, automatically
7
+ * indexing specified fields and providing fast, flexible search across your resources.
8
+ *
9
+ * ## Features
10
+ *
11
+ * 1. **Automatic Indexing** - Automatically indexes text fields on insert/update/delete
12
+ * 2. **Configurable Fields** - Choose which fields to index per resource
13
+ * 3. **Tokenization** - Intelligent word tokenization with configurable minimum length
14
+ * 4. **Partial Matching** - Support for both exact and partial word matching
15
+ * 5. **Relevance Scoring** - Results ranked by relevance score
16
+ * 6. **Persistent Indexes** - Indexes stored in S3 and loaded on startup
17
+ * 7. **Incremental Updates** - Only changed indexes are saved (dirty tracking)
18
+ * 8. **Index Management** - Rebuild, clear, and get statistics for indexes
19
+ *
20
+ * ## Configuration
21
+ *
22
+ * ```javascript
23
+ * import { Database } from 's3db.js';
24
+ * import { FullTextPlugin } from 's3db.js/plugins/fulltext';
25
+ *
26
+ * // Basic configuration
27
+ * const db = new Database({
28
+ * connectionString: 's3://bucket/db'
29
+ * });
30
+ *
31
+ * await db.use(new FullTextPlugin({
32
+ * minWordLength: 3, // Minimum word length to index (default: 3)
33
+ * maxResults: 100, // Maximum search results (default: 100)
34
+ * fields: ['title', 'description', 'content'] // Fields to index
35
+ * }));
36
+ *
37
+ * // Per-resource field mapping
38
+ * await db.use(new FullTextPlugin({
39
+ * minWordLength: 2, // Index shorter words
40
+ * fields: {
41
+ * users: ['name', 'email', 'bio'],
42
+ * products: ['name', 'description', 'category'],
43
+ * articles: ['title', 'content', 'tags']
44
+ * }
45
+ * }));
46
+ * ```
47
+ *
48
+ * ## Usage Examples
49
+ *
50
+ * ### Basic Search
51
+ *
52
+ * ```javascript
53
+ * const db = new Database({ connectionString: 's3://bucket/db' });
54
+ * await db.use(new FullTextPlugin({
55
+ * fields: ['title', 'content']
56
+ * }));
57
+ * await db.start();
58
+ *
59
+ * const articles = await db.createResource({
60
+ * name: 'articles',
61
+ * attributes: {
62
+ * title: 'string',
63
+ * content: 'string',
64
+ * author: 'string'
65
+ * }
66
+ * });
67
+ *
68
+ * // Insert articles (automatically indexed)
69
+ * await articles.insert({
70
+ * id: 'a1',
71
+ * title: 'Getting Started with S3DB',
72
+ * content: 'S3DB is a document database built on AWS S3...',
73
+ * author: 'John Doe'
74
+ * });
75
+ *
76
+ * // Search articles
77
+ * const fulltextPlugin = db.plugins.FullTextPlugin;
78
+ * const results = await fulltextPlugin.searchRecords('articles', 'S3DB database');
79
+ *
80
+ * console.log(results);
81
+ * // [
82
+ * // {
83
+ * // id: 'a1',
84
+ * // title: 'Getting Started with S3DB',
85
+ * // content: 'S3DB is a document database...',
86
+ * // _searchScore: 2
87
+ * // }
88
+ * // ]
89
+ * ```
90
+ *
91
+ * ### Search with Options
92
+ *
93
+ * ```javascript
94
+ * const fulltextPlugin = db.plugins.FullTextPlugin;
95
+ *
96
+ * // Exact match search
97
+ * const exact = await fulltextPlugin.searchRecords('articles', 'database', {
98
+ * exactMatch: true,
99
+ * limit: 10
100
+ * });
101
+ *
102
+ * // Partial match search (default)
103
+ * const partial = await fulltextPlugin.searchRecords('articles', 'data', {
104
+ * exactMatch: false,
105
+ * limit: 20
106
+ * });
107
+ *
108
+ * // Search specific fields
109
+ * const titleOnly = await fulltextPlugin.searchRecords('articles', 'S3DB', {
110
+ * fields: ['title'], // Search only title field
111
+ * limit: 5
112
+ * });
113
+ *
114
+ * // Paginated search
115
+ * const page2 = await fulltextPlugin.searchRecords('articles', 'database', {
116
+ * limit: 10,
117
+ * offset: 10 // Skip first 10 results
118
+ * });
119
+ * ```
120
+ *
121
+ * ### Search IDs Only
122
+ *
123
+ * ```javascript
124
+ * // Get only record IDs and scores (faster)
125
+ * const idResults = await fulltextPlugin.search('articles', 'database');
126
+ *
127
+ * console.log(idResults);
128
+ * // [
129
+ * // { recordId: 'a1', score: 3 },
130
+ * // { recordId: 'a2', score: 2 },
131
+ * // { recordId: 'a3', score: 1 }
132
+ * // ]
133
+ *
134
+ * // Fetch records manually if needed
135
+ * const records = await articles.getMany(idResults.map(r => r.recordId));
136
+ * ```
137
+ *
138
+ * ### Index Management
139
+ *
140
+ * ```javascript
141
+ * const fulltextPlugin = db.plugins.FullTextPlugin;
142
+ *
143
+ * // Rebuild index for a resource
144
+ * await fulltextPlugin.rebuildIndex('articles');
145
+ *
146
+ * // Rebuild all indexes
147
+ * await fulltextPlugin.rebuildAllIndexes();
148
+ *
149
+ * // Rebuild with timeout
150
+ * await fulltextPlugin.rebuildAllIndexes({ timeout: 30000 }); // 30 seconds
151
+ *
152
+ * // Get index statistics
153
+ * const stats = await fulltextPlugin.getIndexStats();
154
+ * console.log(stats);
155
+ * // {
156
+ * // totalIndexes: 1523,
157
+ * // totalWords: 245,
158
+ * // resources: {
159
+ * // articles: {
160
+ * // totalRecords: 50,
161
+ * // totalWords: 150,
162
+ * // fields: {
163
+ * // title: { words: 75, totalOccurrences: 100 },
164
+ * // content: { words: 75, totalOccurrences: 200 }
165
+ * // }
166
+ * // }
167
+ * // }
168
+ * // }
169
+ *
170
+ * // Clear specific resource index
171
+ * await fulltextPlugin.clearIndex('articles');
172
+ *
173
+ * // Clear all indexes
174
+ * await fulltextPlugin.clearAllIndexes();
175
+ * ```
176
+ *
177
+ * ## Best Practices
178
+ *
179
+ * ### 1. Choose Fields Wisely
180
+ *
181
+ * ```javascript
182
+ * // DON'T: Index all fields (wastes storage)
183
+ * await db.use(new FullTextPlugin({
184
+ * fields: ['id', 'createdAt', 'updatedAt', 'title', 'content'] // ❌
185
+ * }));
186
+ *
187
+ * // DO: Index only searchable text fields
188
+ * await db.use(new FullTextPlugin({
189
+ * fields: ['title', 'content', 'tags'] // ✅
190
+ * }));
191
+ * ```
192
+ *
193
+ * ### 2. Configure Minimum Word Length
194
+ *
195
+ * ```javascript
196
+ * // For general text (articles, blogs)
197
+ * await db.use(new FullTextPlugin({
198
+ * minWordLength: 3 // Skip 1-2 letter words such as "a", "an", "of"
199
+ * }));
200
+ *
201
+ * // For technical content (code, IDs)
202
+ * await db.use(new FullTextPlugin({
203
+ * minWordLength: 2 // Allow shorter terms like "id", "db"
204
+ * }));
205
+ *
206
+ * // For specialized content (medical, legal)
207
+ * await db.use(new FullTextPlugin({
208
+ * minWordLength: 4 // More selective indexing
209
+ * }));
210
+ * ```
211
+ *
212
+ * ### 3. Rebuild Indexes After Schema Changes
213
+ *
214
+ * ```javascript
215
+ * // After changing indexed fields
216
+ * await db.use(new FullTextPlugin({
217
+ * fields: ['title', 'content', 'summary'] // Added 'summary'
218
+ * }));
219
+ *
220
+ * // Rebuild indexes to include new field
221
+ * const fulltextPlugin = db.plugins.FullTextPlugin;
222
+ * await fulltextPlugin.rebuildAllIndexes();
223
+ * ```
224
+ *
225
+ * ### 4. Use Exact Match for Precision
226
+ *
227
+ * ```javascript
228
+ * // For user search: partial match (more results)
229
+ * const userSearch = await fulltextPlugin.searchRecords('articles', query, {
230
+ * exactMatch: false
231
+ * });
232
+ *
233
+ * // For filtering: exact match (precise results)
234
+ * const filtered = await fulltextPlugin.searchRecords('articles', 'database', {
235
+ * exactMatch: true
236
+ * });
237
+ * ```
238
+ *
239
+ * ## Performance Considerations
240
+ *
241
+ * ### Indexing Performance
242
+ *
243
+ * - **Insert**: +10-50ms per record (depending on text length)
244
+ * - **Update**: +20-100ms per record (remove old + add new index)
245
+ * - **Delete**: +10-30ms per record (remove from index)
246
+ * - **Storage**: ~100-500 bytes per indexed word
247
+ *
248
+ * ### Search Performance
249
+ *
250
+ * | Records | Indexed Words | Search Time |
251
+ * |---------|---------------|-------------|
252
+ * | 1,000 | 5,000 | ~10ms |
253
+ * | 10,000 | 50,000 | ~50ms |
254
+ * | 100,000 | 500,000 | ~200ms |
255
+ *
256
+ * ### Optimization Tips
257
+ *
258
+ * ```javascript
259
+ * // 1. Use search() instead of searchRecords() when you don't need full records
260
+ * const ids = await fulltextPlugin.search('articles', 'database'); // Fast
261
+ * const records = await fulltextPlugin.searchRecords('articles', 'database'); // Slower
262
+ *
263
+ * // 2. Limit results
264
+ * const results = await fulltextPlugin.searchRecords('articles', 'database', {
265
+ * limit: 20 // Faster than fetching 100+ results
266
+ * });
267
+ *
268
+ * // 3. Search specific fields
269
+ * const titleResults = await fulltextPlugin.searchRecords('articles', 'database', {
270
+ * fields: ['title'] // Faster than searching all fields
271
+ * });
272
+ *
273
+ * // 4. Use pagination for large result sets
274
+ * for (let offset = 0; offset < total; offset += 50) {
275
+ * const page = await fulltextPlugin.searchRecords('articles', 'database', {
276
+ * limit: 50,
277
+ * offset
278
+ * });
279
+ * processPage(page);
280
+ * }
281
+ * ```
282
+ *
283
+ * ## Troubleshooting
284
+ *
285
+ * ### Search Returns No Results
286
+ *
287
+ * ```javascript
288
+ * // Check if fields are configured
289
+ * const plugin = db.plugins.FullTextPlugin;
290
+ * console.log(plugin.config.fields); // Should include the fields you're searching
291
+ *
292
+ * // Check index statistics
293
+ * const stats = await plugin.getIndexStats();
294
+ * console.log(stats.resources.articles); // Should show indexed words
295
+ *
296
+ * // Rebuild index if needed
297
+ * await plugin.rebuildIndex('articles');
298
+ * ```
299
+ *
300
+ * ### Search Too Slow
301
+ *
302
+ * ```javascript
303
+ * // Solution 1: Increase minWordLength to index fewer words
304
+ * await db.use(new FullTextPlugin({
305
+ * minWordLength: 4 // More selective
306
+ * }));
307
+ *
308
+ * // Solution 2: Limit search fields
309
+ * const results = await plugin.searchRecords('articles', query, {
310
+ * fields: ['title'] // Search only title, not content
311
+ * });
312
+ *
313
+ * // Solution 3: Use exact match
314
+ * const exactResults = await plugin.searchRecords('articles', query, {
315
+ * exactMatch: true // Faster than partial matching
316
+ * });
317
+ * ```
318
+ *
319
+ * ### Index Growing Too Large
320
+ *
321
+ * ```javascript
322
+ * // Check index size
323
+ * const stats = await plugin.getIndexStats();
324
+ * console.log(`Total indexes: ${stats.totalIndexes}`);
325
+ * console.log(`Total words: ${stats.totalWords}`);
326
+ *
327
+ * // Solution 1: Increase minWordLength
328
+ * await db.use(new FullTextPlugin({
329
+ * minWordLength: 4 // Index fewer words
330
+ * }));
331
+ * await plugin.rebuildAllIndexes();
332
+ *
333
+ * // Solution 2: Index fewer fields
334
+ * await db.use(new FullTextPlugin({
335
+ * fields: ['title'] // Don't index long content fields
336
+ * }));
337
+ * await plugin.rebuildAllIndexes();
338
+ *
339
+ * // Solution 3: Clear old indexes
340
+ * await plugin.clearIndex('old_resource');
341
+ * ```
342
+ *
343
+ * ### Indexes Not Persisting
344
+ *
345
+ * ```javascript
346
+ * // Indexes save automatically on plugin stop
347
+ * await db.stop(); // Ensures indexes are saved
348
+ *
349
+ * // Or manually save
350
+ * await plugin.saveIndexes();
351
+ *
352
+ * // Check if index resource exists
353
+ * console.log(db.resources.plg_fulltext_indexes); // Should exist
354
+ * ```
355
+ *
356
+ * ## Real-World Use Cases
357
+ *
358
+ * ### 1. Article/Blog Search
359
+ *
360
+ * ```javascript
361
+ * const plugin = new FullTextPlugin({
362
+ * fields: ['title', 'content', 'tags'],
363
+ * minWordLength: 3
364
+ * });
365
+ *
366
+ * // User searches for "javascript database"
367
+ * const results = await plugin.searchRecords('articles', 'javascript database', {
368
+ * limit: 10
369
+ * });
370
+ *
371
+ * // Display each result with its relevance score
372
+ * results.forEach(article => {
373
+ * console.log(`${article.title} (score: ${article._searchScore})`);
374
+ * });
375
+ * ```
376
+ *
377
+ * ### 2. Product Search
378
+ *
379
+ * ```javascript
380
+ * const plugin = new FullTextPlugin({
381
+ * fields: ['name', 'description', 'category', 'brand'],
382
+ * minWordLength: 2
383
+ * });
384
+ *
385
+ * // Search for "laptop gaming"
386
+ * const products = await plugin.searchRecords('products', 'laptop gaming', {
387
+ * limit: 20
388
+ * });
389
+ *
390
+ * // Filter by category after search
391
+ * const electronics = products.filter(p => p.category === 'Electronics');
392
+ * ```
393
+ *
394
+ * ### 3. User Directory Search
395
+ *
396
+ * ```javascript
397
+ * const plugin = new FullTextPlugin({
398
+ * fields: ['name', 'email', 'department', 'title'],
399
+ * minWordLength: 2
400
+ * });
401
+ *
402
+ * // Search for "john engineer"
403
+ * const users = await plugin.searchRecords('users', 'john engineer', {
404
+ * limit: 10
405
+ * });
406
+ * ```
407
+ *
408
+ * ### 4. Documentation Search
409
+ *
410
+ * ```javascript
411
+ * const plugin = new FullTextPlugin({
412
+ * fields: ['title', 'content', 'category'],
413
+ * minWordLength: 3
414
+ * });
415
+ *
416
+ * // Search docs with exact match for technical terms
417
+ * const exactResults = await plugin.searchRecords('docs', 'insert()', {
418
+ * exactMatch: true
419
+ * });
420
+ *
421
+ * // Fallback to partial match if no results
422
+ * if (exactResults.length === 0) {
423
+ * const partialResults = await plugin.searchRecords('docs', 'insert', {
424
+ * exactMatch: false
425
+ * });
426
+ * }
427
+ * ```
428
+ *
429
+ * ## API Reference
430
+ *
431
+ * ### Constructor Options
432
+ *
433
+ * - `minWordLength` (number, default: 3) - Minimum word length to index
434
+ * - `maxResults` (number, default: 100) - Maximum search results
435
+ * - `fields` (string[] | object) - Fields to index (array or per-resource mapping)
436
+ *
437
+ * ### Methods
438
+ *
439
+ * - `search(resourceName, query, options)` - Search and return IDs with scores
440
+ * - `searchRecords(resourceName, query, options)` - Search and return full records
441
+ * - `rebuildIndex(resourceName)` - Rebuild index for a resource
442
+ * - `rebuildAllIndexes(options)` - Rebuild all indexes
443
+ * - `getIndexStats()` - Get index statistics
444
+ * - `clearIndex(resourceName)` - Clear specific resource index
445
+ * - `clearAllIndexes()` - Clear all indexes
446
+ * - `saveIndexes()` - Manually save indexes to S3
447
+ *
448
+ * ### Search Options
449
+ *
450
+ * ```typescript
451
+ * interface SearchOptions {
452
+ * fields?: string[]; // Specific fields to search
453
+ * limit?: number; // Max results (default: maxResults from config)
454
+ * offset?: number; // Pagination offset (default: 0)
455
+ * exactMatch?: boolean; // Exact vs partial matching (default: false)
456
+ * }
457
+ * ```
458
+ *
459
+ * ### Search Result Structure
460
+ *
461
+ * ```typescript
462
+ * // search() returns
463
+ * interface SearchResult {
464
+ * recordId: string;
465
+ * score: number; // Higher = more relevant
466
+ * }
467
+ *
468
+ * // searchRecords() returns
469
+ * interface SearchRecord extends ResourceRecord {
470
+ * _searchScore: number; // Added to each record
471
+ * }
472
+ * ```
473
+ *
474
+ * ## Notes
475
+ *
476
+ * - Indexes are stored in `plg_fulltext_indexes` resource
477
+ * - Tokenization preserves accented characters (é, ñ, etc.)
478
+ * - Case-insensitive search
479
+ * - Special characters are removed during tokenization
480
+ * - Nested field paths supported (e.g., 'profile.bio')
481
+ * - Indexes save automatically on plugin stop
482
+ * - Dirty tracking ensures only changed indexes are saved
483
+ */
484
+
1
485
  import { Plugin } from "./plugin.class.js";
2
486
  import tryFn from "../concerns/try-fn.js";
3
487
  import { FulltextError } from "./fulltext.errors.js";