@outfitter/index 0.1.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,292 @@
1
+ # @outfitter/index
2
+
3
+ SQLite FTS5 full-text search indexing with WAL mode and Result-based error handling.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ bun add @outfitter/index
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import { createIndex } from "@outfitter/index";
15
+
16
+ // Create an index
17
+ const index = createIndex({ path: "./data/search.db" });
18
+
19
+ // Add documents
20
+ await index.add({
21
+ id: "doc-1",
22
+ content: "Hello world, this is searchable content",
23
+ metadata: { title: "Greeting", tags: ["hello", "world"] },
24
+ });
25
+
26
+ // Search with FTS5 syntax
27
+ const results = await index.search({ query: "hello" });
28
+
29
+ if (results.isOk()) {
30
+ for (const result of results.value) {
31
+ console.log(result.id, result.score, result.highlights);
32
+ }
33
+ }
34
+
35
+ // Cleanup
36
+ index.close();
37
+ ```
38
+
39
+ ## Features
40
+
41
+ - **FTS5 Full-Text Search** — BM25 ranking with snippet highlights
42
+ - **WAL Mode** — Better concurrency for read-heavy workloads
43
+ - **Typed Metadata** — Generic type parameter for document metadata
44
+ - **Result-Based API** — Document operations (`add`, `addMany`, `search`, `remove`, `clear`) resolve to `Result<T, StorageError>`
45
+ - **Tokenizer Options** — unicode61, porter (stemming), or trigram
46
+ - **Batch Operations** — Efficient bulk document insertion
47
+ - **Version Migration** — Built-in schema migration support
48
+
49
+ ## API Reference
50
+
51
+ ### createIndex(options)
52
+
53
+ Creates an FTS5 full-text search index.
54
+
55
+ ```typescript
56
+ interface IndexOptions {
57
+ path: string; // Path to SQLite database file
58
+ tableName?: string; // FTS5 table name (default: "documents")
59
+ tokenizer?: TokenizerType; // Tokenizer (default: "unicode61")
60
+ tool?: string; // Tool identifier for metadata
61
+ toolVersion?: string; // Tool version for metadata
62
+ migrations?: IndexMigrationRegistry; // Optional migration registry
63
+ }
64
+
65
+ const index = createIndex<MyMetadata>({
66
+ path: "./data/index.db",
67
+ tableName: "notes_fts",
68
+ tokenizer: "porter",
69
+ });
70
+ ```
71
+
72
+ ### Tokenizer Types
73
+
74
+ | Tokenizer | Use Case |
75
+ |-----------|----------|
76
+ | `unicode61` | Default, Unicode-aware word tokenization |
77
+ | `porter` | English text with stemming (finds "running" when searching "run") |
78
+ | `trigram` | Substring matching, typo tolerance |
79
+
80
+ ### Index Methods
81
+
82
+ ```typescript
83
+ interface Index<T = unknown> {
84
+ // Add single document (replaces if ID exists)
85
+ add(doc: IndexDocument): Promise<Result<void, StorageError>>;
86
+
87
+ // Add multiple documents in a transaction
88
+ addMany(docs: IndexDocument[]): Promise<Result<void, StorageError>>;
89
+
90
+ // Search with FTS5 query syntax
91
+ search(query: SearchQuery): Promise<Result<SearchResult<T>[], StorageError>>;
92
+
93
+ // Remove document by ID
94
+ remove(id: string): Promise<Result<void, StorageError>>;
95
+
96
+ // Clear all documents
97
+ clear(): Promise<Result<void, StorageError>>;
98
+
99
+ // Close database connection
100
+ close(): void;
101
+ }
102
+ ```
103
+
104
+ ### Document Structure
105
+
106
+ ```typescript
107
+ interface IndexDocument {
108
+ id: string; // Unique document ID
109
+ content: string; // Searchable text
110
+ metadata?: Record<string, unknown>; // Optional metadata (stored as JSON)
111
+ }
112
+
113
+ await index.add({
114
+ id: "note-123",
115
+ content: "Meeting notes from standup",
116
+ metadata: {
117
+ title: "Standup Notes",
118
+ date: "2024-01-15",
119
+ tags: ["meeting", "standup"],
120
+ },
121
+ });
122
+ ```
123
+
124
+ ### Search Query
125
+
126
+ ```typescript
127
+ interface SearchQuery {
128
+ query: string; // FTS5 query string
129
+ limit?: number; // Max results (default: 25)
130
+ offset?: number; // Skip results for pagination (default: 0)
131
+ }
132
+
133
+ // Simple search
134
+ const results = await index.search({ query: "typescript" });
135
+
136
+ // Phrase search with pagination
137
+ const paged = await index.search({
138
+ query: '"error handling"',
139
+ limit: 10,
140
+ offset: 20,
141
+ });
142
+ ```
143
+
144
+ ### FTS5 Query Syntax
145
+
146
+ FTS5 supports powerful query syntax:
147
+
148
+ | Syntax | Example | Description |
149
+ |--------|---------|-------------|
150
+ | Terms | `typescript bun` | Match all terms (implicit AND) |
151
+ | Phrase | `"error handling"` | Exact phrase match |
152
+ | OR | `ts OR typescript` | Match either term |
153
+ | NOT | `typescript NOT javascript` | Exclude term |
154
+ | Prefix | `type*` | Prefix matching |
155
+ | Grouping | `(react OR vue) AND typescript` | Complex queries |
156
+
157
+ ### Search Results
158
+
159
+ ```typescript
160
+ interface SearchResult<T = unknown> {
161
+ id: string; // Document ID
162
+ content: string; // Full document content
163
+ score: number; // BM25 relevance (negative; more negative = better match)
164
+ metadata?: T; // Document metadata
165
+ highlights?: string[]; // Matching snippets with <b> tags
166
+ }
167
+
168
+ const results = await index.search({ query: "hello world" });
169
+
170
+ if (results.isOk()) {
171
+ for (const result of results.value) {
172
+ console.log(`${result.id}: ${result.highlights?.[0]}`);
173
+ // "doc-1: <b>Hello</b> <b>world</b>, this is..."
174
+ }
175
+ }
176
+ ```
177
+
178
+ ## Batch Operations
179
+
180
+ For bulk indexing, use `addMany` for transactional efficiency:
181
+
182
+ ```typescript
183
+ const documents = [
184
+ { id: "1", content: "First document" },
185
+ { id: "2", content: "Second document" },
186
+ { id: "3", content: "Third document" },
187
+ ];
188
+
189
+ const result = await index.addMany(documents);
190
+
191
+ if (result.isErr()) {
192
+ // Transaction rolled back, no documents added
193
+ console.error(result.error.message);
194
+ }
195
+ ```
196
+
197
+ ## Version Migration
198
+
199
+ Indexes track their schema version. Provide a migration registry for upgrades:
200
+
201
+ ```typescript
202
+ import { Result } from "@outfitter/contracts";
+ import { createIndex, createMigrationRegistry } from "@outfitter/index";
203
+
204
+ const migrations = createMigrationRegistry();
205
+
206
+ migrations.register(1, 2, (ctx) => {
207
+ ctx.db.run("ALTER TABLE documents ADD COLUMN category TEXT");
208
+ return Result.ok(undefined);
209
+ });
210
+
211
+ const index = createIndex({
212
+ path: "./data/index.db",
213
+ migrations,
214
+ });
215
+ ```
216
+
217
+ ### Migration Registry
218
+
219
+ ```typescript
220
+ interface IndexMigrationRegistry {
221
+ register(
222
+ fromVersion: number,
223
+ toVersion: number,
224
+ migrate: (ctx: IndexMigrationContext) => Result<void, StorageError>
225
+ ): void;
226
+
227
+ migrate(
228
+ ctx: IndexMigrationContext,
229
+ fromVersion: number,
230
+ toVersion: number
231
+ ): Result<void, StorageError>;
232
+ }
233
+
234
+ interface IndexMigrationContext {
235
+ db: Database; // bun:sqlite Database instance
236
+ }
237
+ ```
238
+
239
+ ## Index Metadata
240
+
241
+ Indexes store metadata for tracking provenance:
242
+
243
+ ```typescript
244
+ interface IndexMetadata {
245
+ version: number; // Schema version
246
+ created: string; // ISO timestamp
247
+ tool: string; // Creating tool identifier
248
+ toolVersion: string; // Creating tool version
249
+ }
250
+ ```
251
+
252
+ ## Version Constant
253
+
254
+ The current index format version is exported for compatibility checks:
255
+
256
+ ```typescript
257
+ import { INDEX_VERSION } from "@outfitter/index";
258
+
259
+ console.log(`Using index format version ${INDEX_VERSION}`);
260
+ ```
261
+
262
+ ## Error Handling
263
+
264
+ All document operations (`add`, `addMany`, `search`, `remove`, `clear`) resolve to `Result<T, StorageError>`; `close()` returns `void`:
265
+
266
+ ```typescript
267
+ const result = await index.add(doc);
268
+
269
+ if (result.isErr()) {
270
+ console.error("Failed to add document:", result.error.message);
271
+ // result.error.cause contains the underlying error
272
+ }
273
+ ```
274
+
275
+ Common error scenarios:
276
+ - Using the index after `close()` has been called
277
+ - Invalid table name or tokenizer
278
+ - SQLite errors (disk full, permissions)
279
+ - Version mismatch without migrations
280
+
281
+ ## Performance Tips
282
+
283
+ 1. **Use WAL mode** — Enabled by default for better read concurrency
284
+ 2. **Batch inserts** — Use `addMany` for bulk operations
285
+ 3. **Choose tokenizer wisely** — `porter` for English, `unicode61` for general use
286
+ 4. **Limit results** — Use pagination for large result sets
287
+ 5. **Close when done** — Call `close()` to release resources
288
+
289
+ ## Related Packages
290
+
291
+ - [@outfitter/contracts](../contracts/README.md) — Result types and StorageError
292
+ - [@outfitter/file-ops](../file-ops/README.md) — Path utilities and workspace detection
package/dist/fts5.d.ts ADDED
@@ -0,0 +1,286 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { StorageError } from "@outfitter/contracts";
3
+ import { Result } from "@outfitter/contracts";
4
+ interface MigrationRegistry<TContext> {
5
+ register(fromVersion: number, toVersion: number, migrate: (context: TContext) => Result<void, StorageError>): void;
6
+ migrate(context: TContext, fromVersion: number, toVersion: number): Result<void, StorageError>;
7
+ }
8
+ interface IndexMigrationContext {
9
+ readonly db: Database;
10
+ }
11
+ type IndexMigrationRegistry = MigrationRegistry<IndexMigrationContext>;
12
+ import { Result as Result2, StorageError as StorageError2 } from "@outfitter/contracts";
13
+ /**
14
+ * FTS5 tokenizer options for text analysis.
15
+ *
16
+ * - `unicode61`: Default tokenizer with Unicode support (recommended for most use cases)
17
+ * - `porter`: Applies Porter stemming algorithm for English text (finds related word forms)
18
+ * - `trigram`: Splits text into 3-character sequences (good for substring matching)
19
+ */
20
+ type TokenizerType = "unicode61" | "porter" | "trigram";
21
+ /**
22
+ * Options for creating an FTS5 index.
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * const options: IndexOptions = {
27
+ * path: "/path/to/index.db",
28
+ * tableName: "documents",
29
+ * tokenizer: "porter",
30
+ * };
31
+ *
32
+ * const index = createIndex(options);
33
+ * ```
34
+ */
35
+ interface IndexOptions {
36
+ /**
37
+ * Path to the SQLite database file.
38
+ * The file will be created if it does not exist.
39
+ */
40
+ path: string;
41
+ /**
42
+ * Name of the FTS5 virtual table.
43
+ * @defaultValue "documents"
44
+ */
45
+ tableName?: string;
46
+ /**
47
+ * FTS5 tokenizer for text analysis.
48
+ * @defaultValue "unicode61"
49
+ */
50
+ tokenizer?: TokenizerType;
51
+ /**
52
+ * Optional tool identifier recorded in index metadata.
53
+ */
54
+ tool?: string;
55
+ /**
56
+ * Optional tool version recorded in index metadata.
57
+ */
58
+ toolVersion?: string;
59
+ /**
60
+ * Optional migration registry for upgrading older index versions.
61
+ */
62
+ migrations?: IndexMigrationRegistry;
63
+ }
64
+ /**
65
+ * A document to be indexed in the FTS5 index.
66
+ *
67
+ * Documents have a unique ID, searchable content, and optional metadata.
68
+ * The metadata is stored as JSON and can be used to attach additional
69
+ * information that is returned with search results.
70
+ *
71
+ * @example
72
+ * ```typescript
73
+ * const doc: IndexDocument = {
74
+ * id: "note-123",
75
+ * content: "This is the searchable text content",
76
+ * metadata: { title: "My Note", createdAt: Date.now() },
77
+ * };
78
+ * ```
79
+ */
80
+ interface IndexDocument {
81
+ /** Unique identifier for this document */
82
+ id: string;
83
+ /** Searchable text content */
84
+ content: string;
85
+ /**
86
+ * Optional metadata associated with the document.
87
+ * Stored as JSON and returned with search results.
88
+ */
89
+ metadata?: Record<string, unknown>;
90
+ }
91
+ /**
92
+ * Query parameters for searching the FTS5 index.
93
+ *
94
+ * Uses FTS5 query syntax which supports:
95
+ * - Simple terms: `search term`
96
+ * - Phrases: `"exact phrase"`
97
+ * - Boolean operators: `term1 AND term2`, `term1 OR term2`, `term1 NOT term2`
98
+ * - Prefix matching: `term*`
99
+ * - Grouping: `(term1 OR term2) AND term3`
100
+ *
101
+ * @example
102
+ * ```typescript
103
+ * // Simple search
104
+ * const query1: SearchQuery = { query: "typescript" };
105
+ *
106
+ * // Phrase search with pagination
107
+ * const query2: SearchQuery = {
108
+ * query: '"error handling"',
109
+ * limit: 10,
110
+ * offset: 20,
111
+ * };
112
+ * ```
113
+ */
114
+ interface SearchQuery {
115
+ /** FTS5 query string */
116
+ query: string;
117
+ /**
118
+ * Maximum number of results to return.
119
+ * @defaultValue 25
120
+ */
121
+ limit?: number;
122
+ /**
123
+ * Number of results to skip (for pagination).
124
+ * @defaultValue 0
125
+ */
126
+ offset?: number;
127
+ }
128
+ /**
129
+ * A single search result from an FTS5 query.
130
+ *
131
+ * Results include the document ID, BM25 relevance score, content,
132
+ * and any associated metadata. Optional highlights show matching
133
+ * snippets from the content.
134
+ *
135
+ * @typeParam T - Type of the metadata (defaults to `unknown`)
136
+ *
137
+ * @example
138
+ * ```typescript
139
+ * interface NoteMetadata {
140
+ * title: string;
141
+ * tags: string[];
142
+ * }
143
+ *
144
+ * const result: SearchResult<NoteMetadata> = {
145
+ * id: "note-123",
146
+ * score: -0.85,
147
+ * content: "Full document content...",
148
+ * metadata: { title: "My Note", tags: ["typescript"] },
149
+ * highlights: ["...matching <b>snippet</b>..."],
150
+ * };
151
+ * ```
152
+ */
153
+ interface SearchResult<T = unknown> {
154
+ /** Document ID */
155
+ id: string;
156
+ /**
157
+ * BM25 relevance ranking score.
158
+ * Lower (more negative) scores indicate better matches.
159
+ * Note: FTS5 bm25() returns negative values; the better the match, the numerically smaller the value.
160
+ */
161
+ score: number;
162
+ /** Full document content */
163
+ content: string;
164
+ /** Document metadata (if present) */
165
+ metadata?: T;
166
+ /**
167
+ * Matching snippets from the content.
168
+ * Uses FTS5 snippet() function for context-aware highlights.
169
+ */
170
+ highlights?: string[];
171
+ }
172
+ /**
173
+ * The FTS5 index interface for full-text search operations.
174
+ *
175
+ * Provides methods for adding, searching, and removing documents
176
+ * from an SQLite FTS5 index. All operations return `Result` types
177
+ * for explicit error handling.
178
+ *
179
+ * @typeParam T - Type of document metadata (defaults to `unknown`)
180
+ *
181
+ * @example
182
+ * ```typescript
183
+ * const index = createIndex<NoteMetadata>({ path: "./index.db" });
184
+ *
185
+ * // Add documents
186
+ * await index.add({ id: "1", content: "Hello world", metadata: { title: "Greeting" } });
187
+ *
188
+ * // Search
189
+ * const results = await index.search({ query: "hello" });
190
+ * if (results.isOk()) {
191
+ * for (const result of results.value) {
192
+ * console.log(result.id, result.score);
193
+ * }
194
+ * }
195
+ *
196
+ * // Cleanup
197
+ * index.close();
198
+ * ```
199
+ */
200
+ interface Index<T = unknown> {
201
+ /**
202
+ * Add a single document to the index.
203
+ * If a document with the same ID exists, it will be replaced.
204
+ *
205
+ * @param doc - Document to add
206
+ * @returns Result indicating success or StorageError
207
+ */
208
+ add(doc: IndexDocument): Promise<Result2<void, StorageError2>>;
209
+ /**
210
+ * Add multiple documents to the index in a single transaction.
211
+ * More efficient than calling add() multiple times.
212
+ * If a document with the same ID exists, it will be replaced.
213
+ *
214
+ * @param docs - Array of documents to add
215
+ * @returns Result indicating success or StorageError
216
+ */
217
+ addMany(docs: IndexDocument[]): Promise<Result2<void, StorageError2>>;
218
+ /**
219
+ * Search the index using FTS5 query syntax.
220
+ * Returns results ranked by BM25 relevance score.
221
+ *
222
+ * @param query - Search query parameters
223
+ * @returns Result containing array of search results or StorageError
224
+ */
225
+ search(query: SearchQuery): Promise<Result2<SearchResult<T>[], StorageError2>>;
226
+ /**
227
+ * Remove a document from the index by ID.
228
+ * No error is returned if the document does not exist.
229
+ *
230
+ * @param id - Document ID to remove
231
+ * @returns Result indicating success or StorageError
232
+ */
233
+ remove(id: string): Promise<Result2<void, StorageError2>>;
234
+ /**
235
+ * Remove all documents from the index.
236
+ *
237
+ * @returns Result indicating success or StorageError
238
+ */
239
+ clear(): Promise<Result2<void, StorageError2>>;
240
+ /**
241
+ * Close the index and release resources.
242
+ * The index should not be used after calling close().
243
+ */
244
+ close(): void;
245
+ }
246
+ /**
247
+ * Creates an FTS5 full-text search index.
248
+ *
249
+ * Uses SQLite FTS5 virtual table for fast full-text search with BM25 ranking.
250
+ * The database is configured with WAL mode for better concurrency.
251
+ *
252
+ * @typeParam T - Type of document metadata (defaults to `unknown`)
253
+ * @param options - Index options including database path and table configuration
254
+ * @returns An Index instance for managing documents and searching
255
+ *
256
+ * @example
257
+ * ```typescript
258
+ * // Create an index with default settings
259
+ * const index = createIndex({ path: "./data/index.db" });
260
+ *
261
+ * // Add documents
262
+ * await index.add({
263
+ * id: "doc-1",
264
+ * content: "Hello world",
265
+ * metadata: { title: "Greeting" },
266
+ * });
267
+ *
268
+ * // Search
269
+ * const results = await index.search({ query: "hello" });
270
+ *
271
+ * // Cleanup
272
+ * index.close();
273
+ * ```
274
+ *
275
+ * @example
276
+ * ```typescript
277
+ * // Create an index with Porter stemmer for English text
278
+ * const index = createIndex({
279
+ * path: "./data/notes.db",
280
+ * tableName: "notes_fts",
281
+ * tokenizer: "porter",
282
+ * });
283
+ * ```
284
+ */
285
+ declare function createIndex<T = unknown>(options: IndexOptions): Index<T>;
286
+ export { createIndex };