@gmickel/gno 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,7 @@ import type {
31
31
  StoreResult,
32
32
  } from '../types';
33
33
  import { err, ok } from '../types';
34
+ import { loadFts5Snowball } from './fts5-snowball';
34
35
  import type { SqliteDbProvider } from './types';
35
36
 
36
37
  // ─────────────────────────────────────────────────────────────────────────────
@@ -103,6 +104,19 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
103
104
  this.db.exec('PRAGMA journal_mode = WAL');
104
105
  }
105
106
 
107
+ // Load fts5-snowball extension if using snowball tokenizer
108
+ if (ftsTokenizer.startsWith('snowball')) {
109
+ const snowballResult = loadFts5Snowball(this.db);
110
+ if (!snowballResult.loaded) {
111
+ this.db.close();
112
+ this.db = null;
113
+ return err(
114
+ 'EXTENSION_LOAD_FAILED',
115
+ `Failed to load fts5-snowball: ${snowballResult.error}`
116
+ );
117
+ }
118
+ }
119
+
106
120
  // Run migrations
107
121
  const result = runMigrations(this.db, migrations, ftsTokenizer);
108
122
  if (!result.ok) {
@@ -744,16 +758,15 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
744
758
  const db = this.ensureOpen();
745
759
  const limit = options.limit ?? 20;
746
760
 
747
- // Join FTS results with chunks and documents
748
- // Use bm25() function explicitly - fts.rank doesn't work with JOINs
749
- // Note: Multiple docs can share mirror_hash (content-addressed storage)
750
- // Deduplication by uri+seq is done in search.ts to avoid FTS function context issues
761
+ // Document-level FTS search using documents_fts
762
+ // Uses bm25() for relevance ranking (more negative = better match)
763
+ // Snippet from body column (index 2) with highlight markers
751
764
  const sql = `
752
765
  SELECT
753
- c.mirror_hash,
754
- c.seq,
755
- bm25(content_fts) as score,
756
- ${options.snippet ? "snippet(content_fts, 0, '<mark>', '</mark>', '...', 32) as snippet," : ''}
766
+ d.mirror_hash,
767
+ 0 as seq,
768
+ bm25(documents_fts) as score,
769
+ ${options.snippet ? "snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet," : ''}
757
770
  d.docid,
758
771
  d.uri,
759
772
  d.title,
@@ -764,13 +777,11 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
764
777
  d.source_mtime,
765
778
  d.source_size,
766
779
  d.source_hash
767
- FROM content_fts fts
768
- JOIN content_chunks c ON c.rowid = fts.rowid
769
- JOIN documents d ON d.mirror_hash = c.mirror_hash AND d.active = 1
770
- WHERE content_fts MATCH ?
780
+ FROM documents_fts fts
781
+ JOIN documents d ON d.id = fts.rowid AND d.active = 1
782
+ WHERE documents_fts MATCH ?
771
783
  ${options.collection ? 'AND d.collection = ?' : ''}
772
- ${options.language ? 'AND c.language = ?' : ''}
773
- ORDER BY bm25(content_fts)
784
+ ORDER BY bm25(documents_fts)
774
785
  LIMIT ?
775
786
  `;
776
787
 
@@ -778,9 +789,6 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
778
789
  if (options.collection) {
779
790
  params.push(options.collection);
780
791
  }
781
- if (options.language) {
782
- params.push(options.language);
783
- }
784
792
  params.push(limit);
785
793
 
786
794
  interface FtsRow {
@@ -835,29 +843,157 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
835
843
  }
836
844
  }
837
845
 
838
- async rebuildFtsForHash(mirrorHash: string): Promise<StoreResult<void>> {
846
+ /**
847
+ * Sync a document to documents_fts for full-text search.
848
+ * Must be called after document and content are both upserted.
849
+ * The FTS rowid matches documents.id for efficient JOINs.
850
+ */
851
+ async syncDocumentFts(
852
+ collection: string,
853
+ relPath: string
854
+ ): Promise<StoreResult<void>> {
839
855
  try {
840
856
  const db = this.ensureOpen();
841
857
 
842
858
  const transaction = db.transaction(() => {
843
- // Get chunks for this hash
844
- const chunks = db
845
- .query<{ rowid: number; text: string }, [string]>(
846
- 'SELECT rowid, text FROM content_chunks WHERE mirror_hash = ?'
859
+ // Get document with its content
860
+ interface DocWithContent {
861
+ id: number;
862
+ rel_path: string;
863
+ title: string | null;
864
+ markdown: string | null;
865
+ }
866
+
867
+ const doc = db
868
+ .query<DocWithContent, [string, string]>(
869
+ `SELECT d.id, d.rel_path, d.title, c.markdown
870
+ FROM documents d
871
+ LEFT JOIN content c ON c.mirror_hash = d.mirror_hash
872
+ WHERE d.collection = ? AND d.rel_path = ? AND d.active = 1`
847
873
  )
848
- .all(mirrorHash);
874
+ .get(collection, relPath);
849
875
 
850
- // Delete old FTS entries for these rowids
851
- for (const chunk of chunks) {
852
- db.run('DELETE FROM content_fts WHERE rowid = ?', [chunk.rowid]);
876
+ if (!doc) {
877
+ return; // Document not found or inactive
853
878
  }
854
879
 
855
- // Insert new FTS entries
880
+ // Delete existing FTS entry for this doc
881
+ db.run('DELETE FROM documents_fts WHERE rowid = ?', [doc.id]);
882
+
883
+ // Insert new FTS entry if we have content
884
+ if (doc.markdown) {
885
+ db.run(
886
+ 'INSERT INTO documents_fts (rowid, filepath, title, body) VALUES (?, ?, ?, ?)',
887
+ [doc.id, doc.rel_path, doc.title ?? '', doc.markdown]
888
+ );
889
+ }
890
+ });
891
+
892
+ transaction();
893
+ return ok(undefined);
894
+ } catch (cause) {
895
+ return err(
896
+ 'QUERY_FAILED',
897
+ cause instanceof Error ? cause.message : 'Failed to sync document FTS',
898
+ cause
899
+ );
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Rebuild entire documents_fts index from scratch.
905
+ * Use after migration or for recovery.
906
+ */
907
+ async rebuildAllDocumentsFts(): Promise<StoreResult<number>> {
908
+ try {
909
+ const db = this.ensureOpen();
910
+ let count = 0;
911
+
912
+ const transaction = db.transaction(() => {
913
+ // Clear FTS table
914
+ db.run('DELETE FROM documents_fts');
915
+
916
+ // Get all active documents with content
917
+ interface DocWithContent {
918
+ id: number;
919
+ rel_path: string;
920
+ title: string | null;
921
+ markdown: string;
922
+ }
923
+
924
+ const docs = db
925
+ .query<DocWithContent, []>(
926
+ `SELECT d.id, d.rel_path, d.title, c.markdown
927
+ FROM documents d
928
+ JOIN content c ON c.mirror_hash = d.mirror_hash
929
+ WHERE d.active = 1 AND d.mirror_hash IS NOT NULL`
930
+ )
931
+ .all();
932
+
933
+ // Insert FTS entries
856
934
  const stmt = db.prepare(
857
- 'INSERT INTO content_fts (rowid, text) VALUES (?, ?)'
935
+ 'INSERT INTO documents_fts (rowid, filepath, title, body) VALUES (?, ?, ?, ?)'
858
936
  );
859
- for (const chunk of chunks) {
860
- stmt.run(chunk.rowid, chunk.text);
937
+
938
+ for (const doc of docs) {
939
+ stmt.run(doc.id, doc.rel_path, doc.title ?? '', doc.markdown);
940
+ count++;
941
+ }
942
+ });
943
+
944
+ transaction();
945
+ return ok(count);
946
+ } catch (cause) {
947
+ return err(
948
+ 'QUERY_FAILED',
949
+ cause instanceof Error
950
+ ? cause.message
951
+ : 'Failed to rebuild documents FTS',
952
+ cause
953
+ );
954
+ }
955
+ }
956
+
957
+ /**
958
+ * @deprecated Use syncDocumentFts for document-level FTS.
959
+ * Kept for backwards compat during migration.
960
+ */
961
+ async rebuildFtsForHash(mirrorHash: string): Promise<StoreResult<void>> {
962
+ try {
963
+ const db = this.ensureOpen();
964
+
965
+ const transaction = db.transaction(() => {
966
+ // Get documents using this hash and sync their FTS
967
+ interface DocInfo {
968
+ id: number;
969
+ rel_path: string;
970
+ title: string | null;
971
+ }
972
+
973
+ const docs = db
974
+ .query<DocInfo, [string]>(
975
+ 'SELECT id, rel_path, title FROM documents WHERE mirror_hash = ? AND active = 1'
976
+ )
977
+ .all(mirrorHash);
978
+
979
+ // Get content
980
+ const content = db
981
+ .query<{ markdown: string }, [string]>(
982
+ 'SELECT markdown FROM content WHERE mirror_hash = ?'
983
+ )
984
+ .get(mirrorHash);
985
+
986
+ if (!content) {
987
+ return;
988
+ }
989
+
990
+ // Update FTS for each document using this hash
991
+ for (const doc of docs) {
992
+ db.run('DELETE FROM documents_fts WHERE rowid = ?', [doc.id]);
993
+ db.run(
994
+ 'INSERT INTO documents_fts (rowid, filepath, title, body) VALUES (?, ?, ?, ?)',
995
+ [doc.id, doc.rel_path, doc.title ?? '', content.markdown]
996
+ );
861
997
  }
862
998
  });
863
999
 
@@ -1116,10 +1252,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
1116
1252
  `);
1117
1253
  expiredCache = cacheResult.changes;
1118
1254
 
1119
- // Rebuild FTS index (remove orphaned entries)
1255
+ // Clean orphaned FTS entries (documents that no longer exist or are inactive)
1120
1256
  db.run(`
1121
- DELETE FROM content_fts WHERE rowid NOT IN (
1122
- SELECT rowid FROM content_chunks
1257
+ DELETE FROM documents_fts WHERE rowid NOT IN (
1258
+ SELECT id FROM documents WHERE active = 1
1123
1259
  )
1124
1260
  `);
1125
1261
  });
@@ -0,0 +1,144 @@
1
+ /**
2
+ * fts5-snowball extension loader.
3
+ *
4
+ * Loads vendored fts5-snowball extension for multilingual FTS5 stemming.
5
+ * Pattern mirrors sqlite-vec loader.
6
+ *
7
+ * @module src/store/sqlite/fts5-snowball
8
+ */
9
+
10
+ import type { Database } from 'bun:sqlite';
11
+ // node:fs: existsSync for sync file checks at load time
12
+ import { existsSync } from 'node:fs';
13
+ // node:path: join for cross-platform paths
14
+ import { join } from 'node:path';
15
+ // node:process: arch/platform detection (no Bun equivalent)
16
+ import { arch, platform } from 'node:process';
17
+ import { fileURLToPath } from 'node:url';
18
+
19
+ // ─────────────────────────────────────────────────────────────────────────────
20
+ // Types
21
+ // ─────────────────────────────────────────────────────────────────────────────
22
+
23
/**
 * Outcome reported by loadFts5Snowball().
 */
export interface Fts5SnowballLoadResult {
  /** True only when db.loadExtension() completed without throwing. */
  loaded: boolean;
  /** Failure description; set whenever `loaded` is false. */
  error?: string;
  /** Extension file that was resolved; absent when no binary was found. */
  path?: string;
}
31
+
32
+ // ─────────────────────────────────────────────────────────────────────────────
33
+ // Platform Detection
34
+ // ─────────────────────────────────────────────────────────────────────────────
35
+
36
/**
 * Map a platform/architecture pair to the vendored binary directory name.
 *
 * Recognised combinations: darwin-arm64, darwin-x64, linux-x64, windows-x64.
 * Anything else yields null so callers can report "not available" instead of
 * trying to load a binary built for a different CPU.
 *
 * @param os - Platform identifier in `process.platform` form ('win32' is
 *   mapped to 'windows'); defaults to the running platform.
 * @param cpu - Architecture identifier in `process.arch` form; defaults to
 *   the running architecture.
 * @returns Directory name for the platform, or null when unsupported.
 */
function getPlatformDir(
  os: string = platform,
  cpu: string = arch
): string | null {
  // Only arm64 and x64 directories are known here. Do not coerce other
  // architectures (ia32, arm, ppc64, ...) to x64: that would resolve a
  // binary the host cannot load.
  if (cpu !== 'arm64' && cpu !== 'x64') {
    return null;
  }

  const osName = os === 'win32' ? 'windows' : os;

  if (osName === 'darwin') {
    return `darwin-${cpu}`;
  }
  // linux and windows are only handled for x64.
  if (cpu === 'x64' && (osName === 'linux' || osName === 'windows')) {
    return `${osName}-x64`;
  }

  return null;
}
52
+
53
/**
 * Choose the shared-library file extension for the running platform.
 *
 * @returns 'dll' on win32, 'dylib' on darwin, and 'so' everywhere else.
 */
function getExtensionSuffix(): string {
  switch (platform) {
    case 'win32':
      return 'dll';
    case 'darwin':
      return 'dylib';
    default:
      return 'so';
  }
}
62
+
63
+ // ─────────────────────────────────────────────────────────────────────────────
64
+ // Path Resolution
65
+ // ─────────────────────────────────────────────────────────────────────────────
66
+
67
/**
 * Locate the vendored fts5-snowball binary for the running platform.
 *
 * The file is looked up at
 * `<this module dir>/../../../vendor/fts5-snowball/<platform dir>/fts5stemmer.<suffix>`.
 *
 * @returns Path to the extension file, or null when the platform has no
 *   known directory or the file does not exist on disk.
 */
export function getExtensionPath(): string | null {
  const dir = getPlatformDir();
  if (!dir) {
    return null;
  }

  const libraryFile = `fts5stemmer.${getExtensionSuffix()}`;

  // Anchor on this module's own location so resolution works under ESM.
  const moduleDir = fileURLToPath(new URL('.', import.meta.url));
  const candidate = join(
    moduleDir,
    '..',
    '..',
    '..',
    'vendor',
    'fts5-snowball',
    dir,
    libraryFile
  );

  return existsSync(candidate) ? candidate : null;
}
99
+
100
+ // ─────────────────────────────────────────────────────────────────────────────
101
+ // Extension Loading
102
+ // ─────────────────────────────────────────────────────────────────────────────
103
+
104
/**
 * Load the fts5-snowball extension into an open database.
 *
 * Never throws for a missing binary or a failed load; both are reported
 * through the returned result object.
 *
 * Notes carried over from the original header (not enforced by this
 * function): on macOS it is expected to run after
 * `Database.setCustomSQLite()`, and repeated loads are described as safe.
 *
 * @param db - Open database connection
 * @returns `{ loaded: true, path }` on success; otherwise `loaded: false`
 *   with an `error` message (and `path` when a binary was found but failed
 *   to load).
 */
export function loadFts5Snowball(db: Database): Fts5SnowballLoadResult {
  const extensionPath = getExtensionPath();

  if (!extensionPath) {
    // Distinguish "platform is known but the file is missing" from
    // "platform is not supported at all".
    const dir = getPlatformDir();
    const error = dir
      ? `fts5-snowball binary not found for ${dir}`
      : `fts5-snowball not available for ${platform}-${arch}`;
    return { loaded: false, error };
  }

  try {
    db.loadExtension(extensionPath);
  } catch (cause) {
    return {
      loaded: false,
      error: cause instanceof Error ? cause.message : String(cause),
      path: extensionPath,
    };
  }

  return { loaded: true, path: extensionPath };
}
138
+
139
/**
 * Report whether an fts5-snowball binary can be resolved for this platform.
 *
 * @returns true when getExtensionPath() yields a path, false when it yields null.
 */
export function isAvailable(): boolean {
  const resolved = getExtensionPath();
  return resolved !== null;
}
@@ -18,6 +18,7 @@ export type StoreErrorCode =
18
18
  | 'CONSTRAINT_VIOLATION'
19
19
  | 'MIGRATION_FAILED'
20
20
  | 'CONNECTION_FAILED'
21
+ | 'EXTENSION_LOAD_FAILED'
21
22
  | 'QUERY_FAILED'
22
23
  | 'TRANSACTION_FAILED'
23
24
  | 'INVALID_INPUT'
@@ -195,7 +196,11 @@ export interface FtsSearchOptions {
195
196
  limit?: number;
196
197
  /** Filter by collection */
197
198
  collection?: string;
198
- /** Filter by language */
199
+ /**
200
+ * Language hint (reserved for future use).
201
+ * Note: FTS5 snowball tokenizer is language-aware at index time,
202
+ * so runtime language filtering is not currently implemented.
203
+ */
199
204
  language?: string;
200
205
  /** Include snippet with highlights */
201
206
  snippet?: boolean;
@@ -469,7 +474,7 @@ export interface StorePort {
469
474
  // ─────────────────────────────────────────────────────────────────────────
470
475
 
471
476
  /**
472
- * Search chunks using FTS5.
477
+ * Search documents using FTS5 (document-level).
473
478
  */
474
479
  searchFts(
475
480
  query: string,
@@ -477,8 +482,23 @@ export interface StorePort {
477
482
  ): Promise<StoreResult<FtsResult[]>>;
478
483
 
479
484
  /**
485
+ * Sync a document to documents_fts for full-text search.
486
+ * Must be called after document and content are both upserted.
487
+ */
488
+ syncDocumentFts(
489
+ collection: string,
490
+ relPath: string
491
+ ): Promise<StoreResult<void>>;
492
+
493
+ /**
494
+ * Rebuild entire documents_fts index from scratch.
495
+ * Use after migration or for recovery. Returns count of indexed docs.
496
+ */
497
+ rebuildAllDocumentsFts(): Promise<StoreResult<number>>;
498
+
499
+ /**
500
+ * @deprecated Use syncDocumentFts for document-level FTS.
480
501
  * Rebuild FTS index for a mirror hash.
481
- * Called after upserting chunks.
482
502
  */
483
503
  rebuildFtsForHash(mirrorHash: string): Promise<StoreResult<void>>;
484
504
 
@@ -78,9 +78,11 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
78
78
 
79
79
  // Seek pagination: use cursor to avoid skipping items as backlog shrinks
80
80
  // Query structure changes based on whether we have a cursor
81
+ // Include document title for contextual embedding
81
82
  const sql = after
82
83
  ? `
83
84
  SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
85
+ (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
84
86
  CASE
85
87
  WHEN NOT EXISTS (
86
88
  SELECT 1 FROM content_vectors v
@@ -108,6 +110,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
108
110
  `
109
111
  : `
110
112
  SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
113
+ (SELECT d.title FROM documents d WHERE d.mirror_hash = c.mirror_hash AND d.active = 1 LIMIT 1) as title,
111
114
  CASE
112
115
  WHEN NOT EXISTS (
113
116
  SELECT 1 FROM content_vectors v
@@ -38,6 +38,7 @@ export interface BacklogItem {
38
38
  mirrorHash: string;
39
39
  seq: number;
40
40
  text: string;
41
+ title: string | null;
41
42
  reason: 'new' | 'changed' | 'force';
42
43
  }
43
44