@gmickel/gno 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,23 +53,172 @@ import { loadFts5Snowball } from "./fts5-snowball";
53
53
 
54
54
  /** Whitespace regex for splitting FTS5 tokens */
55
55
  const WHITESPACE_REGEX = /\s+/;
56
/** Matches a carriage return or line feed; used to reject multi-line queries. */
const SINGLE_LINE_QUERY_PATTERN = /[\r\n]/;

/**
 * Matches every double-quote character. Global so that `String.prototype.match`
 * returns all occurrences, which lets callers count quotes.
 */
const DOUBLE_QUOTE_PATTERN = /"/g;

/**
 * Per-column weights handed to `bm25()` for `documents_fts`, passed in the
 * order filepath, title, body.
 */
const FTS5_FIELD_WEIGHTS = {
  filepath: 1.5,
  title: 4.0,
  body: 1.0,
} as const;
63
+
64
/**
 * Reduce a raw query fragment to text that is safe inside an FTS5 string.
 *
 * Only letters, digits, apostrophes and underscores survive, lower-cased.
 * Every run of other characters (punctuation, FTS5 operators, whitespace)
 * becomes a single space instead of vanishing, so `README.md` yields
 * `readme md` rather than the fused `readmemd`. Fusing produced a term that
 * cannot match content the tokenizer split at that punctuation
 * (NOTE(review): assumes the index tokenizer treats these characters as
 * separators, as FTS5's unicode61 does by default — confirm for the
 * configured tokenizer).
 *
 * @param term Raw token or token fragment taken from the user's query.
 * @returns Sanitized text with no leading/trailing space; may contain inner
 *   spaces, and is empty when nothing searchable remains.
 */
function sanitizeFts5Term(term: string): string {
  return (
    term
      // Combining marks are dropped outright (unchanged behavior) so that a
      // decomposed accent does not turn into a word break.
      .replace(/\p{M}+/gu, "")
      // Anything else outside the allowed set acts as a word separator.
      .replace(/[^\p{L}\p{N}'_]+/gu, " ")
      .trim()
      .toLowerCase()
  );
}
67
+
68
/**
 * Report whether a token is a hyphen- or plus-joined compound.
 *
 * The token must start with a letter or digit and contain at least one `-`
 * or `+` that is immediately followed by a letter or digit. Between those
 * anchors only letters, digits, apostrophes, `+` and `-` are allowed.
 *
 * @param token Candidate token, already split on whitespace.
 * @returns `true` for shapes such as `real-time` or `a+b`; `false` for
 *   `c++`, `-foo`, or tokens containing any other punctuation.
 */
function isCompoundToken(token: string): boolean {
  const compoundShape =
    /^[\p{L}\p{N}][\p{L}\p{N}'+-]*[-+][\p{L}\p{N}][\p{L}\p{N}'+-]*$/u;
  return compoundShape.test(token);
}
73
+
74
/**
 * Turn a compound token into a space-separated run of sanitized parts.
 *
 * The token is split at every `-` or `+`; each piece goes through
 * `sanitizeFts5Term`, pieces that end up empty are discarded, and the rest
 * are joined with single spaces.
 *
 * @param term Compound token such as `real-time`.
 * @returns The sanitized parts joined by spaces, or an empty string when no
 *   part survives sanitization.
 */
function sanitizeCompoundTerm(term: string): string {
  const cleanedParts: string[] = [];
  for (const segment of term.split(/[-+]/)) {
    const cleaned = sanitizeFts5Term(segment);
    if (cleaned.length > 0) {
      cleanedParts.push(cleaned);
    }
  }
  return cleanedParts.join(" ");
}
81
+
82
/**
 * Outcome of translating user input into an FTS5 MATCH expression: either
 * the finished `query` string, or an `error` message explaining why the
 * input was rejected. Discriminated on `ok`.
 */
type FtsQueryBuildResult =
  | { ok: false; error: string }
  | { ok: true; query: string };
56
85
 
57
86
  /**
58
- * Escape a query string for safe FTS5 MATCH.
59
- * Wraps each token in double quotes to treat as literal terms.
60
- * Handles special chars: ? * - + ( ) " : ^ etc.
87
+ * Narrow lexical grammar for BM25/FTS queries.
88
+ *
89
+ * Supported:
90
+ * - plain terms -> prefix match
91
+ * - quoted phrases -> phrase match
92
+ * - negation with at least one positive term
93
+ * - hyphenated compounds handled intentionally
61
94
  */
62
- function escapeFts5Query(query: string): string {
63
- // Split on whitespace, filter empty, quote each token
64
- return query
65
- .split(WHITESPACE_REGEX)
66
- .filter((t) => t.length > 0)
67
- .map((token) => {
68
- // Escape internal double quotes by doubling them
69
- const escaped = token.replace(/"/g, '""');
70
- return `"${escaped}"`;
71
- })
72
- .join(" ");
95
/**
 * Translate a user-typed lexical query into an FTS5 MATCH expression.
 *
 * Grammar (whitespace separated):
 * - `word`        -> prefix term, emitted as `"word"*`
 * - `"some text"` -> phrase, emitted as `"some text"`
 * - `real-time`   -> compound (see `isCompoundToken`), emitted as the phrase
 *                    `"real time"` without a prefix star
 * - a leading `-` on any of the above negates it
 *
 * Positive clauses are joined with `AND`; every negated clause is appended
 * as `NOT clause`. All text placed inside the double quotes has been passed
 * through `sanitizeFts5Term` / `sanitizeCompoundTerm`.
 *
 * A token whose letter/digit core is a compound is treated as that compound
 * even when punctuation is attached (`real-time,`, `(well-known)`); without
 * this the hyphen was stripped and the parts fused into one term.
 *
 * Example: `real-time, notes -"old api"` becomes
 * `"real time" AND "notes"* NOT "old api"`.
 *
 * @param query Raw query text as typed by the user.
 * @returns `{ ok: true, query }` with the MATCH expression, or
 *   `{ ok: false, error }` when the input is empty, spans several lines, has
 *   an unmatched double quote, contains nothing searchable, or consists only
 *   of negated clauses.
 */
function buildFts5Query(query: string): FtsQueryBuildResult {
  const trimmed = query.trim();
  if (!trimmed) {
    return { ok: false, error: "Search query cannot be empty" };
  }

  if (SINGLE_LINE_QUERY_PATTERN.test(trimmed)) {
    return {
      ok: false,
      error: "Lexical query must be a single line. Remove newline characters.",
    };
  }

  const quoteCount = (trimmed.match(DOUBLE_QUOTE_PATTERN) ?? []).length;
  if (quoteCount % 2 === 1) {
    return {
      ok: false,
      error:
        'Lexical query has an unmatched double quote ("). Add the closing quote or remove it.',
    };
  }

  // Built once per call instead of once per scanned character.
  const whitespaceChar = /\s/;
  const tokenBoundary = /[\s"]/;
  // Span from the first to the last letter/digit of a token.
  const alnumCore = /[\p{L}\p{N}](?:.*[\p{L}\p{N}])?/u;

  const positive: string[] = [];
  const negative: string[] = [];
  const addClause = (clause: string, negated: boolean): void => {
    (negated ? negative : positive).push(clause);
  };

  // Sanitize one whitespace-free token and report whether it was a compound.
  const sanitizeToken = (raw: string): { text: string; compound: boolean } => {
    if (isCompoundToken(raw)) {
      return { text: sanitizeCompoundTerm(raw), compound: true };
    }
    // Retry on the core so attached punctuation does not hide a compound.
    const core = alnumCore.exec(raw)?.[0] ?? "";
    if (core !== raw && isCompoundToken(core)) {
      return { text: sanitizeCompoundTerm(core), compound: true };
    }
    return { text: sanitizeFts5Term(raw), compound: false };
  };

  let i = 0;
  while (i < trimmed.length) {
    while (i < trimmed.length && whitespaceChar.test(trimmed.charAt(i))) {
      i += 1;
    }
    if (i >= trimmed.length) {
      break;
    }

    const negated = trimmed.charAt(i) === "-";
    if (negated) {
      i += 1;
    }

    if (trimmed.charAt(i) === '"') {
      // Quoted phrase: everything up to the next double quote. The even
      // quote count checked above means a closing quote is expected; fall
      // back to end-of-input rather than looping if it is somehow missing.
      const closing = trimmed.indexOf('"', i + 1);
      const end = closing === -1 ? trimmed.length : closing;
      const phrase = trimmed.slice(i + 1, end).trim();
      i = end + 1;

      if (!phrase) {
        continue;
      }

      const sanitizedPhrase = phrase
        .split(WHITESPACE_REGEX)
        .map((token) => sanitizeToken(token).text)
        .filter((text) => text.length > 0)
        .join(" ");
      if (sanitizedPhrase) {
        addClause(`"${sanitizedPhrase}"`, negated);
      }
      continue;
    }

    // Bare token: runs until whitespace or the start of a quoted phrase.
    const start = i;
    while (i < trimmed.length && !tokenBoundary.test(trimmed.charAt(i))) {
      i += 1;
    }
    const token = trimmed.slice(start, i);
    if (!token) {
      // A lone "-" followed by whitespace or end of input carries no term.
      continue;
    }

    const { text, compound } = sanitizeToken(token);
    if (!text) {
      continue;
    }
    // Compounds match as exact phrases; plain terms match as prefixes.
    addClause(compound ? `"${text}"` : `"${text}"*`, negated);
  }

  if (positive.length === 0 && negative.length === 0) {
    return { ok: false, error: "Search query has no searchable terms" };
  }

  if (positive.length === 0) {
    return {
      ok: false,
      error:
        "Negation requires at least one positive search term in lexical queries.",
    };
  }

  // Yields `p1 AND p2 ... NOT n1 NOT n2 ...`.
  const ftsQuery = [positive.join(" AND "), ...negative].join(" NOT ");

  return { ok: true, query: ftsQuery };
}
74
223
 
75
224
  // ─────────────────────────────────────────────────────────────────────────────
@@ -1028,10 +1177,14 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
1028
1177
  try {
1029
1178
  const db = this.ensureOpen();
1030
1179
  const limit = options.limit ?? 20;
1180
+ const builtQuery = buildFts5Query(query);
1181
+ if (!builtQuery.ok) {
1182
+ return err("INVALID_INPUT", builtQuery.error);
1183
+ }
1031
1184
 
1032
1185
  // Build tag filter conditions using EXISTS subqueries
1033
1186
  const tagConditions: string[] = [];
1034
- const params: (string | number)[] = [escapeFts5Query(query)];
1187
+ const params: (string | number)[] = [];
1035
1188
 
1036
1189
  // tagsAny: document has at least one of these tags
1037
1190
  if (options.tagsAny && options.tagsAny.length > 0) {
@@ -1075,17 +1228,35 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
1075
1228
  if (options.collection) {
1076
1229
  params.push(options.collection);
1077
1230
  }
1231
+
1232
+ const hasOuterFilters =
1233
+ tagConditions.length > 0 || Boolean(options.collection);
1234
+ const ftsLimit = hasOuterFilters ? limit * 10 : limit;
1078
1235
  params.push(limit);
1079
1236
 
1080
- // Document-level FTS search using documents_fts
1081
- // Uses bm25() for relevance ranking (more negative = better match)
1082
- // Snippet from body column (index 2) with highlight markers
1237
+ // Document-level FTS search using an FTS-first CTE to keep collection and
1238
+ // metadata filters from degrading the query plan into a broad scan.
1083
1239
  const sql = `
1240
+ WITH fts_matches AS (
1241
+ SELECT
1242
+ rowid,
1243
+ ${options.snippet ? "snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet," : ""}
1244
+ bm25(
1245
+ documents_fts,
1246
+ ${FTS5_FIELD_WEIGHTS.filepath},
1247
+ ${FTS5_FIELD_WEIGHTS.title},
1248
+ ${FTS5_FIELD_WEIGHTS.body}
1249
+ ) as score
1250
+ FROM documents_fts
1251
+ WHERE documents_fts MATCH ?
1252
+ ORDER BY score
1253
+ LIMIT ?
1254
+ )
1084
1255
  SELECT
1085
1256
  d.mirror_hash,
1086
1257
  0 as seq,
1087
- bm25(documents_fts) as score,
1088
- ${options.snippet ? "snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet," : ""}
1258
+ fm.score as score,
1259
+ ${options.snippet ? "fm.snippet as snippet," : ""}
1089
1260
  d.docid,
1090
1261
  d.uri,
1091
1262
  d.title,
@@ -1097,12 +1268,12 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
1097
1268
  d.frontmatter_date,
1098
1269
  d.source_size,
1099
1270
  d.source_hash
1100
- FROM documents_fts fts
1101
- JOIN documents d ON d.id = fts.rowid AND d.active = 1
1102
- WHERE documents_fts MATCH ?
1271
+ FROM fts_matches fm
1272
+ JOIN documents d ON d.id = fm.rowid AND d.active = 1
1273
+ WHERE 1 = 1
1103
1274
  ${tagConditions.length > 0 ? `AND ${tagConditions.join(" AND ")}` : ""}
1104
1275
  ${options.collection ? "AND d.collection = ?" : ""}
1105
- ORDER BY bm25(documents_fts)
1276
+ ORDER BY fm.score
1106
1277
  LIMIT ?
1107
1278
  `;
1108
1279
 
@@ -1124,7 +1295,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
1124
1295
  source_hash: string | null;
1125
1296
  }
1126
1297
 
1127
- const rows = db.query<FtsRow, (string | number)[]>(sql).all(...params);
1298
+ const queryParams = [builtQuery.query, ftsLimit, ...params];
1299
+ const rows = db
1300
+ .query<FtsRow, (string | number)[]>(sql)
1301
+ .all(...queryParams);
1128
1302
 
1129
1303
  return ok(
1130
1304
  rows.map((r) => ({