@gmickel/gno 0.36.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +192 -9
- package/package.json +8 -1
- package/src/cli/commands/ask.ts +25 -7
- package/src/cli/commands/doctor.ts +17 -0
- package/src/cli/commands/embed.ts +2 -3
- package/src/cli/commands/query.ts +21 -6
- package/src/cli/commands/search.ts +3 -0
- package/src/cli/commands/vsearch.ts +10 -3
- package/src/cli/format/search-results.ts +58 -1
- package/src/cli/program.ts +38 -0
- package/src/config/types.ts +14 -0
- package/src/converters/mime.ts +9 -0
- package/src/ingestion/chunker.ts +186 -5
- package/src/ingestion/sync.ts +2 -1
- package/src/ingestion/types.ts +2 -1
- package/src/llm/registry.ts +22 -2
- package/src/mcp/tools/query.ts +17 -8
- package/src/mcp/tools/vsearch.ts +7 -3
- package/src/sdk/client.ts +34 -6
- package/src/sdk/embed.ts +7 -3
- package/src/sdk/types.ts +1 -0
- package/src/store/sqlite/adapter.ts +199 -25
|
@@ -53,23 +53,172 @@ import { loadFts5Snowball } from "./fts5-snowball";
|
|
|
53
53
|
|
|
54
54
|
/** Whitespace regex for splitting FTS5 tokens */
const WHITESPACE_REGEX = /\s+/;

/** Matches a carriage return or line feed; used to reject multi-line lexical queries. */
const SINGLE_LINE_QUERY_PATTERN = /[\r\n]/;

/** Matches every double quote (global) so callers can count them and detect an unmatched quote. */
const DOUBLE_QUOTE_PATTERN = /"/g;

/**
 * Per-column weights handed to `bm25(documents_fts, ...)` in the order
 * filepath, title, body.
 */
const FTS5_FIELD_WEIGHTS = {
  filepath: 1.5,
  title: 4.0,
  body: 1.0,
} as const;
|
|
63
|
+
|
|
64
|
+
/**
 * Characters that are removed from a search term: anything that is not a
 * Unicode letter, a Unicode number, an apostrophe, or an underscore.
 *
 * The pattern is global, which is safe here because it is only ever used with
 * `String.prototype.replace`, which resets `lastIndex` before matching.
 */
const FTS5_TERM_DISALLOWED_CHARS = /[^\p{L}\p{N}'_]/gu;

/**
 * Reduce a raw query token to a form that is safe to embed in an FTS5 string.
 *
 * Disallowed characters are deleted (not replaced by a space), and the
 * remainder is lowercased. The result may be an empty string when the token
 * contains no letters, numbers, apostrophes, or underscores.
 *
 * @param term - Raw token taken from the user's query.
 * @returns The stripped, lowercased term (possibly empty).
 */
function sanitizeFts5Term(term: string): string {
  // Strip first, then lowercase: the order matters for characters whose
  // lowercase form expands into a letter plus a combining mark.
  return term.replace(FTS5_TERM_DISALLOWED_CHARS, "").toLowerCase();
}
|
|
67
|
+
|
|
68
|
+
/**
 * Shape of a compound token such as `real-time` or `a+b`:
 * - made up only of Unicode letters, Unicode numbers, `'`, `+` and `-`;
 * - begins with a letter or number;
 * - contains at least one `-` or `+` that is immediately followed by a letter
 *   or number.
 *
 * Hoisted to module scope so the pattern is built once rather than on every
 * call from the query tokenizer.
 */
const FTS5_COMPOUND_TOKEN_PATTERN =
  /^[\p{L}\p{N}][\p{L}\p{N}'+-]*[-+][\p{L}\p{N}][\p{L}\p{N}'+-]*$/u;

/**
 * Tell whether a raw query token is a hyphen/plus-joined compound.
 *
 * Tokens carrying any other punctuation (for example a trailing comma), or
 * starting with `-`/`+`, are not treated as compounds.
 *
 * @param token - Raw token taken from the user's query.
 * @returns `true` when the whole token matches the compound shape.
 */
function isCompoundToken(token: string): boolean {
  return FTS5_COMPOUND_TOKEN_PATTERN.test(token);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Turn a hyphen/plus-joined compound into a space-separated word list.
 *
 * The term is cut at every `-` and `+`, each piece is passed through
 * `sanitizeFts5Term`, pieces that end up empty are dropped, and the survivors
 * are joined with single spaces.
 *
 * @param term - Compound token such as `real-time`.
 * @returns The sanitized words joined by spaces (possibly empty).
 */
function sanitizeCompoundTerm(term: string): string {
  const words: string[] = [];
  for (const piece of term.split(/[-+]/)) {
    const cleaned = sanitizeFts5Term(piece);
    if (cleaned.length > 0) {
      words.push(cleaned);
    }
  }
  return words.join(" ");
}
|
|
81
|
+
|
|
82
|
+
/**
 * Outcome of translating a user query into an FTS5 MATCH expression,
 * discriminated on `ok`: failures carry a human-readable `error`, successes
 * carry the built `query` string.
 */
type FtsQueryBuildResult =
  | { ok: false; error: string }
  | { ok: true; query: string };
|
|
56
85
|
|
|
57
86
|
/** Matches one whitespace character; hoisted so the scanner does not rebuild it per character. */
const FTS5_QUERY_WHITESPACE_CHAR = /\s/;

/** Matches a character that ends a bare (unquoted) token: whitespace or a double quote. */
const FTS5_BARE_TOKEN_TERMINATOR = /[\s"]/;

/**
 * Narrow lexical grammar for BM25/FTS queries.
 *
 * Supported:
 * - plain terms -> prefix match (`"term"*`)
 * - quoted phrases -> phrase match (`"several words"`)
 * - negation with a leading `-`, which requires at least one positive term
 * - hyphenated compounds handled intentionally: `real-time` becomes the
 *   phrase `"real time"`
 *
 * Positive clauses are joined with `AND`; each negated clause is appended as
 * `NOT <clause>`.
 *
 * @param query - Raw user query.
 * @returns `{ ok: true, query }` with the FTS5 expression, or
 *   `{ ok: false, error }` when the query is empty, spans several lines, has
 *   an unmatched double quote, yields no searchable terms, or consists only of
 *   negations.
 */
function buildFts5Query(query: string): FtsQueryBuildResult {
  const trimmed = query.trim();
  if (!trimmed) {
    return { ok: false, error: "Search query cannot be empty" };
  }

  if (SINGLE_LINE_QUERY_PATTERN.test(trimmed)) {
    return {
      ok: false,
      error: "Lexical query must be a single line. Remove newline characters.",
    };
  }

  const quoteCount = (trimmed.match(DOUBLE_QUOTE_PATTERN) ?? []).length;
  if (quoteCount % 2 === 1) {
    return {
      ok: false,
      error:
        'Lexical query has an unmatched double quote ("). Add the closing quote or remove it.',
    };
  }

  const positive: string[] = [];
  const negative: string[] = [];

  // Route a finished clause to the list matching its polarity.
  const addClause = (clause: string, negated: boolean): void => {
    (negated ? negative : positive).push(clause);
  };

  // Sanitize one word of a quoted phrase, expanding compounds into their parts.
  const sanitizePhraseToken = (token: string): string =>
    isCompoundToken(token)
      ? sanitizeCompoundTerm(token)
      : sanitizeFts5Term(token);

  let i = 0;
  while (i < trimmed.length) {
    // Skip whitespace between tokens.
    if (FTS5_QUERY_WHITESPACE_CHAR.test(trimmed.charAt(i))) {
      i += 1;
      continue;
    }

    // A leading "-" negates the token or phrase that follows it.
    const negated = trimmed.charAt(i) === "-";
    if (negated) {
      i += 1;
    }

    // charAt returns "" past the end, so no explicit bounds check is needed.
    if (trimmed.charAt(i) === '"') {
      // Quoted phrase: take everything up to the closing quote (or the end of
      // the string if none is found) and resume after it.
      const closing = trimmed.indexOf('"', i + 1);
      const end = closing === -1 ? trimmed.length : closing;
      const phrase = trimmed.slice(i + 1, end).trim();
      i = end + 1;

      if (!phrase) {
        continue;
      }

      const sanitized = phrase
        .split(WHITESPACE_REGEX)
        .map((token) => sanitizePhraseToken(token))
        .filter((token) => token.length > 0)
        .join(" ");
      if (sanitized) {
        addClause(`"${sanitized}"`, negated);
      }
      continue;
    }

    // Bare token: runs until whitespace, a double quote, or the end.
    const start = i;
    while (
      i < trimmed.length &&
      !FTS5_BARE_TOKEN_TERMINATOR.test(trimmed.charAt(i))
    ) {
      i += 1;
    }
    const token = trimmed.slice(start, i);
    if (!token) {
      // A lone "-" followed by whitespace or the end carries no term.
      continue;
    }

    if (isCompoundToken(token)) {
      // Compounds become an exact phrase of their parts (no prefix match).
      const sanitized = sanitizeCompoundTerm(token);
      if (sanitized) {
        addClause(`"${sanitized}"`, negated);
      }
      continue;
    }

    // Plain term: quoted and suffixed with "*" for a prefix match.
    const sanitized = sanitizeFts5Term(token);
    if (sanitized) {
      addClause(`"${sanitized}"*`, negated);
    }
  }

  if (positive.length === 0 && negative.length === 0) {
    return { ok: false, error: "Search query has no searchable terms" };
  }

  if (positive.length === 0) {
    return {
      ok: false,
      error:
        "Negation requires at least one positive search term in lexical queries.",
    };
  }

  const ftsQuery = [
    positive.join(" AND "),
    ...negative.map((negation) => `NOT ${negation}`),
  ].join(" ");

  return { ok: true, query: ftsQuery };
}
|
|
74
223
|
|
|
75
224
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -1028,10 +1177,14 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
1028
1177
|
try {
|
|
1029
1178
|
const db = this.ensureOpen();
|
|
1030
1179
|
const limit = options.limit ?? 20;
|
|
1180
|
+
const builtQuery = buildFts5Query(query);
|
|
1181
|
+
if (!builtQuery.ok) {
|
|
1182
|
+
return err("INVALID_INPUT", builtQuery.error);
|
|
1183
|
+
}
|
|
1031
1184
|
|
|
1032
1185
|
// Build tag filter conditions using EXISTS subqueries
|
|
1033
1186
|
const tagConditions: string[] = [];
|
|
1034
|
-
const params: (string | number)[] = [
|
|
1187
|
+
const params: (string | number)[] = [];
|
|
1035
1188
|
|
|
1036
1189
|
// tagsAny: document has at least one of these tags
|
|
1037
1190
|
if (options.tagsAny && options.tagsAny.length > 0) {
|
|
@@ -1075,17 +1228,35 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
1075
1228
|
if (options.collection) {
|
|
1076
1229
|
params.push(options.collection);
|
|
1077
1230
|
}
|
|
1231
|
+
|
|
1232
|
+
const hasOuterFilters =
|
|
1233
|
+
tagConditions.length > 0 || Boolean(options.collection);
|
|
1234
|
+
const ftsLimit = hasOuterFilters ? limit * 10 : limit;
|
|
1078
1235
|
params.push(limit);
|
|
1079
1236
|
|
|
1080
|
-
// Document-level FTS search using
|
|
1081
|
-
//
|
|
1082
|
-
// Snippet from body column (index 2) with highlight markers
|
|
1237
|
+
// Document-level FTS search using an FTS-first CTE to keep collection and
|
|
1238
|
+
// metadata filters from degrading the query plan into a broad scan.
|
|
1083
1239
|
const sql = `
|
|
1240
|
+
WITH fts_matches AS (
|
|
1241
|
+
SELECT
|
|
1242
|
+
rowid,
|
|
1243
|
+
${options.snippet ? "snippet(documents_fts, 2, '<mark>', '</mark>', '...', 32) as snippet," : ""}
|
|
1244
|
+
bm25(
|
|
1245
|
+
documents_fts,
|
|
1246
|
+
${FTS5_FIELD_WEIGHTS.filepath},
|
|
1247
|
+
${FTS5_FIELD_WEIGHTS.title},
|
|
1248
|
+
${FTS5_FIELD_WEIGHTS.body}
|
|
1249
|
+
) as score
|
|
1250
|
+
FROM documents_fts
|
|
1251
|
+
WHERE documents_fts MATCH ?
|
|
1252
|
+
ORDER BY score
|
|
1253
|
+
LIMIT ?
|
|
1254
|
+
)
|
|
1084
1255
|
SELECT
|
|
1085
1256
|
d.mirror_hash,
|
|
1086
1257
|
0 as seq,
|
|
1087
|
-
|
|
1088
|
-
${options.snippet ? "snippet
|
|
1258
|
+
fm.score as score,
|
|
1259
|
+
${options.snippet ? "fm.snippet as snippet," : ""}
|
|
1089
1260
|
d.docid,
|
|
1090
1261
|
d.uri,
|
|
1091
1262
|
d.title,
|
|
@@ -1097,12 +1268,12 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
1097
1268
|
d.frontmatter_date,
|
|
1098
1269
|
d.source_size,
|
|
1099
1270
|
d.source_hash
|
|
1100
|
-
FROM
|
|
1101
|
-
JOIN documents d ON d.id =
|
|
1102
|
-
WHERE
|
|
1271
|
+
FROM fts_matches fm
|
|
1272
|
+
JOIN documents d ON d.id = fm.rowid AND d.active = 1
|
|
1273
|
+
WHERE 1 = 1
|
|
1103
1274
|
${tagConditions.length > 0 ? `AND ${tagConditions.join(" AND ")}` : ""}
|
|
1104
1275
|
${options.collection ? "AND d.collection = ?" : ""}
|
|
1105
|
-
ORDER BY
|
|
1276
|
+
ORDER BY fm.score
|
|
1106
1277
|
LIMIT ?
|
|
1107
1278
|
`;
|
|
1108
1279
|
|
|
@@ -1124,7 +1295,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
1124
1295
|
source_hash: string | null;
|
|
1125
1296
|
}
|
|
1126
1297
|
|
|
1127
|
-
const
|
|
1298
|
+
const queryParams = [builtQuery.query, ftsLimit, ...params];
|
|
1299
|
+
const rows = db
|
|
1300
|
+
.query<FtsRow, (string | number)[]>(sql)
|
|
1301
|
+
.all(...queryParams);
|
|
1128
1302
|
|
|
1129
1303
|
return ok(
|
|
1130
1304
|
rows.map((r) => ({
|