@gmickel/gno 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/package.json +6 -3
- package/src/cli/commands/ask.ts +3 -0
- package/src/cli/program.ts +24 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +4 -1
- package/src/mcp/tools/index.ts +3 -0
- package/src/mcp/tools/query.ts +2 -0
- package/src/mcp/tools/search.ts +2 -0
- package/src/mcp/tools/vsearch.ts +2 -0
- package/src/pipeline/exclude.ts +69 -0
- package/src/pipeline/hybrid.ts +21 -0
- package/src/pipeline/search.ts +17 -0
- package/src/pipeline/types.ts +6 -0
- package/src/pipeline/vsearch.ts +27 -0
- package/src/serve/public/globals.built.css +1 -1
- package/src/serve/public/lib/retrieval-filters.ts +3 -0
- package/src/serve/public/pages/Ask.tsx +150 -1
- package/src/serve/public/pages/Search.tsx +22 -0
- package/src/serve/routes/api.ts +116 -48
package/README.md
CHANGED
|
@@ -32,7 +32,12 @@ GNO is a local knowledge engine that turns your documents into a searchable, con
|
|
|
32
32
|
|
|
33
33
|
---
|
|
34
34
|
|
|
35
|
-
## What's New in v0.18
|
|
35
|
+
## What's New in v0.19
|
|
36
|
+
|
|
37
|
+
- **Exclusion Filters**: explicit `exclude` controls across CLI, API, Web, and MCP to hard-prune unwanted docs by title/path/body text
|
|
38
|
+
- **Ask Query-Mode Parity**: Ask now supports structured `term` / `intent` / `hyde` controls in both API and Web UI
|
|
39
|
+
|
|
40
|
+
### v0.18
|
|
36
41
|
|
|
37
42
|
- **Intent Steering**: optional `intent` control for ambiguous queries across CLI, API, Web, and MCP query flows
|
|
38
43
|
- **Rerank Controls**: `candidateLimit` lets you tune rerank cost vs. recall on slower or memory-constrained machines
|
|
@@ -162,6 +167,7 @@ gno vsearch "error handling patterns" # Semantic similarity
|
|
|
162
167
|
gno query "database optimization" # Full pipeline
|
|
163
168
|
gno query "meeting decisions" --since "last month" --category "meeting,notes" --author "gordon"
|
|
164
169
|
gno query "performance" --intent "web performance and latency"
|
|
170
|
+
gno query "performance" --exclude "reviews,hiring"
|
|
165
171
|
gno ask "what did we decide" --answer # AI synthesis
|
|
166
172
|
```
|
|
167
173
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmickel/gno",
|
|
3
|
-
"version": "0.18.0",
|
|
3
|
+
"version": "0.20.0",
|
|
4
4
|
"description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"embeddings",
|
|
@@ -102,7 +102,7 @@
|
|
|
102
102
|
"markitdown-ts": "^0.0.9",
|
|
103
103
|
"minimatch": "^10.1.1",
|
|
104
104
|
"nanoid": "^5.1.6",
|
|
105
|
-
"node-llama-cpp": "^3.
|
|
105
|
+
"node-llama-cpp": "^3.17.1",
|
|
106
106
|
"officeparser": "^6.0.4",
|
|
107
107
|
"picocolors": "^1.1.1",
|
|
108
108
|
"react": "^19.2.3",
|
|
@@ -146,5 +146,8 @@
|
|
|
146
146
|
},
|
|
147
147
|
"engines": {
|
|
148
148
|
"bun": ">=1.3.0"
|
|
149
|
-
}
|
|
149
|
+
},
|
|
150
|
+
"trustedDependencies": [
|
|
151
|
+
"node-llama-cpp"
|
|
152
|
+
]
|
|
150
153
|
}
|
package/src/cli/commands/ask.ts
CHANGED
|
@@ -199,6 +199,7 @@ export async function ask(
|
|
|
199
199
|
author: options.author,
|
|
200
200
|
tagsAll: options.tagsAll,
|
|
201
201
|
tagsAny: options.tagsAny,
|
|
202
|
+
exclude: options.exclude,
|
|
202
203
|
noExpand: options.noExpand,
|
|
203
204
|
noRerank: options.noRerank,
|
|
204
205
|
candidateLimit: options.candidateLimit,
|
|
@@ -262,6 +263,8 @@ export async function ask(
|
|
|
262
263
|
vectorsUsed: searchResult.value.meta.vectorsUsed ?? false,
|
|
263
264
|
intent: searchResult.value.meta.intent,
|
|
264
265
|
candidateLimit: searchResult.value.meta.candidateLimit,
|
|
266
|
+
exclude: searchResult.value.meta.exclude,
|
|
267
|
+
queryModes: searchResult.value.meta.queryModes,
|
|
265
268
|
answerGenerated,
|
|
266
269
|
totalResults: results.length,
|
|
267
270
|
answerContext,
|
package/src/cli/program.ts
CHANGED
|
@@ -225,6 +225,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
225
225
|
.option("--category <values>", "require category match (comma-separated)")
|
|
226
226
|
.option("--author <text>", "filter by author (case-insensitive contains)")
|
|
227
227
|
.option("--intent <text>", "disambiguating context for ambiguous queries")
|
|
228
|
+
.option(
|
|
229
|
+
"--exclude <values>",
|
|
230
|
+
"exclude docs containing any term (comma-separated)"
|
|
231
|
+
)
|
|
228
232
|
.option("--tags-all <tags>", "require ALL tags (comma-separated)")
|
|
229
233
|
.option("--tags-any <tags>", "require ANY tag (comma-separated)")
|
|
230
234
|
.option("--full", "include full content")
|
|
@@ -270,6 +274,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
270
274
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
271
275
|
: getDefaultLimit(format);
|
|
272
276
|
const categories = parseCsvValues(cmdOpts.category);
|
|
277
|
+
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
273
278
|
|
|
274
279
|
const { search, formatSearch } = await import("./commands/search");
|
|
275
280
|
const result = await search(queryText, {
|
|
@@ -282,6 +287,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
282
287
|
categories,
|
|
283
288
|
author: cmdOpts.author as string | undefined,
|
|
284
289
|
intent: cmdOpts.intent as string | undefined,
|
|
290
|
+
exclude,
|
|
285
291
|
tagsAll,
|
|
286
292
|
tagsAny,
|
|
287
293
|
full: Boolean(cmdOpts.full),
|
|
@@ -332,6 +338,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
332
338
|
.option("--category <values>", "require category match (comma-separated)")
|
|
333
339
|
.option("--author <text>", "filter by author (case-insensitive contains)")
|
|
334
340
|
.option("--intent <text>", "disambiguating context for ambiguous queries")
|
|
341
|
+
.option(
|
|
342
|
+
"--exclude <values>",
|
|
343
|
+
"exclude docs containing any term (comma-separated)"
|
|
344
|
+
)
|
|
335
345
|
.option("--tags-all <tags>", "require ALL tags (comma-separated)")
|
|
336
346
|
.option("--tags-any <tags>", "require ANY tag (comma-separated)")
|
|
337
347
|
.option("--full", "include full content")
|
|
@@ -377,6 +387,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
377
387
|
? parsePositiveInt("limit", cmdOpts.limit)
|
|
378
388
|
: getDefaultLimit(format);
|
|
379
389
|
const categories = parseCsvValues(cmdOpts.category);
|
|
390
|
+
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
380
391
|
|
|
381
392
|
const { vsearch, formatVsearch } = await import("./commands/vsearch");
|
|
382
393
|
const result = await vsearch(queryText, {
|
|
@@ -389,6 +400,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
389
400
|
categories,
|
|
390
401
|
author: cmdOpts.author as string | undefined,
|
|
391
402
|
intent: cmdOpts.intent as string | undefined,
|
|
403
|
+
exclude,
|
|
392
404
|
tagsAll,
|
|
393
405
|
tagsAny,
|
|
394
406
|
full: Boolean(cmdOpts.full),
|
|
@@ -434,6 +446,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
434
446
|
.option("--category <values>", "require category match (comma-separated)")
|
|
435
447
|
.option("--author <text>", "filter by author (case-insensitive contains)")
|
|
436
448
|
.option("--intent <text>", "disambiguating context for ambiguous queries")
|
|
449
|
+
.option(
|
|
450
|
+
"--exclude <values>",
|
|
451
|
+
"exclude docs containing any term (comma-separated)"
|
|
452
|
+
)
|
|
437
453
|
.option("--tags-all <tags>", "require ALL tags (comma-separated)")
|
|
438
454
|
.option("--tags-any <tags>", "require ANY tag (comma-separated)")
|
|
439
455
|
.option("--full", "include full content")
|
|
@@ -505,6 +521,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
505
521
|
? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
|
|
506
522
|
: undefined;
|
|
507
523
|
const categories = parseCsvValues(cmdOpts.category);
|
|
524
|
+
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
508
525
|
|
|
509
526
|
// Determine expansion/rerank settings based on flags
|
|
510
527
|
// Priority: --fast > --thorough > --no-expand/--no-rerank > default
|
|
@@ -541,6 +558,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
541
558
|
categories,
|
|
542
559
|
author: cmdOpts.author as string | undefined,
|
|
543
560
|
intent: cmdOpts.intent as string | undefined,
|
|
561
|
+
exclude,
|
|
544
562
|
tagsAll,
|
|
545
563
|
tagsAny,
|
|
546
564
|
full: Boolean(cmdOpts.full),
|
|
@@ -586,6 +604,10 @@ function wireSearchCommands(program: Command): void {
|
|
|
586
604
|
.option("--category <values>", "require category match (comma-separated)")
|
|
587
605
|
.option("--author <text>", "filter by author (case-insensitive contains)")
|
|
588
606
|
.option("--intent <text>", "disambiguating context for ambiguous queries")
|
|
607
|
+
.option(
|
|
608
|
+
"--exclude <values>",
|
|
609
|
+
"exclude docs containing any term (comma-separated)"
|
|
610
|
+
)
|
|
589
611
|
.option("--fast", "skip expansion and reranking (fastest)")
|
|
590
612
|
.option("--thorough", "enable query expansion (slower)")
|
|
591
613
|
.option("-C, --candidate-limit <num>", "max candidates passed to reranking")
|
|
@@ -616,6 +638,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
616
638
|
? parsePositiveInt("max-answer-tokens", cmdOpts.maxAnswerTokens)
|
|
617
639
|
: undefined;
|
|
618
640
|
const categories = parseCsvValues(cmdOpts.category);
|
|
641
|
+
const exclude = parseCsvValues(cmdOpts.exclude);
|
|
619
642
|
|
|
620
643
|
// Determine expansion/rerank settings based on flags
|
|
621
644
|
// Default: skip expansion (balanced mode)
|
|
@@ -641,6 +664,7 @@ function wireSearchCommands(program: Command): void {
|
|
|
641
664
|
categories,
|
|
642
665
|
author: cmdOpts.author as string | undefined,
|
|
643
666
|
intent: cmdOpts.intent as string | undefined,
|
|
667
|
+
exclude,
|
|
644
668
|
noExpand,
|
|
645
669
|
noRerank,
|
|
646
670
|
candidateLimit,
|
package/src/llm/nodeLlamaCpp/lifecycle.ts
CHANGED
|
@@ -49,7 +49,10 @@ export class ModelManager {
|
|
|
49
49
|
if (!this.llama) {
|
|
50
50
|
const { getLlama, LlamaLogLevel } = await import("node-llama-cpp");
|
|
51
51
|
// Suppress model loading warnings (vocab tokens, pooling type)
|
|
52
|
-
this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
|
|
52
|
+
this.llama = await getLlama({
|
|
53
|
+
build: "autoAttempt",
|
|
54
|
+
logLevel: LlamaLogLevel.error,
|
|
55
|
+
});
|
|
53
56
|
}
|
|
54
57
|
return this.llama;
|
|
55
58
|
}
|
package/src/mcp/tools/index.ts
CHANGED
|
@@ -57,6 +57,7 @@ const searchInputSchema = z.object({
|
|
|
57
57
|
minScore: z.number().min(0).max(1).optional(),
|
|
58
58
|
lang: z.string().optional(),
|
|
59
59
|
intent: z.string().optional(),
|
|
60
|
+
exclude: z.array(z.string()).optional(),
|
|
60
61
|
since: z.string().optional(),
|
|
61
62
|
until: z.string().optional(),
|
|
62
63
|
categories: z.array(z.string()).optional(),
|
|
@@ -107,6 +108,7 @@ const vsearchInputSchema = z.object({
|
|
|
107
108
|
minScore: z.number().min(0).max(1).optional(),
|
|
108
109
|
lang: z.string().optional(),
|
|
109
110
|
intent: z.string().optional(),
|
|
111
|
+
exclude: z.array(z.string()).optional(),
|
|
110
112
|
since: z.string().optional(),
|
|
111
113
|
until: z.string().optional(),
|
|
112
114
|
categories: z.array(z.string()).optional(),
|
|
@@ -128,6 +130,7 @@ export const queryInputSchema = z.object({
|
|
|
128
130
|
lang: z.string().optional(),
|
|
129
131
|
intent: z.string().optional(),
|
|
130
132
|
candidateLimit: z.number().int().min(1).max(100).optional(),
|
|
133
|
+
exclude: z.array(z.string()).optional(),
|
|
131
134
|
since: z.string().optional(),
|
|
132
135
|
until: z.string().optional(),
|
|
133
136
|
categories: z.array(z.string()).optional(),
|
package/src/mcp/tools/query.ts
CHANGED
|
@@ -38,6 +38,7 @@ interface QueryInput {
|
|
|
38
38
|
lang?: string;
|
|
39
39
|
intent?: string;
|
|
40
40
|
candidateLimit?: number;
|
|
41
|
+
exclude?: string[];
|
|
41
42
|
since?: string;
|
|
42
43
|
until?: string;
|
|
43
44
|
categories?: string[];
|
|
@@ -251,6 +252,7 @@ export function handleQuery(
|
|
|
251
252
|
queryLanguageHint: args.lang, // Affects expansion prompt, not retrieval
|
|
252
253
|
intent: args.intent,
|
|
253
254
|
candidateLimit: args.candidateLimit,
|
|
255
|
+
exclude: args.exclude,
|
|
254
256
|
since: args.since,
|
|
255
257
|
until: args.until,
|
|
256
258
|
categories: args.categories,
|
package/src/mcp/tools/search.ts
CHANGED
|
@@ -20,6 +20,7 @@ interface SearchInput {
|
|
|
20
20
|
minScore?: number;
|
|
21
21
|
lang?: string;
|
|
22
22
|
intent?: string;
|
|
23
|
+
exclude?: string[];
|
|
23
24
|
since?: string;
|
|
24
25
|
until?: string;
|
|
25
26
|
categories?: string[];
|
|
@@ -110,6 +111,7 @@ export function handleSearch(
|
|
|
110
111
|
collection: args.collection,
|
|
111
112
|
lang: args.lang,
|
|
112
113
|
intent: args.intent,
|
|
114
|
+
exclude: args.exclude,
|
|
113
115
|
since: args.since,
|
|
114
116
|
until: args.until,
|
|
115
117
|
categories: args.categories,
|
package/src/mcp/tools/vsearch.ts
CHANGED
|
@@ -29,6 +29,7 @@ interface VsearchInput {
|
|
|
29
29
|
minScore?: number;
|
|
30
30
|
lang?: string;
|
|
31
31
|
intent?: string;
|
|
32
|
+
exclude?: string[];
|
|
32
33
|
since?: string;
|
|
33
34
|
until?: string;
|
|
34
35
|
categories?: string[];
|
|
@@ -194,6 +195,7 @@ export function handleVsearch(
|
|
|
194
195
|
minScore: args.minScore,
|
|
195
196
|
collection: args.collection,
|
|
196
197
|
intent: args.intent,
|
|
198
|
+
exclude: args.exclude,
|
|
197
199
|
since: args.since,
|
|
198
200
|
until: args.until,
|
|
199
201
|
categories: args.categories,
|
package/src/pipeline/exclude.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explicit exclusion helpers for retrieval filters.
|
|
3
|
+
*
|
|
4
|
+
* @module src/pipeline/exclude
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { ChunkRow } from "../store/types";
|
|
8
|
+
|
|
9
|
+
export function normalizeExcludeTerms(values: string[]): string[] {
|
|
10
|
+
const out: string[] = [];
|
|
11
|
+
const seen = new Set<string>();
|
|
12
|
+
|
|
13
|
+
for (const value of values) {
|
|
14
|
+
for (const part of value.split(",")) {
|
|
15
|
+
const trimmed = part.trim();
|
|
16
|
+
if (!trimmed) {
|
|
17
|
+
continue;
|
|
18
|
+
}
|
|
19
|
+
const key = trimmed.toLowerCase();
|
|
20
|
+
if (seen.has(key)) {
|
|
21
|
+
continue;
|
|
22
|
+
}
|
|
23
|
+
seen.add(key);
|
|
24
|
+
out.push(trimmed);
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return out;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function includesTerm(haystack: string, term: string): boolean {
|
|
32
|
+
return haystack.toLowerCase().includes(term.toLowerCase());
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function matchesExcludedText(
|
|
36
|
+
haystacks: string[],
|
|
37
|
+
excludeTerms: string[] | undefined
|
|
38
|
+
): boolean {
|
|
39
|
+
if (!excludeTerms?.length) {
|
|
40
|
+
return false;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
for (const haystack of haystacks) {
|
|
44
|
+
if (!haystack) {
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
47
|
+
for (const term of excludeTerms) {
|
|
48
|
+
if (includesTerm(haystack, term)) {
|
|
49
|
+
return true;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export function matchesExcludedChunks(
|
|
58
|
+
chunks: ChunkRow[],
|
|
59
|
+
excludeTerms: string[] | undefined
|
|
60
|
+
): boolean {
|
|
61
|
+
if (!excludeTerms?.length || chunks.length === 0) {
|
|
62
|
+
return false;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return matchesExcludedText(
|
|
66
|
+
chunks.map((chunk) => chunk.text),
|
|
67
|
+
excludeTerms
|
|
68
|
+
);
|
|
69
|
+
}
|
package/src/pipeline/hybrid.ts
CHANGED
|
@@ -21,6 +21,7 @@ import type {
|
|
|
21
21
|
import { err, ok } from "../store/types";
|
|
22
22
|
import { createChunkLookup } from "./chunk-lookup";
|
|
23
23
|
import { formatQueryForEmbedding } from "./contextual";
|
|
24
|
+
import { matchesExcludedChunks, matchesExcludedText } from "./exclude";
|
|
24
25
|
import { expandQuery } from "./expansion";
|
|
25
26
|
import {
|
|
26
27
|
buildExplainResults,
|
|
@@ -670,6 +671,25 @@ export async function searchHybrid(
|
|
|
670
671
|
continue;
|
|
671
672
|
}
|
|
672
673
|
|
|
674
|
+
const excluded =
|
|
675
|
+
matchesExcludedText(
|
|
676
|
+
[
|
|
677
|
+
doc.title ?? "",
|
|
678
|
+
doc.relPath,
|
|
679
|
+
doc.author ?? "",
|
|
680
|
+
doc.contentType ?? "",
|
|
681
|
+
...(doc.categories ?? []),
|
|
682
|
+
],
|
|
683
|
+
options.exclude
|
|
684
|
+
) ||
|
|
685
|
+
matchesExcludedChunks(
|
|
686
|
+
chunksMap.get(candidate.mirrorHash) ?? [],
|
|
687
|
+
options.exclude
|
|
688
|
+
);
|
|
689
|
+
if (excluded) {
|
|
690
|
+
continue;
|
|
691
|
+
}
|
|
692
|
+
|
|
673
693
|
// For --full mode, de-dupe by docid (keep best scoring candidate per doc)
|
|
674
694
|
if (options.full && seenDocids.has(doc.docid)) {
|
|
675
695
|
continue;
|
|
@@ -810,6 +830,7 @@ export async function searchHybrid(
|
|
|
810
830
|
vectorsUsed: vectorAvailable,
|
|
811
831
|
totalResults: finalResults.length,
|
|
812
832
|
intent: options.intent,
|
|
833
|
+
exclude: options.exclude,
|
|
813
834
|
collection: options.collection,
|
|
814
835
|
lang: options.lang,
|
|
815
836
|
since: temporalRange.since,
|
package/src/pipeline/search.ts
CHANGED
|
@@ -17,6 +17,7 @@ import type {
|
|
|
17
17
|
|
|
18
18
|
import { err, ok } from "../store/types";
|
|
19
19
|
import { createChunkLookup } from "./chunk-lookup";
|
|
20
|
+
import { matchesExcludedChunks, matchesExcludedText } from "./exclude";
|
|
20
21
|
import { selectBestChunkForSteering } from "./intent";
|
|
21
22
|
import { detectQueryLanguage } from "./query-language";
|
|
22
23
|
import {
|
|
@@ -237,6 +238,21 @@ export async function searchBm25(
|
|
|
237
238
|
) ?? rawChunk)
|
|
238
239
|
: rawChunk;
|
|
239
240
|
|
|
241
|
+
const excluded =
|
|
242
|
+
matchesExcludedText(
|
|
243
|
+
[fts.title ?? "", fts.relPath ?? "", fts.snippet ?? ""],
|
|
244
|
+
options.exclude
|
|
245
|
+
) ||
|
|
246
|
+
matchesExcludedChunks(
|
|
247
|
+
chunksMapResult.ok && fts.mirrorHash
|
|
248
|
+
? (chunksMapResult.value.get(fts.mirrorHash) ?? [])
|
|
249
|
+
: [],
|
|
250
|
+
options.exclude
|
|
251
|
+
);
|
|
252
|
+
if (excluded) {
|
|
253
|
+
continue;
|
|
254
|
+
}
|
|
255
|
+
|
|
240
256
|
// For --full, de-dupe by docid (keep best scoring chunk per doc)
|
|
241
257
|
// Raw BM25: smaller (more negative) is better
|
|
242
258
|
if (options.full) {
|
|
@@ -309,6 +325,7 @@ export async function searchBm25(
|
|
|
309
325
|
mode: "bm25",
|
|
310
326
|
totalResults: Math.min(filteredResults.length, limit),
|
|
311
327
|
intent: options.intent,
|
|
328
|
+
exclude: options.exclude,
|
|
312
329
|
collection: options.collection,
|
|
313
330
|
lang: options.lang,
|
|
314
331
|
since: temporalRange.since,
|
package/src/pipeline/types.ts
CHANGED
|
@@ -79,6 +79,8 @@ export interface SearchMeta {
|
|
|
79
79
|
author?: string;
|
|
80
80
|
/** Rerank candidate limit used */
|
|
81
81
|
candidateLimit?: number;
|
|
82
|
+
/** Explicit exclusion terms applied */
|
|
83
|
+
exclude?: string[];
|
|
82
84
|
/** Explain data (when --explain is used) */
|
|
83
85
|
explain?: {
|
|
84
86
|
lines: ExplainLine[];
|
|
@@ -124,6 +126,8 @@ export interface SearchOptions {
|
|
|
124
126
|
author?: string;
|
|
125
127
|
/** Optional disambiguating context that steers scoring/snippets, but is not searched directly */
|
|
126
128
|
intent?: string;
|
|
129
|
+
/** Explicit exclusion terms for hard candidate pruning */
|
|
130
|
+
exclude?: string[];
|
|
127
131
|
}
|
|
128
132
|
|
|
129
133
|
/** Structured query mode identifier */
|
|
@@ -317,6 +321,8 @@ export interface AskMeta {
|
|
|
317
321
|
vectorsUsed: boolean;
|
|
318
322
|
intent?: string;
|
|
319
323
|
candidateLimit?: number;
|
|
324
|
+
exclude?: string[];
|
|
325
|
+
queryModes?: QueryModeSummary;
|
|
320
326
|
answerGenerated?: boolean;
|
|
321
327
|
totalResults?: number;
|
|
322
328
|
answerContext?: AnswerContextExplain;
|
package/src/pipeline/vsearch.ts
CHANGED
|
@@ -14,6 +14,7 @@ import type { SearchOptions, SearchResult, SearchResults } from "./types";
|
|
|
14
14
|
import { err, ok } from "../store/types";
|
|
15
15
|
import { createChunkLookup } from "./chunk-lookup";
|
|
16
16
|
import { formatQueryForEmbedding } from "./contextual";
|
|
17
|
+
import { matchesExcludedChunks, matchesExcludedText } from "./exclude";
|
|
17
18
|
import { selectBestChunkForSteering } from "./intent";
|
|
18
19
|
import { detectQueryLanguage } from "./query-language";
|
|
19
20
|
import {
|
|
@@ -174,6 +175,25 @@ export async function searchVectorWithEmbedding(
|
|
|
174
175
|
continue;
|
|
175
176
|
}
|
|
176
177
|
|
|
178
|
+
const excluded =
|
|
179
|
+
matchesExcludedText(
|
|
180
|
+
[
|
|
181
|
+
doc.title ?? "",
|
|
182
|
+
doc.relPath,
|
|
183
|
+
doc.author ?? "",
|
|
184
|
+
doc.contentType ?? "",
|
|
185
|
+
...(doc.categories ?? []),
|
|
186
|
+
],
|
|
187
|
+
options.exclude
|
|
188
|
+
) ||
|
|
189
|
+
matchesExcludedChunks(
|
|
190
|
+
chunksMap.get(vec.mirrorHash) ?? [],
|
|
191
|
+
options.exclude
|
|
192
|
+
);
|
|
193
|
+
if (excluded) {
|
|
194
|
+
continue;
|
|
195
|
+
}
|
|
196
|
+
|
|
177
197
|
// For --full, de-dupe by docid (keep best scoring chunk per doc)
|
|
178
198
|
if (options.full) {
|
|
179
199
|
const existing = bestByDocid.get(doc.docid);
|
|
@@ -301,6 +321,7 @@ export async function searchVectorWithEmbedding(
|
|
|
301
321
|
vectorsUsed: true,
|
|
302
322
|
totalResults: finalResults.length,
|
|
303
323
|
intent: options.intent,
|
|
324
|
+
exclude: options.exclude,
|
|
304
325
|
collection: options.collection,
|
|
305
326
|
lang: options.lang,
|
|
306
327
|
since: temporalRange.since,
|
|
@@ -362,6 +383,9 @@ interface DocumentInfo {
|
|
|
362
383
|
title: string | null;
|
|
363
384
|
collection: string;
|
|
364
385
|
relPath: string;
|
|
386
|
+
author: string | null;
|
|
387
|
+
contentType: string | null;
|
|
388
|
+
categories: string[] | null;
|
|
365
389
|
sourceHash: string;
|
|
366
390
|
sourceMime: string;
|
|
367
391
|
sourceExt: string;
|
|
@@ -491,6 +515,9 @@ async function buildDocumentMap(
|
|
|
491
515
|
title: doc.title,
|
|
492
516
|
collection: doc.collection,
|
|
493
517
|
relPath: doc.relPath,
|
|
518
|
+
author: doc.author ?? null,
|
|
519
|
+
contentType: doc.contentType ?? null,
|
|
520
|
+
categories: doc.categories ?? null,
|
|
494
521
|
sourceHash: doc.sourceHash,
|
|
495
522
|
sourceMime: doc.sourceMime,
|
|
496
523
|
sourceExt: doc.sourceExt,
|