@oomkapwn/enquire-mcp 2.8.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +102 -0
- package/README.md +177 -195
- package/dist/embeddings.d.ts +34 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +79 -0
- package/dist/embeddings.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +55 -6
- package/dist/index.js.map +1 -1
- package/dist/ocr.d.ts +66 -0
- package/dist/ocr.d.ts.map +1 -0
- package/dist/ocr.js +199 -0
- package/dist/ocr.js.map +1 -0
- package/dist/tools.d.ts +78 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +110 -1
- package/dist/tools.js.map +1 -1
- package/docs/api.md +1 -1
- package/package.json +4 -2
package/dist/embeddings.d.ts
CHANGED
|
@@ -35,4 +35,38 @@ export interface Embedder {
|
|
|
35
35
|
export declare function loadEmbedder(alias?: string): Promise<Embedder>;
|
|
36
36
|
/** Cosine similarity between two L2-normalized vectors (= dot product). */
|
|
37
37
|
export declare function cosineSim(a: Float32Array, b: Float32Array): number;
|
|
38
|
+
/** One entry of the reranker model catalog (`RERANKER_MODELS`) — analogous to `EMBEDDING_MODELS`. */
|
|
39
|
+
export interface RerankerModel {
|
|
40
|
+
/** Catalog alias; the shipped entries use the same string as their key in `RERANKER_MODELS`. */
alias: string;
|
|
41
|
+
/** HuggingFace model id that `loadReranker` hands to the text-classification pipeline. */
hfId: string;
|
|
42
|
+
/** Approximate model size in megabytes. */
approxSizeMB: number;
|
|
43
|
+
/** Whether the model is multilingual. */
multilingual: boolean;
|
|
44
|
+
/** Max combined (query + passage) tokens — BGE base is 512. */
|
|
45
|
+
maxTokens: number;
|
|
46
|
+
}
|
|
47
|
+
/** Frozen alias → model lookup table of the available rerankers. */
export declare const RERANKER_MODELS: Readonly<Record<string, RerankerModel>>;
|
|
48
|
+
/** Alias that `resolveRerankerModel` falls back to when none is given. */
export declare const DEFAULT_RERANKER_ALIAS = "rerank-multilingual";
|
|
49
|
+
/** Resolve an alias to its `RERANKER_MODELS` entry; throws an Error listing the known aliases when the alias is unknown. */
export declare function resolveRerankerModel(alias: string | undefined): RerankerModel;
|
|
50
|
+
/** Opaque handle for a loaded reranker. Constructed via `loadReranker()`. */
|
|
51
|
+
export interface Reranker {
|
|
52
|
+
readonly model: RerankerModel;
|
|
53
|
+
/**
|
|
54
|
+
* Score (query, passage) pairs. Higher = more relevant. The implementation
|
|
55
|
+
* in `loadReranker` returns the text-classification pipeline's `score` field
|
|
56
|
+
* unchanged (it applies no sigmoid of its own) and substitutes -Infinity for
|
|
57
|
+
* any result lacking a numeric score. Passages are passed through untruncated;
* over-long input is left to the model (presumably chopped at maxTokens — confirm).
|
|
58
|
+
*
|
|
59
|
+
* Returns one score per passage in input order.
|
|
60
|
+
*/
|
|
61
|
+
score(query: string, passages: readonly string[]): Promise<number[]>;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Load a BGE-style cross-encoder reranker. Lazy-imports
|
|
65
|
+
* `@huggingface/transformers` on first call (same lazy-load pattern as
|
|
66
|
+
* `loadEmbedder`). Cold-start downloads the model from HuggingFace
|
|
67
|
+
* (~25-110 MB depending on alias) into `~/.cache/huggingface/`.
|
|
68
|
+
*
|
|
69
|
+
* @param alias - Reranker alias from RERANKER_MODELS (default: "rerank-multilingual").
* @returns A `Reranker` whose `model` is the resolved catalog entry.
* @throws Error if `alias` is not a known key of RERANKER_MODELS.
|
|
70
|
+
*/
|
|
71
|
+
export declare function loadReranker(alias?: string): Promise<Reranker>;
|
|
38
72
|
//# sourceMappingURL=embeddings.d.ts.map
|
package/dist/embeddings.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AAcA;;mCAEmC;AACnC,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,KAAK,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,4DAA4D;IAC5D,GAAG,EAAE,MAAM,CAAC;IACZ,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;IACrB,gEAAgE;IAChE,YAAY,EAAE,OAAO,CAAC;IACtB,6DAA6D;IAC7D,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,gBAAgB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAiBpE,CAAC;AAEH,0EAA0E;AAC1E,eAAO,MAAM,mBAAmB,iBAAiB,CAAC;AAElD,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,cAAc,CAQtE;AAED,6EAA6E;AAC7E,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAC;IAC/B;wDACoD;IACpD,KAAK,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CAC1D;AA0BD;;;;;GAKG;AACH,wBAAsB,YAAY,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAyCpE;AAED,2EAA2E;AAC3E,wBAAgB,SAAS,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CASlE"}
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AAcA;;mCAEmC;AACnC,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,KAAK,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,4DAA4D;IAC5D,GAAG,EAAE,MAAM,CAAC;IACZ,8EAA8E;IAC9E,YAAY,EAAE,MAAM,CAAC;IACrB,gEAAgE;IAChE,YAAY,EAAE,OAAO,CAAC;IACtB,6DAA6D;IAC7D,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,gBAAgB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAiBpE,CAAC;AAEH,0EAA0E;AAC1E,eAAO,MAAM,mBAAmB,iBAAiB,CAAC;AAElD,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,cAAc,CAQtE;AAED,6EAA6E;AAC7E,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAC;IAC/B;wDACoD;IACpD,KAAK,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CAC1D;AA0BD;;;;;GAKG;AACH,wBAAsB,YAAY,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAyCpE;AAED,2EAA2E;AAC3E,wBAAgB,SAAS,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CASlE;AAuBD,oEAAoE;AACpE,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,OAAO,CAAC;IACtB,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAoBlE,CAAC;AAEH,eAAO,MAAM,sBAAsB,wBAAwB,CAAC;AAE5D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAQ7E;AAED,6EAA6E;AAC7E,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,KAAK,EAAE,aAAa,CAAC;IAC9B;;;;;;;OAOG;IACH,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;CACtE;AAED;;;;;;;GAOG;AACH,wBAAsB,YAAY,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAyCpE"}
|
package/dist/embeddings.js
CHANGED
|
@@ -114,4 +114,83 @@ export function cosineSim(a, b) {
|
|
|
114
114
|
}
|
|
115
115
|
return s;
|
|
116
116
|
}
|
|
117
|
+
/**
 * Catalog of the cross-encoder rerankers this module can load, keyed by alias.
 * The outer object is frozen so aliases cannot be added or removed at runtime.
 */
export const RERANKER_MODELS = Object.freeze({
    // BGE-reranker-base — English, ~110 MB. Latency ~30-50ms per pair on M1 CPU.
    "rerank-bge": {
        alias: "rerank-bge",
        hfId: "Xenova/bge-reranker-base",
        approxSizeMB: 110,
        multilingual: false,
        maxTokens: 512
    },
    // mxbai-rerank-xsmall-v1 — multilingual, ~25 MB, much faster than BGE-base.
    // Better default for users on slower hardware or larger candidate sets.
    // Cited in MTEB leaderboard as comparable to BGE-base on English while
    // staying multilingual.
    "rerank-multilingual": {
        alias: "rerank-multilingual",
        hfId: "Xenova/mxbai-rerank-xsmall-v1",
        approxSizeMB: 25,
        multilingual: true,
        maxTokens: 512
    }
});
/** Alias used when the caller does not name a reranker. */
export const DEFAULT_RERANKER_ALIAS = "rerank-multilingual";
/**
 * Resolve a reranker alias to its catalog entry.
 *
 * @param alias - Key of RERANKER_MODELS; `undefined`/`null` selects
 *   DEFAULT_RERANKER_ALIAS.
 * @returns The matching catalog entry.
 * @throws Error naming the rejected alias and listing the known aliases when
 *   the alias is not an own key of RERANKER_MODELS.
 */
export function resolveRerankerModel(alias) {
    const key = alias ?? DEFAULT_RERANKER_ALIAS;
    // Own-property check: a bare `RERANKER_MODELS[key]` lookup also resolves
    // inherited Object.prototype members ("constructor", "toString",
    // "__proto__", ...), which are truthy and would be returned as if they
    // were a model instead of raising the unknown-alias error.
    const model = Object.prototype.hasOwnProperty.call(RERANKER_MODELS, key)
        ? RERANKER_MODELS[key]
        : undefined;
    if (!model) {
        const known = Object.keys(RERANKER_MODELS).join(", ");
        throw new Error(`Unknown reranker model alias '${key}'. Known aliases: ${known}.`);
    }
    return model;
}
|
|
148
|
+
/**
 * Load a BGE-style cross-encoder reranker. Lazy-imports
 * `@huggingface/transformers` on first call (same lazy-load pattern as
 * `loadEmbedder`). Cold-start downloads the model from HuggingFace
 * (~25-110 MB depending on alias) into `~/.cache/huggingface/`.
 *
 * @param alias - Reranker alias from RERANKER_MODELS (default: "rerank-multilingual").
 * @returns An object exposing the resolved `model` and an async
 *   `score(query, passages)` that yields exactly one number per passage, in
 *   input order.
 * @throws Whatever `resolveRerankerModel` / `loadPipeline` throw (unknown
 *   alias, pipeline unavailable).
 */
export async function loadReranker(alias) {
    const model = resolveRerankerModel(alias);
    const pipeline = await loadPipeline();
    const classifier = (await pipeline("text-classification", model.hfId));
    // Sub-batch to bound memory — same rationale as the embedder's
    // MAX_INTERNAL_BATCH. Cross-encoder is heavier per pair, so we use a
    // smaller batch (4) to keep peak memory under ~150 MB on M1.
    const MAX_INTERNAL_BATCH = 4;
    return {
        model,
        async score(query, passages) {
            if (passages.length === 0)
                return [];
            // Build the (query, passage) pair inputs, one per passage.
            // NOTE(review): confirm that the installed @huggingface/transformers
            // text-classification pipeline accepts {text, text_pair} objects;
            // its documented inputs are strings / arrays of strings.
            const inputs = passages.map((p) => ({ text: query, text_pair: p }));
            const out = [];
            for (let batchStart = 0; batchStart < inputs.length; batchStart += MAX_INTERNAL_BATCH) {
                const batch = inputs.slice(batchStart, batchStart + MAX_INTERNAL_BATCH);
                const result = await classifier(batch);
                const results = Array.isArray(result) ? result : [result];
                // Walk the batch, not the result: this guarantees exactly one
                // score per passage even if the pipeline returns fewer or more
                // entries than it was given, so scores never shift onto the
                // wrong passage.
                for (let i = 0; i < batch.length; i++) {
                    const raw = results[i];
                    // The pipeline may hand back one array of {label, score} per
                    // input; in that case the first entry is the top label.
                    const top = Array.isArray(raw) ? raw[0] : raw;
                    const value = top?.score;
                    // Defensive: missing or NaN scores surface as -Infinity so the
                    // hit goes to the bottom rather than poisoning the sort with NaN
                    // (`typeof NaN === "number"`, so NaN needs its own check).
                    out.push(typeof value === "number" && !Number.isNaN(value) ? value : -Infinity);
                }
            }
            return out;
        }
    };
}
|
|
117
196
|
//# sourceMappingURL=embeddings.js.map
|
package/dist/embeddings.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAClF,gFAAgF;AAChF,sEAAsE;AACtE,gFAAgF;AAChF,EAAE;AACF,gBAAgB;AAChB,8EAA8E;AAC9E,oEAAoE;AACpE,wEAAwE;AACxE,yEAAyE;AACzE,iFAAiF;AACjF,8EAA8E;AAC9E,uEAAuE;AAoBvE,MAAM,CAAC,MAAM,gBAAgB,GAA6C,MAAM,CAAC,MAAM,CAAC;IACtF,YAAY,EAAE;QACZ,KAAK,EAAE,cAAc;QACrB,IAAI,EAAE,8CAA8C;QACpD,GAAG,EAAE,GAAG;QACR,YAAY,EAAE,GAAG;QACjB,YAAY,EAAE,IAAI;QAClB,SAAS,EAAE,GAAG;KACf;IACD,GAAG,EAAE;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,0BAA0B;QAChC,GAAG,EAAE,GAAG;QACR,YAAY,EAAE,EAAE;QAChB,YAAY,EAAE,KAAK;QACnB,SAAS,EAAE,GAAG;KACf;CACF,CAAC,CAAC;AAEH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,mBAAmB,GAAG,cAAc,CAAC;AAElD,MAAM,UAAU,YAAY,CAAC,KAAyB;IACpD,MAAM,GAAG,GAAG,KAAK,IAAI,mBAAmB,CAAC;IACzC,MAAM,KAAK,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACpC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,qBAAqB,KAAK,GAAG,CAAC,CAAC;IACtF,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAUD,2EAA2E;AAC3E,2EAA2E;AAC3E,4EAA4E;AAC5E,IAAI,YAAY,GAA+D,IAAI,CAAC;AAEpF,KAAK,UAAU,YAAY;IACzB,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IACtC,IAAI,CAAC;QACH,gEAAgE;QAChE,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAErD,CAAC;QACF,IAAI,CAAC,GAAG,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACzF,YAAY,GAAG,GAAG,CAAC,QAAQ,CAAC;QAC5B,OAAO,YAAY,CAAC;IACtB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CACb,6HAA6H;YAC3H,iGAAiG;YACjG,mBAAmB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACxE,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAc;IAC/C,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAG,MAAM,YAAY,EAAE,CAAC;IACtC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,IAAI,CAAC,CAGN,CAAC;IAE9D,wEAAwE;IACxE,wEAAwE;IACxE,yEAAyE;IACzE,wEAAwE;IACxE,0EAA0E;IAC1E,sEAAsE;IACtE,MAAM,kBAAkB,GAAG,CAAC,CAAC;IAE7B,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC;IACtB,OAAO;QACL,KAAK;QACL,KAAK,C
AAC,KAAK,CAAC,KAAwB;YAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,MAAM,GAAG,GAAmB,EAAE,CAAC;YAC/B,oEAAoE;YACpE,gEAAgE;YAChE,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,UAAU,IAAI,kBAAkB,EAAE,CAAC;gBACrF,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,UAAU,GAAG,kBAAkB,CAAC,CAAC;gBACvE,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,CAAC,GAAG,KAAK,CAAC,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBACjF,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC3B,MAAM,IAAI,KAAK,CACb,SAAS,KAAK,CAAC,IAAI,iBAAiB,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,GAAG,sCAAsC,CAC1G,CAAC;gBACJ,CAAC;gBACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;oBACtB,uEAAuE;oBACvE,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;gBACpE,CAAC;YACH,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC;AAED,2EAA2E;AAC3E,MAAM,UAAU,SAAS,CAAC,CAAe,EAAE,CAAe;IACxD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACrE,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC"}
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAClF,gFAAgF;AAChF,sEAAsE;AACtE,gFAAgF;AAChF,EAAE;AACF,gBAAgB;AAChB,8EAA8E;AAC9E,oEAAoE;AACpE,wEAAwE;AACxE,yEAAyE;AACzE,iFAAiF;AACjF,8EAA8E;AAC9E,uEAAuE;AAoBvE,MAAM,CAAC,MAAM,gBAAgB,GAA6C,MAAM,CAAC,MAAM,CAAC;IACtF,YAAY,EAAE;QACZ,KAAK,EAAE,cAAc;QACrB,IAAI,EAAE,8CAA8C;QACpD,GAAG,EAAE,GAAG;QACR,YAAY,EAAE,GAAG;QACjB,YAAY,EAAE,IAAI;QAClB,SAAS,EAAE,GAAG;KACf;IACD,GAAG,EAAE;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,0BAA0B;QAChC,GAAG,EAAE,GAAG;QACR,YAAY,EAAE,EAAE;QAChB,YAAY,EAAE,KAAK;QACnB,SAAS,EAAE,GAAG;KACf;CACF,CAAC,CAAC;AAEH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,mBAAmB,GAAG,cAAc,CAAC;AAElD,MAAM,UAAU,YAAY,CAAC,KAAyB;IACpD,MAAM,GAAG,GAAG,KAAK,IAAI,mBAAmB,CAAC;IACzC,MAAM,KAAK,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACpC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,qBAAqB,KAAK,GAAG,CAAC,CAAC;IACtF,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAUD,2EAA2E;AAC3E,2EAA2E;AAC3E,4EAA4E;AAC5E,IAAI,YAAY,GAA+D,IAAI,CAAC;AAEpF,KAAK,UAAU,YAAY;IACzB,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IACtC,IAAI,CAAC;QACH,gEAAgE;QAChE,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAErD,CAAC;QACF,IAAI,CAAC,GAAG,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACzF,YAAY,GAAG,GAAG,CAAC,QAAQ,CAAC;QAC5B,OAAO,YAAY,CAAC;IACtB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CACb,6HAA6H;YAC3H,iGAAiG;YACjG,mBAAmB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACxE,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAc;IAC/C,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAG,MAAM,YAAY,EAAE,CAAC;IACtC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,IAAI,CAAC,CAGN,CAAC;IAE9D,wEAAwE;IACxE,wEAAwE;IACxE,yEAAyE;IACzE,wEAAwE;IACxE,0EAA0E;IAC1E,sEAAsE;IACtE,MAAM,kBAAkB,GAAG,CAAC,CAAC;IAE7B,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC;IACtB,OAAO;QACL,KAAK;QACL,KAAK,C
AAC,KAAK,CAAC,KAAwB;YAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,MAAM,GAAG,GAAmB,EAAE,CAAC;YAC/B,oEAAoE;YACpE,gEAAgE;YAChE,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,UAAU,IAAI,kBAAkB,EAAE,CAAC;gBACrF,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,UAAU,GAAG,kBAAkB,CAAC,CAAC;gBACvE,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,CAAC,GAAG,KAAK,CAAC,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBACjF,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC3B,MAAM,IAAI,KAAK,CACb,SAAS,KAAK,CAAC,IAAI,iBAAiB,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,GAAG,sCAAsC,CAC1G,CAAC;gBACJ,CAAC;gBACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;oBACtB,uEAAuE;oBACvE,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;gBACpE,CAAC;YACH,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC;AAED,2EAA2E;AAC3E,MAAM,UAAU,SAAS,CAAC,CAAe,EAAE,CAAe;IACxD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACrE,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAiCD,MAAM,CAAC,MAAM,eAAe,GAA4C,MAAM,CAAC,MAAM,CAAC;IACpF,6EAA6E;IAC7E,YAAY,EAAE;QACZ,KAAK,EAAE,YAAY;QACnB,IAAI,EAAE,0BAA0B;QAChC,YAAY,EAAE,GAAG;QACjB,YAAY,EAAE,KAAK;QACnB,SAAS,EAAE,GAAG;KACf;IACD,4EAA4E;IAC5E,wEAAwE;IACxE,uEAAuE;IACvE,wBAAwB;IACxB,qBAAqB,EAAE;QACrB,KAAK,EAAE,qBAAqB;QAC5B,IAAI,EAAE,+BAA+B;QACrC,YAAY,EAAE,EAAE;QAChB,YAAY,EAAE,IAAI;QAClB,SAAS,EAAE,GAAG;KACf;CACF,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,sBAAsB,GAAG,qBAAqB,CAAC;AAE5D,MAAM,UAAU,oBAAoB,CAAC,KAAyB;IAC5D,MAAM,GAAG,GAAG,KAAK,IAAI,sBAAsB,CAAC;IAC5C,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;IACnC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MA
AM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtD,MAAM,IAAI,KAAK,CAAC,iCAAiC,GAAG,qBAAqB,KAAK,GAAG,CAAC,CAAC;IACrF,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAgBD;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAc;IAC/C,MAAM,KAAK,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAC1C,MAAM,QAAQ,GAAG,MAAM,YAAY,EAAE,CAAC;IACtC,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAAC,qBAAqB,EAAE,KAAK,CAAC,IAAI,CAAC,CAGhB,CAAC;IAEtD,OAAO;QACL,KAAK;QACL,KAAK,CAAC,KAAK,CAAC,KAAa,EAAE,QAA2B;YACpD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YACrC,0DAA0D;YAC1D,8DAA8D;YAC9D,4BAA4B;YAC5B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YACpE,+DAA+D;YAC/D,qEAAqE;YACrE,6DAA6D;YAC7D,MAAM,kBAAkB,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAa,EAAE,CAAC;YACzB,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,UAAU,IAAI,kBAAkB,EAAE,CAAC;gBACtF,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,UAAU,GAAG,kBAAkB,CAAC,CAAC;gBACxE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;gBACvC,sEAAsE;gBACtE,qEAAqE;gBACrE,sDAAsD;gBACtD,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBACzD,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;oBACvB,IAAI,OAAO,CAAC,EAAE,KAAK,KAAK,QAAQ,EAAE,CAAC;wBACjC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;oBACpB,CAAC;yBAAM,CAAC;wBACN,iEAAiE;wBACjE,2CAA2C;wBAC3C,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -23,6 +23,13 @@ export interface ServeOptions {
|
|
|
23
23
|
* with --include-pdfs ran). Off by default; opt-in because PDF extraction
|
|
24
24
|
* is slower than markdown. */
|
|
25
25
|
includePdfs?: boolean;
|
|
26
|
+
/** v2.9.0 — enable BGE cross-encoder reranking on top of RRF in
|
|
27
|
+
* obsidian_search. Off by default; adds ~30-50ms per query at top-50. */
|
|
28
|
+
enableReranker?: boolean;
|
|
29
|
+
/** v2.9.0 — reranker model alias (default "rerank-multilingual"). */
|
|
30
|
+
rerankerModel?: string;
|
|
31
|
+
/** v2.9.0 — how many top fused candidates to rerank (default 50). */
|
|
32
|
+
rerankerTopN?: string;
|
|
26
33
|
}
|
|
27
34
|
declare function main(): Promise<void>;
|
|
28
35
|
/**
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,SAAS,EAAoB,MAAM,yCAAyC,CAAC;AAMtF,OAAO,EAAkC,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,SAAS,EAAoB,MAAM,yCAAyC,CAAC;AAMtF,OAAO,EAAkC,QAAQ,EAAE,MAAM,WAAW,CAAC;AAyCrE,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAW5C,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACnC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC;;mCAE+B;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB;8EAC0E;IAC1E,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,qEAAqE;IACrE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qEAAqE;IACrE,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAcD,iBAAe,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAsVnC;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,YAAY,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3B,YAAY,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1B,cAAc,EAAE;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE,CAAC;CACtC;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,UAAU,CAAC,CAkE/E;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,YAAY,GAAG,SAAS,CA8F9E;AAED,iBAAe,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAoD5D;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAY1D;AAy9DD,iBAAS,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAM3D;AAsCD,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,WAAW,EAAE,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -9,10 +9,10 @@ import { z } from "zod";
|
|
|
9
9
|
import { EmbedDb } from "./embed-db.js";
|
|
10
10
|
import { DEFAULT_MODEL_ALIAS, EMBEDDING_MODELS, loadEmbedder, resolveModel } from "./embeddings.js";
|
|
11
11
|
import { chunkContent, defaultIndexFile, FtsIndex } from "./fts5.js";
|
|
12
|
-
import { appendToNote, archiveNote, chatThreadAppend, chatThreadRead, contextPack, createNote, dataviewQuery, embeddingsSearch, findPath, findSimilar, frontmatterGet, frontmatterSearch, frontmatterSet, getBacklinks, getNoteNeighbors, getOpenQuestions, getOutboundLinks, getRecentEdits, getUnresolvedWikilinks, getVaultStats, lintWiki, listCanvases, listNotes, listPdfs, listTags, openInUi, paperAudit, readCanvas, readNote, readPdf, renameNote, replaceInNotes, resolveWikilink, searchHybrid, searchText, semanticSearch, validateNoteProposal } from "./tools.js";
|
|
12
|
+
import { appendToNote, archiveNote, chatThreadAppend, chatThreadRead, contextPack, createNote, dataviewQuery, embeddingsSearch, findPath, findSimilar, frontmatterGet, frontmatterSearch, frontmatterSet, getBacklinks, getNoteNeighbors, getOpenQuestions, getOutboundLinks, getRecentEdits, getUnresolvedWikilinks, getVaultStats, lintWiki, listCanvases, listNotes, listPdfs, listTags, ocrPdf, openInUi, paperAudit, readCanvas, readNote, readPdf, renameNote, replaceInNotes, resolveWikilink, searchHybrid, searchText, semanticSearch, validateNoteProposal } from "./tools.js";
|
|
13
13
|
import { Vault } from "./vault.js";
|
|
14
14
|
import { VaultWatcher } from "./watcher.js";
|
|
15
|
-
const VERSION = "2.
|
|
15
|
+
const VERSION = "2.10.0";
|
|
16
16
|
/** Default location for the persistent embedding index, alongside .fts5.db. */
|
|
17
17
|
function embedDbPath(vaultRoot) {
|
|
18
18
|
// Match the FTS5 location convention by stripping the .fts5.db extension
|
|
@@ -44,6 +44,9 @@ async function main() {
|
|
|
44
44
|
.option("--enabled-tools <name...>", "Strict allowlist — when set, ONLY listed tools register. Complement to --disabled-tools (denylist). If both are set: a tool must be in the allowlist AND not in the denylist. Repeatable. Example: `--enabled-tools obsidian_search_text obsidian_read_note obsidian_get_recent_edits`.")
|
|
45
45
|
.option("--diagnostic-search-tools", "Register the four single-ranker search tools (obsidian_search_text, obsidian_full_text_search, obsidian_semantic_search, obsidian_embeddings_search) IN ADDITION to the default obsidian_search hybrid tool. Off by default in v2.0+ — the umbrella obsidian_search auto-detects available signals and produces consistent recall. Enable when you need single-ranker output for diagnostics or A/B benchmarking.")
|
|
46
46
|
.option("--include-pdfs", 'v2.8.0 — also index PDF files into FTS5 (and embeddings, if `enquire-mcp build-embeddings --include-pdfs` ran). With `--persistent-index`, PDF chunks become first-class hits in `obsidian_search` results, surfaced with `kind: "pdf"` flag. Off by default — opt-in because PDF text extraction is slower than markdown (~50-200ms per page on M1 cold). Requires the `pdfjs-dist` optionalDependency (default-installed unless you used `--omit=optional`).')
|
|
47
|
+
.option("--enable-reranker", "v2.9.0 — enable BGE cross-encoder reranking on top of RRF in `obsidian_search`. After fusion, top-N candidates (default 50) are re-scored by a cross-encoder model and re-sorted. Adds ~30-50ms per query on M1 CPU; +5-10 NDCG@10 typical for retrieval quality. Off by default — opt-in because the cross-encoder model is downloaded from HuggingFace on first call (~25-110 MB depending on alias). Requires the `@huggingface/transformers` optionalDependency.")
|
|
48
|
+
.option("--reranker-model <alias>", "v2.9.0 — reranker alias from RERANKER_MODELS. `rerank-multilingual` (default; Xenova/mxbai-rerank-xsmall-v1, ~25 MB, multilingual) or `rerank-bge` (Xenova/bge-reranker-base, ~110 MB, English-only). Only effective with `--enable-reranker`.")
|
|
49
|
+
.option("--reranker-top-n <n>", "v2.9.0 — how many top RRF-fused candidates to rerank (default 50). Larger N improves recall ceiling but costs more reranker compute (~30-50ms per 50 pairs on M1). Only effective with `--enable-reranker`.")
|
|
47
50
|
.action(async (opts) => {
|
|
48
51
|
await startServer(opts);
|
|
49
52
|
});
|
|
@@ -376,7 +379,16 @@ export function buildMcpServer(deps, opts) {
|
|
|
376
379
|
return origRegisterTool(name, ...rest);
|
|
377
380
|
};
|
|
378
381
|
}
|
|
379
|
-
|
|
382
|
+
// v2.9.0: build reranker config from CLI opts. Off when `--enable-reranker`
|
|
383
|
+
// wasn't passed; otherwise we pass through alias + top-n. The reranker
|
|
384
|
+
// model itself is lazy-loaded on first search call (no boot cost).
|
|
385
|
+
const rerankerConfig = opts.enableReranker
|
|
386
|
+
? {
|
|
387
|
+
...(opts.rerankerModel ? { alias: opts.rerankerModel } : {}),
|
|
388
|
+
...(opts.rerankerTopN ? { topN: parsePositiveInt(opts.rerankerTopN, "--reranker-top-n") } : {})
|
|
389
|
+
}
|
|
390
|
+
: null;
|
|
391
|
+
registerReadTools(server, deps.vault, deps.ftsIndex, opts.diagnosticSearchTools ?? false, rerankerConfig);
|
|
380
392
|
if (deps.vault.writeEnabled)
|
|
381
393
|
registerWriteTools(server, deps.vault);
|
|
382
394
|
if (deps.ftsIndex && opts.diagnosticSearchTools)
|
|
@@ -823,7 +835,13 @@ function registerFtsTools(server, idx, vault) {
|
|
|
823
835
|
});
|
|
824
836
|
});
|
|
825
837
|
}
|
|
826
|
-
function registerReadTools(server, vault, ftsIndex, diagnosticSearchTools
|
|
838
|
+
function registerReadTools(server, vault, ftsIndex, diagnosticSearchTools,
|
|
839
|
+
/**
|
|
840
|
+
* v2.9.0 — optional cross-encoder reranker config. When set, obsidian_search
|
|
841
|
+
* post-RRF reranks the top-N candidates with a BGE-style cross-encoder.
|
|
842
|
+
* `null` means reranker disabled (default).
|
|
843
|
+
*/
|
|
844
|
+
rerankerConfig = null) {
|
|
827
845
|
const READ_ONLY = { readOnlyHint: true, idempotentHint: true, openWorldHint: false };
|
|
828
846
|
server.registerTool("obsidian_list_notes", {
|
|
829
847
|
title: "List notes",
|
|
@@ -1130,7 +1148,7 @@ function registerReadTools(server, vault, ftsIndex, diagnosticSearchTools) {
|
|
|
1130
1148
|
}, async (args) => textResult(await listPdfs(vault, args)));
|
|
1131
1149
|
server.registerTool("obsidian_read_pdf", {
|
|
1132
1150
|
title: "Extract text from a PDF (page-by-page)",
|
|
1133
|
-
description: "Extracts plain text from one PDF, returning per-page text + a `full_text` join + doc-level metadata (title/author/subject/etc). Image-only / scanned PDFs surface `has_text: false` so agents can detect-and-recommend OCR. Optional `pages` slice (1-indexed inclusive range) for partial reads of long documents. Read-only. Same path-safety + privacy filter as `obsidian_read_note`. Powered by Mozilla's PDF.js (Apache-2.0).",
|
|
1151
|
+
description: "Extracts plain text from one PDF, returning per-page text + a `full_text` join + doc-level metadata (title/author/subject/etc). Image-only / scanned PDFs surface `has_text: false` so agents can detect-and-recommend OCR via `obsidian_ocr_pdf` (v2.10.0). Optional `pages` slice (1-indexed inclusive range) for partial reads of long documents. Read-only. Same path-safety + privacy filter as `obsidian_read_note`. Powered by Mozilla's PDF.js (Apache-2.0).",
|
|
1134
1152
|
annotations: { ...READ_ONLY, title: "Read PDF" },
|
|
1135
1153
|
inputSchema: {
|
|
1136
1154
|
path: z.string().describe("Vault-relative path of the .pdf file (with or without .pdf)"),
|
|
@@ -1141,6 +1159,33 @@ function registerReadTools(server, vault, ftsIndex, diagnosticSearchTools) {
|
|
|
1141
1159
|
include_metadata: z.boolean().optional().describe("Include doc-level metadata in result (default true)")
|
|
1142
1160
|
}
|
|
1143
1161
|
}, async (args) => textResult(await readPdf(vault, args)));
|
|
1162
|
+
// v2.10.0 — OCR for image-only / scanned PDFs. Completes the v2.7-v2.8
|
|
1163
|
+
// PDF retrieval story: when `obsidian_read_pdf` returns `has_text: false`,
|
|
1164
|
+
// the agent calls `obsidian_ocr_pdf` to extract text via Tesseract.js.
|
|
1165
|
+
// Tesseract.js + @napi-rs/canvas are optionalDependencies — clean
|
|
1166
|
+
// install-hint error if missing. ~1-2s per page on M1 CPU.
|
|
1167
|
+
server.registerTool("obsidian_ocr_pdf", {
|
|
1168
|
+
title: "OCR a scanned/image-only PDF (Tesseract.js)",
|
|
1169
|
+
description: "Runs Tesseract OCR over each page of an image-only / scanned PDF, returning per-page text + per-page confidence + mean confidence + the same shape as `obsidian_read_pdf`. Use this when `obsidian_read_pdf` returns `has_text: false` (typical for scans, photographed paper, image-only PDFs). Multilingual via `lang` (default `'eng'`; multi-lang via `'+'`, e.g. `'eng+rus'`). Optional `pages` range and `scale` (DPI multiplier, default 2 ~ 150 DPI, capped at 4). ~1-2s per page on M1 CPU. Read-only. Powered by Tesseract.js (Apache-2.0; trained-data files download on first use into the local cache, ~10 MB per language) + @napi-rs/canvas for PDF→bitmap rendering. Both gated to `optionalDependencies` so the markdown-only path stays zero-cost.",
|
|
1170
|
+
annotations: { ...READ_ONLY, title: "OCR PDF" },
|
|
1171
|
+
inputSchema: {
|
|
1172
|
+
path: z.string().describe("Vault-relative path of the .pdf file (with or without .pdf)"),
|
|
1173
|
+
lang: z
|
|
1174
|
+
.string()
|
|
1175
|
+
.optional()
|
|
1176
|
+
.describe("Tesseract language pack(s). Default 'eng'. Multi-lang via '+': 'eng+rus' for English+Russian mixed scans. Common: 'eng', 'rus', 'jpn', 'chi_sim', 'fra', 'deu'."),
|
|
1177
|
+
pages: z
|
|
1178
|
+
.tuple([z.number().int().positive(), z.number().int().positive()])
|
|
1179
|
+
.optional()
|
|
1180
|
+
.describe("Optional 1-indexed inclusive page range, e.g. [2, 5] OCRs pages 2..5"),
|
|
1181
|
+
scale: z
|
|
1182
|
+
.number()
|
|
1183
|
+
.min(0.5)
|
|
1184
|
+
.max(4)
|
|
1185
|
+
.optional()
|
|
1186
|
+
.describe("Render scale (DPI multiplier). Default 2 (~150 DPI). Higher = better OCR on small text but slower.")
|
|
1187
|
+
}
|
|
1188
|
+
}, async (args) => textResult(await ocrPdf(vault, args)));
|
|
1144
1189
|
// v2.0.0-beta.3: gated — see comment on obsidian_search_text above.
|
|
1145
1190
|
if (diagnosticSearchTools)
|
|
1146
1191
|
server.registerTool("obsidian_semantic_search", {
|
|
@@ -1217,7 +1262,11 @@ function registerReadTools(server, vault, ftsIndex, diagnosticSearchTools) {
|
|
|
1217
1262
|
}
|
|
1218
1263
|
}, async (args) => {
|
|
1219
1264
|
const embedFile = embedDbPath(vault.root);
|
|
1220
|
-
return textResult(await searchHybrid(vault, args, {
|
|
1265
|
+
return textResult(await searchHybrid(vault, args, {
|
|
1266
|
+
ftsIndex,
|
|
1267
|
+
embedFile,
|
|
1268
|
+
...(rerankerConfig ? { reranker: rerankerConfig } : {})
|
|
1269
|
+
}));
|
|
1221
1270
|
});
|
|
1222
1271
|
server.registerTool("obsidian_chat_thread_read", {
|
|
1223
1272
|
title: "Read parsed chat thread from a note",
|