@evantahler/mcpx 0.21.2 → 0.21.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/search/semantic.ts +25 -12
package/package.json
CHANGED
package/src/search/semantic.ts
CHANGED
|
@@ -16,10 +16,14 @@ async function getEmbedder(): Promise<(text: string) => Promise<Float32Array>> {
|
|
|
16
16
|
|
|
17
17
|
const transformers = await import("@huggingface/transformers");
|
|
18
18
|
|
|
19
|
-
//
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
// be bundled into the
|
|
19
|
+
// In the `bun --compile` binary and during local dev/CI, transformers is
|
|
20
|
+
// patched (patches/@huggingface%2Ftransformers@4.2.0.patch, applied by
|
|
21
|
+
// scripts/apply-transformers-patch.sh) so the only supported device is
|
|
22
|
+
// `wasm` via onnxruntime-web — native bindings can't be bundled into the
|
|
23
|
+
// single binary. For npm-installed mcpx the package is unpatched and `wasm`
|
|
24
|
+
// is rejected; we try `wasm` first and fall back to `cpu` (onnxruntime-node
|
|
25
|
+
// native bindings, which the user already has from npm) on the
|
|
26
|
+
// "Unsupported device" error.
|
|
23
27
|
const ortWasm = transformers.env.backends.onnx?.wasm;
|
|
24
28
|
if (ortWasm) {
|
|
25
29
|
ortWasm.numThreads = 1;
|
|
@@ -52,14 +56,23 @@ async function getEmbedder(): Promise<(text: string) => Promise<Float32Array>> {
|
|
|
52
56
|
transformers.env.cacheDir = userCacheDir;
|
|
53
57
|
transformers.env.localModelPath = join(userCacheDir, "models");
|
|
54
58
|
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
// q8 quantization gives near-identical embedding quality at ~25% of the model
|
|
60
|
+
// size (≈22 MB vs ≈86 MB for fp32). See onnx setup comment above for why
|
|
61
|
+
// we try `wasm` first and fall back to `cpu`.
|
|
62
|
+
let extractor: Awaited<ReturnType<typeof transformers.pipeline>>;
|
|
63
|
+
try {
|
|
64
|
+
extractor = await transformers.pipeline("feature-extraction", EMBEDDING_MODEL.REPO, {
|
|
65
|
+
device: "wasm",
|
|
66
|
+
dtype: "q8",
|
|
67
|
+
});
|
|
68
|
+
} catch (err) {
|
|
69
|
+
if (!String((err as Error)?.message ?? "").includes("Unsupported device")) throw err;
|
|
70
|
+
logger.debug("WASM backend unavailable; falling back to cpu (onnxruntime-node)");
|
|
71
|
+
extractor = await transformers.pipeline("feature-extraction", EMBEDDING_MODEL.REPO, {
|
|
72
|
+
device: "cpu",
|
|
73
|
+
dtype: "q8",
|
|
74
|
+
});
|
|
75
|
+
}
|
|
63
76
|
|
|
64
77
|
pipelineInstance = async (text: string): Promise<Float32Array> => {
|
|
65
78
|
const output = await extractor(text, { pooling: "mean", normalize: true });
|