@evantahler/mcpx 0.21.1 → 0.21.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@evantahler/mcpx",
3
- "version": "0.21.1",
3
+ "version": "0.21.3",
4
4
  "description": "A command-line interface for MCP servers. curl for MCP.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -16,10 +16,14 @@ async function getEmbedder(): Promise<(text: string) => Promise<Float32Array>> {
16
16
 
17
17
  const transformers = await import("@huggingface/transformers");
18
18
 
19
- // transformers.js is patched (see patches/@huggingface%2Ftransformers@4.2.0.patch,
20
- // applied by `bun run scripts/apply-transformers-patch.sh` during prebuild) to
21
- // force the WASM backend instead of onnxruntime-node — the native bindings can't
22
- // be bundled into the Bun --compile single binary.
19
+ // In the `bun --compile` binary and during local dev/CI, transformers is
20
+ // patched (patches/@huggingface%2Ftransformers@4.2.0.patch, applied by
21
+ // scripts/apply-transformers-patch.sh) so the only supported device is
22
+ // `wasm` via onnxruntime-web — native bindings can't be bundled into the
23
+ // single binary. For npm-installed mcpx the package is unpatched and `wasm`
24
+ // is rejected; we try `wasm` first and fall back to `cpu` (onnxruntime-node
25
+ // native bindings, which the user already has from npm) on the
26
+ // "Unsupported device" error.
23
27
  const ortWasm = transformers.env.backends.onnx?.wasm;
24
28
  if (ortWasm) {
25
29
  ortWasm.numThreads = 1;
@@ -52,14 +56,23 @@ async function getEmbedder(): Promise<(text: string) => Promise<Float32Array>> {
52
56
  transformers.env.cacheDir = userCacheDir;
53
57
  transformers.env.localModelPath = join(userCacheDir, "models");
54
58
 
55
- // WASM device defaults to q8 quantization, which gives near-identical
56
- // embedding quality at ~25% the model size (≈22 MB vs ≈86 MB for fp32).
57
- // Both CI and `bun run build` apply the transformers patch first, so
58
- // wasm is the only supported device in this codepath.
59
- const extractor = await transformers.pipeline("feature-extraction", EMBEDDING_MODEL.REPO, {
60
- device: "wasm",
61
- dtype: "q8",
62
- });
59
+ // q8 quantization gives near-identical embedding quality at ~25% the model
60
+ // size (≈22 MB vs ≈86 MB for fp32). See onnx setup comment above for why
61
+ // we try `wasm` first and fall back to `cpu`.
62
+ let extractor: Awaited<ReturnType<typeof transformers.pipeline>>;
63
+ try {
64
+ extractor = await transformers.pipeline("feature-extraction", EMBEDDING_MODEL.REPO, {
65
+ device: "wasm",
66
+ dtype: "q8",
67
+ });
68
+ } catch (err) {
69
+ if (!String((err as Error)?.message ?? "").includes("Unsupported device")) throw err;
70
+ logger.debug("WASM backend unavailable; falling back to cpu (onnxruntime-node)");
71
+ extractor = await transformers.pipeline("feature-extraction", EMBEDDING_MODEL.REPO, {
72
+ device: "cpu",
73
+ dtype: "q8",
74
+ });
75
+ }
63
76
 
64
77
  pipelineInstance = async (text: string): Promise<Float32Array> => {
65
78
  const output = await extractor(text, { pooling: "mean", normalize: true });