nodebench-mcp 2.8.0 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@
11
11
  * - papaparse: CSV parsing
12
12
  * - pdf-parse: PDF text extraction (page-aware)
13
13
  * - yauzl: ZIP/DOCX/PPTX parsing
14
+ * - tesseract.js: Image OCR (PNG/JPG/etc)
14
15
  */
15
16
  import { readFile } from "node:fs/promises";
16
17
  import { existsSync } from "node:fs";
@@ -197,6 +198,149 @@ async function getYauzl() {
197
198
  throw new Error("Missing optional dependency: yauzl. Install it (or run npm install in packages/mcp-local) to use ZIP/DOCX/PPTX parsing.");
198
199
  }
199
200
  }
201
/**
 * Try to load the optional `sharp` image-processing library.
 *
 * @returns {Promise<object|null>} The sharp module (default export when
 *   present), or null when sharp is not installed.
 */
async function getSharpOptional() {
    let mod;
    try {
        mod = await import("sharp");
    }
    catch {
        // sharp is optional; callers degrade gracefully without preprocessing.
        return null;
    }
    return mod.default ?? mod;
}
210
/**
 * Load the optional `tesseract.js` OCR library.
 *
 * @returns {Promise<object>} The tesseract.js module (default export when present).
 * @throws {Error} When tesseract.js is not installed.
 */
async function getTesseract() {
    // Use a non-literal dynamic import so TypeScript doesn't require the optional
    // dependency to be installed at build time.
    const pkg = "tesseract.js";
    let mod = null;
    try {
        mod = await import(pkg);
    }
    catch {
        mod = null;
    }
    if (mod === null) {
        throw new Error("Missing optional dependency: tesseract.js. Install it (or run npm install in packages/mcp-local) to use image OCR.");
    }
    return mod.default ?? mod;
}
222
/**
 * Source of the Python helper used by the `transcribe_audio_file` tool.
 *
 * The script is written verbatim to a temp file by
 * ensureFasterWhisperHelperScript() and run as a child process. It parses CLI
 * flags (--path / --model / --language / --task / --beam-size / --vad-filter /
 * --max-chars / --include-segments), runs faster-whisper on CPU with int8
 * compute, enforces the max-chars budget both per-segment and on the joined
 * text, and prints a single JSON object to stdout. A missing faster-whisper
 * install is reported on stderr before re-raising; the Node caller matches
 * that stderr text to surface a pip install hint.
 *
 * NOTE(review): the "_V1" suffix versions the cached temp-file name; if this
 * script's content ever changes, bump the suffix so stale copies are rewritten.
 */
const FASTER_WHISPER_PY_SCRIPT_V1 = `# NodeBench MCP audio transcription helper (faster-whisper)
# This file is written to a temp directory at runtime.
import argparse
import json
import sys


def main() -> None:
    p = argparse.ArgumentParser()
    p.add_argument("--path", required=True)
    p.add_argument("--model", default="tiny.en")
    p.add_argument("--language", default="")
    p.add_argument("--task", default="transcribe")
    p.add_argument("--beam-size", type=int, default=5)
    p.add_argument("--vad-filter", type=int, default=0)
    p.add_argument("--max-chars", type=int, default=12000)
    p.add_argument("--include-segments", type=int, default=0)
    args = p.parse_args()

    try:
        from faster_whisper import WhisperModel
    except Exception:
        sys.stderr.write(
            "Missing python dependency: faster-whisper. Install with: pip install faster-whisper\\n"
        )
        raise

    model = WhisperModel(args.model, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        args.path,
        beam_size=max(1, int(args.beam_size)),
        language=(args.language or None),
        task=(args.task or "transcribe"),
        vad_filter=bool(int(args.vad_filter)),
        word_timestamps=False,
        temperature=0.0,
    )

    include_segments = bool(int(args.include_segments))
    max_chars = max(200, int(args.max_chars))

    parts = []
    segs = []
    char_budget = 0
    truncated = False

    for seg in segments:
        t = str(getattr(seg, "text", "") or "")
        if not t:
            continue
        parts.append(t)
        if include_segments:
            segs.append(
                {
                    "start": float(getattr(seg, "start", 0.0) or 0.0),
                    "end": float(getattr(seg, "end", 0.0) or 0.0),
                    "text": t,
                }
            )
        char_budget += len(t)
        if char_budget >= max_chars:
            truncated = True
            break

    text = "".join(parts).strip()
    if len(text) > max_chars:
        text = text[:max_chars]
        truncated = True

    out = {
        "path": args.path,
        "model": args.model,
        "task": args.task,
        "language": getattr(info, "language", None),
        "languageProbability": getattr(info, "language_probability", None),
        "durationSeconds": getattr(info, "duration", None),
        "beamSize": int(args.beam_size),
        "vadFilter": bool(int(args.vad_filter)),
        "maxChars": max_chars,
        "truncated": truncated,
        "text": text,
    }
    if include_segments:
        out["segments"] = segs
    sys.stdout.write(json.dumps(out, ensure_ascii=False))


if __name__ == "__main__":
    main()
`;
312
/**
 * Locate a Python executable for audio transcription.
 *
 * Resolution order:
 *   1. Environment overrides: NODEBENCH_PYTHON, NODEBENCH_AUDIO_PYTHON,
 *      PYTHON, PYTHON_EXE (first non-empty wins).
 *   2. A python-mcp-servers/.venv found under cwd, cwd/.., or cwd/../..
 *      (Windows Scripts/python.exe probed before POSIX bin/python).
 *   3. The bare "python" command from PATH.
 *
 * @returns {string} Path or command name of the Python interpreter.
 */
function findPythonExecutable() {
    const override = process.env.NODEBENCH_PYTHON ||
        process.env.NODEBENCH_AUDIO_PYTHON ||
        process.env.PYTHON ||
        process.env.PYTHON_EXE ||
        "";
    if (override)
        return String(override);
    // Probe for a project virtualenv at increasing distances from cwd.
    const roots = [
        process.cwd(),
        path.join(process.cwd(), ".."),
        path.join(process.cwd(), "..", ".."),
    ];
    const candidates = roots.flatMap((root) => [
        path.join(root, "python-mcp-servers", ".venv", "Scripts", "python.exe"),
        path.join(root, "python-mcp-servers", ".venv", "bin", "python"),
    ]);
    const found = candidates.find((candidate) => existsSync(candidate));
    return found ?? "python";
}
334
/**
 * Ensure the faster-whisper helper script exists on disk and return its path.
 *
 * Writes FASTER_WHISPER_PY_SCRIPT_V1 to a versioned file under the OS temp
 * directory the first time it is needed; later calls reuse the cached file.
 *
 * @returns {Promise<string>} Absolute path of the helper script.
 */
async function ensureFasterWhisperHelperScript() {
    const scriptDir = path.join(os.tmpdir(), "nodebench-mcp", "audio");
    const scriptPath = path.join(scriptDir, "transcribe_faster_whisper_v1.py");
    if (existsSync(scriptPath))
        return scriptPath;
    const fs = await import("node:fs/promises");
    await fs.mkdir(scriptDir, { recursive: true });
    await fs.writeFile(scriptPath, FASTER_WHISPER_PY_SCRIPT_V1, "utf8");
    return scriptPath;
}
200
344
  function decodeXmlEntities(text) {
201
345
  return text
202
346
  .replace(/"/g, "\"")
@@ -2281,5 +2425,214 @@ export const localFileTools = [
2281
2425
  };
2282
2426
  },
2283
2427
  },
2428
+ {
2429
+ name: "read_image_ocr_text",
2430
+ description: "Extract text from a local image (PNG/JPG/etc) using OCR (tesseract.js). Deterministic, no network.",
2431
+ inputSchema: {
2432
+ type: "object",
2433
+ properties: {
2434
+ path: {
2435
+ type: "string",
2436
+ description: "Path to a local image file (absolute or relative to current working directory).",
2437
+ },
2438
+ lang: {
2439
+ type: "string",
2440
+ description: "Tesseract language code (default: eng).",
2441
+ default: "eng",
2442
+ },
2443
+ langPath: {
2444
+ type: "string",
2445
+ description: "Optional directory containing traineddata files (e.g. eng.traineddata). If omitted, tesseract.js defaults apply. If .cache/tesseract exists under the current working directory, it is used by default.",
2446
+ },
2447
+ preprocess: {
2448
+ type: "boolean",
2449
+ description: "If true (default), attempts basic preprocessing with sharp (grayscale + normalize + PNG conversion) to improve OCR.",
2450
+ default: true,
2451
+ },
2452
+ maxChars: {
2453
+ type: "number",
2454
+ description: "Maximum characters to return (text is truncated).",
2455
+ default: 12000,
2456
+ },
2457
+ },
2458
+ required: ["path"],
2459
+ },
2460
+ handler: async (args) => {
2461
+ const filePath = resolveLocalPath(args?.path);
2462
+ if (!existsSync(filePath))
2463
+ throw new Error(`File not found: ${filePath}`);
2464
+ const lang = String(args?.lang ?? "eng").trim() || "eng";
2465
+ const maxChars = clampInt(args?.maxChars, 12000, 200, 200000);
2466
+ const preprocess = args?.preprocess !== false;
2467
+ let buffer = await readFile(filePath);
2468
+ let usedSharp = false;
2469
+ if (preprocess) {
2470
+ const sharp = await getSharpOptional();
2471
+ if (sharp) {
2472
+ try {
2473
+ // Normalize to PNG and improve contrast for OCR.
2474
+ buffer = await sharp(buffer).grayscale().normalize().png().toBuffer();
2475
+ usedSharp = true;
2476
+ }
2477
+ catch {
2478
+ // If preprocessing fails, fall back to the original buffer.
2479
+ }
2480
+ }
2481
+ }
2482
+ const langPathArg = typeof args?.langPath === "string" ? args.langPath.trim() : "";
2483
+ const defaultLangPath = path.join(process.cwd(), ".cache", "tesseract");
2484
+ const langPathEffective = langPathArg
2485
+ ? resolveLocalPath(langPathArg)
2486
+ : existsSync(defaultLangPath)
2487
+ ? defaultLangPath
2488
+ : null;
2489
+ const tesseract = await getTesseract();
2490
+ const recognize = tesseract?.recognize;
2491
+ if (typeof recognize !== "function") {
2492
+ throw new Error("tesseract.js missing recognize() export (unsupported version)");
2493
+ }
2494
+ const result = await recognize(buffer, lang, {
2495
+ ...(langPathEffective ? { langPath: langPathEffective } : {}),
2496
+ logger: () => {
2497
+ // silence
2498
+ },
2499
+ });
2500
+ let text = String(result?.data?.text ?? "").trim();
2501
+ const confidence = typeof result?.data?.confidence === "number" ? result.data.confidence : null;
2502
+ let truncated = false;
2503
+ if (text.length > maxChars) {
2504
+ text = text.slice(0, maxChars);
2505
+ truncated = true;
2506
+ }
2507
+ return {
2508
+ path: filePath,
2509
+ lang,
2510
+ langPath: langPathEffective,
2511
+ preprocess,
2512
+ usedSharp,
2513
+ confidence,
2514
+ maxChars,
2515
+ truncated,
2516
+ text,
2517
+ };
2518
+ },
2519
+ },
2520
    {
        name: "transcribe_audio_file",
        description: "Transcribe a local audio file (MP3/WAV/etc) to text using faster-whisper via Python. Deterministic, no network.",
        inputSchema: {
            type: "object",
            properties: {
                path: {
                    type: "string",
                    description: "Path to a local audio file (absolute or relative to current working directory).",
                },
                model: {
                    type: "string",
                    description: "Whisper model name (default: tiny.en).",
                    default: "tiny.en",
                },
                language: {
                    type: "string",
                    description: "Optional language hint (e.g. 'en'). If omitted, model auto-detects.",
                },
                task: {
                    type: "string",
                    description: "Task mode: transcribe or translate.",
                    default: "transcribe",
                    enum: ["transcribe", "translate"],
                },
                beamSize: {
                    type: "number",
                    description: "Beam size (higher = potentially better, slower).",
                    default: 5,
                },
                vadFilter: {
                    type: "boolean",
                    description: "If true, enables VAD filtering (can help noisy audio). Default false for determinism.",
                    default: false,
                },
                includeSegments: {
                    type: "boolean",
                    description: "If true, returns per-segment timestamps (can be verbose).",
                    default: false,
                },
                maxChars: {
                    type: "number",
                    description: "Maximum characters to return (text is truncated).",
                    default: 12000,
                },
                timeoutMs: {
                    type: "number",
                    description: "Maximum transcription time before aborting (ms).",
                    default: 300000,
                },
            },
            required: ["path"],
        },
        // Runs the faster-whisper helper (see FASTER_WHISPER_PY_SCRIPT_V1) in a
        // Python child process and returns its JSON output, parsed.
        handler: async (args) => {
            const filePath = resolveLocalPath(args?.path);
            if (!existsSync(filePath))
                throw new Error(`File not found: ${filePath}`);
            // Sanitize/clamp all inputs before handing them to the child process.
            const model = String(args?.model ?? "tiny.en").trim() || "tiny.en";
            const language = typeof args?.language === "string" ? args.language.trim() : "";
            const task = args?.task === "translate" ? "translate" : "transcribe";
            const beamSize = clampInt(args?.beamSize, 5, 1, 10);
            const vadFilter = args?.vadFilter === true;
            const includeSegments = args?.includeSegments === true;
            const maxChars = clampInt(args?.maxChars, 12000, 200, 200000);
            const timeoutMs = clampInt(args?.timeoutMs, 300000, 1000, 1800000);
            const pythonExe = findPythonExecutable();
            // Write the helper script to the temp dir if not already present.
            const scriptPath = await ensureFasterWhisperHelperScript();
            const child = await import("node:child_process");
            const util = await import("node:util");
            const execFileAsync = util.promisify(child.execFile);
            // Flag names must match the argparse definitions in the helper script.
            const argv = [
                scriptPath,
                "--path",
                filePath,
                "--model",
                model,
                "--task",
                task,
                "--beam-size",
                String(beamSize),
                "--vad-filter",
                vadFilter ? "1" : "0",
                "--max-chars",
                String(maxChars),
                "--include-segments",
                includeSegments ? "1" : "0",
            ];
            if (language) {
                // Only pass --language when a hint was supplied; the helper
                // auto-detects when the flag is absent (its default is "").
                argv.push("--language", language);
            }
            try {
                const { stdout, stderr } = (await execFileAsync(pythonExe, argv, {
                    timeout: timeoutMs,
                    // Transcripts can be large; allow up to 32 MiB of stdout.
                    maxBuffer: 32 * 1024 * 1024,
                    env: {
                        ...process.env,
                        // Avoid unicode surprises on Windows consoles.
                        PYTHONUTF8: "1",
                    },
                }));
                const raw = String(stdout ?? "").trim();
                if (!raw) {
                    throw new Error(`No output from transcription helper. Stderr: ${String(stderr ?? "").trim() || "(empty)"}`);
                }
                // The helper writes exactly one JSON object to stdout.
                const parsed = JSON.parse(raw);
                return parsed;
            }
            catch (err) {
                const msg = err?.message ?? String(err);
                const stderr = String(err?.stderr ?? "").trim();
                // Match the helper's missing-dependency stderr message (or Python's
                // generic import error) to surface a pip install hint.
                const hint = stderr.includes("Missing python dependency: faster-whisper") || msg.includes("No module named")
                    ? "Install the python dependency first: pip install faster-whisper"
                    : "";
                throw new Error(`Audio transcription failed (python=\"${pythonExe}\", model=\"${model}\"). ${hint}\n${stderr || msg}`);
            }
        },
    },
2284
2637
  ];
2285
2638
  //# sourceMappingURL=localFileTools.js.map