npm - @remnic/cli - Versions diffs - 9.3.681 → 9.3.682 - Mend

@remnic/cli 9.3.681 → 9.3.682

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/index.js +60 -8
package/package.json +28 -28

package/dist/index.js CHANGED Viewed

@@ -588,7 +588,7 @@ var PUBLISHED_BENCHMARK_NAMES = Object.freeze([
   "membench"
 ]);
 function isBenchRuntimeProfile(value) {
-  return value === "baseline" || value === "real" || value === "openclaw-chain";
+  return value === "baseline" || value === "real" || value === "openclaw-chain" || value === "local-lab";
 }
 function parseBenchRuntimeProfile(value, flagName) {
   if (isBenchRuntimeProfile(value)) {
@@ -596,11 +596,11 @@ function parseBenchRuntimeProfile(value, flagName) {
   }
   if (flagName === "--runtime-profile") {
     throw new Error(
-      'ERROR: --runtime-profile must be "baseline", "real", or "openclaw-chain".'
+      'ERROR: --runtime-profile must be "baseline", "real", "openclaw-chain", or "local-lab".'
     );
   }
   throw new Error(
-    'ERROR: --matrix must contain only "baseline", "real", or "openclaw-chain".'
+    'ERROR: --matrix must contain only "baseline", "real", "openclaw-chain", or "local-lab".'
   );
 }
 var BENCH_PROVIDER_ALLOWED = Object.freeze([
@@ -695,7 +695,8 @@ var BENCH_VALUE_FLAGS = Object.freeze([
   "--ama-bench-cross-judge-model",
   "--ama-bench-cross-judge-base-url",
   "--ama-bench-cross-judge-api-key",
-  "--ama-bench-cross-judge-codex-reasoning-effort"
+  "--ama-bench-cross-judge-codex-reasoning-effort",
+  "--local-lab-manifest"
 ]);
 var BENCH_BOOLEAN_FLAGS = Object.freeze([
   "--quick",
@@ -767,7 +768,8 @@ var RUN_VALUE_FLAGS = Object.freeze([
   "--ama-bench-cross-judge-model",
   "--ama-bench-cross-judge-base-url",
   "--ama-bench-cross-judge-api-key",
-  "--ama-bench-cross-judge-codex-reasoning-effort"
+  "--ama-bench-cross-judge-codex-reasoning-effort",
+  "--local-lab-manifest"
 ]);
 var RUN_BOOLEAN_FLAGS = Object.freeze([
   "--quick",
@@ -1000,6 +1002,7 @@ function parseBenchArgs(argv) {
   const requestTimeoutRaw = readBenchOptionValue(args, "--request-timeout");
   const drainTimeoutRaw = readBenchOptionValue(args, "--drain-timeout");
   const judgeCacheDirRaw = readBenchOptionValue(args, "--judge-cache-dir");
+  const localLabManifestRaw = readBenchOptionValue(args, "--local-lab-manifest");
   const max429WaitRaw = readBenchOptionValue(args, "--max-429-wait");
   const amaBenchJudgeProtocolRaw = readBenchOptionValue(args, "--ama-bench-judge-protocol");
   const amaBenchCrossJudgeProviderRaw = readBenchOptionValue(args, "--ama-bench-cross-judge-provider");
@@ -1022,7 +1025,7 @@ function parseBenchArgs(argv) {
     const candidates = matrixRaw.split(",").map((value) => value.trim()).filter((value) => value.length > 0);
     if (candidates.length === 0) {
       throw new Error(
-        'ERROR: --matrix must contain one or more of "baseline", "real", or "openclaw-chain".'
+        'ERROR: --matrix must contain one or more of "baseline", "real", "openclaw-chain", or "local-lab".'
       );
     }
     matrixProfiles = candidates.map(
@@ -1404,6 +1407,7 @@ function parseBenchArgs(argv) {
     // Issue #1573 PR1: surface judge-cache flags into the runner options.
     noJudgeCache: args.includes("--no-judge-cache"),
     judgeCacheDir: judgeCacheDirRaw ? path4.resolve(expandTilde(judgeCacheDirRaw)) : void 0,
+    localLabManifestPath: localLabManifestRaw ? path4.resolve(expandTilde(localLabManifestRaw)) : void 0,
     max429WaitMs,
     disableThinking: args.includes("--disable-thinking"),
     amaBenchJudgeProtocol,
@@ -3252,7 +3256,7 @@ Commands:
 Options:
   --quick                  Run a lightweight quick pass (maps to --lightweight --limit 1)
   --all                    Run every published benchmark
-  --runtime-profile <baseline|real|openclaw-chain>
+  --runtime-profile <baseline|real|openclaw-chain|local-lab>
                            Choose the benchmark runtime profile
   --matrix <profiles>      Run a benchmark across a comma-separated profile matrix
   --dataset-dir <path>     Override the benchmark dataset directory for full runs
@@ -3304,6 +3308,8 @@ Options:
   --baselines-dir <path>   Override the named baseline directory
   --request-timeout <ms>   Provider request timeout in milliseconds
   --drain-timeout <ms>     Memory drain timeout in milliseconds (defaults to --request-timeout when unset)
+  --local-lab-manifest <path>
+                           Path to a local-lab manifest JSON file (required for --runtime-profile local-lab)
   --threshold <value>      Regression threshold for compare (default: 0.05)
   --trial-limit <n>        Cap scored LoCoMo or MemoryAgentBench QA trials for staged published runs
   --task-filter <pattern>  BEAM diagnostic filter; match task id, ability, or question text
@@ -3373,7 +3379,8 @@ function buildBenchRuntimeProfileRequest(parsed, runtimeProfile) {
     drainTimeout: parsed.drainTimeout,
     max429WaitMs: parsed.max429WaitMs,
     disableThinking: parsed.disableThinking,
-    lcmObserveConcurrency: parsed.publishedIngestConcurrency
+    lcmObserveConcurrency: parsed.publishedIngestConcurrency,
+    localLabManifestPath: runtimeProfile === "local-lab" ? parsed.localLabManifestPath : void 0
   };
 }
 var BENCH_STDOUT_REDACTED_SECRET = "[REDACTED]";
@@ -3493,6 +3500,11 @@ async function runBenchViaFallback(parsed, benchmarkId, runtimeProfile) {
       'Fallback benchmark runner does not support --runtime-profile "openclaw-chain". Build/install @remnic/bench to use package-backed runtime profiles.'
     );
   }
+  if (runtimeProfile === "local-lab") {
+    throw new Error(
+      'Fallback benchmark runner does not support --runtime-profile "local-lab". Build/install @remnic/bench to use package-backed runtime profiles with local-lab manifests.'
+    );
+  }
   const unsupportedOptions = findUnsupportedFallbackBenchOptions(parsed);
   if (unsupportedOptions.length > 0) {
     throw new Error(
@@ -4739,6 +4751,44 @@ async function loadPublishedPromotionHelpers() {
     }
   };
 }
+async function preflightLocalLabEndpointsIfNeeded(benchModule, plan) {
+  if (plan.runtime.profile !== "local-lab" || !plan.runtime.localLab || !benchModule.preflightLocalLabRole) {
+    return;
+  }
+  const localLab = plan.runtime.localLab;
+  const preflightRole = benchModule.preflightLocalLabRole;
+  const responder = localLab.responder;
+  const responderResult = await preflightRole({
+    provider: responder.provider,
+    baseUrl: responder.baseUrl,
+    model: responder.model,
+    ctx: responder.ctx
+  });
+  if (!responderResult.ok) {
+    throw new Error(
+      `local-lab responder endpoint preflight failed: ${responderResult.reason}`
+    );
+  }
+  const judge = localLab.judge;
+  const stripSlash = (url) => url.endsWith("/") ? url.slice(0, -1) : url;
+  const sameEndpoint = stripSlash(responder.baseUrl) === stripSlash(judge.baseUrl);
+  if (!sameEndpoint) {
+    throw new Error(
+      "local-lab multi-endpoint sequential phase execution is not yet wired into the benchmark runner (PR3 calibration scope). The published harness runs recall\u2192answer\u2192judge per trial, which requires both models to be co-resident. Use a single-endpoint profile (both responder and judge models on one Ollama instance) or wait for PR3's runSequentialPhases integration."
+    );
+  }
+  const judgeResult = await preflightRole({
+    provider: judge.provider,
+    baseUrl: judge.baseUrl,
+    model: judge.model,
+    ctx: judge.ctx
+  });
+  if (!judgeResult.ok) {
+    throw new Error(
+      `local-lab judge endpoint preflight failed: ${judgeResult.reason}`
+    );
+  }
+}
 async function runBenchViaPackage(parsed, benchmarkId, runtimeProfile, benchStatusPath) {
   const loaded = await tryLoadBenchModule();
   if (!loaded) return { ok: false };
@@ -4760,6 +4810,7 @@ async function runBenchViaPackage(parsed, benchmarkId, runtimeProfile, benchStat
   if (!plan) {
     return { ok: false };
   }
+  await preflightLocalLabEndpointsIfNeeded(benchModule, plan);
   const outputDir = parsed.resultsDir ?? resolveBenchOutputDir();
   const datasetDir = resolveBenchDatasetDir(
     benchmarkId,
@@ -5010,6 +5061,7 @@ async function runCustomBenchViaPackage(parsed) {
   const writtenPaths = [];
   const customBenchmarkIds = [];
   for (const plan of plans) {
+    await preflightLocalLabEndpointsIfNeeded(benchModule, plan);
     const system = await plan.createAdapter(plan.runtime.adapterOptions);
     try {
       const result = await benchModule.runCustomBenchmarkFile(parsed.custom, {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@remnic/cli",
-  "version": "9.3.681",
+  "version": "9.3.682",
   "description": "CLI for Remnic memory — init, query, doctor, daemon management",
   "type": "module",
   "main": "dist/index.js",
@@ -26,23 +26,23 @@
   },
   "dependencies": {
     "yaml": "^2.4.2",
-    "@remnic/server": "^9.3.681",
-    "@remnic/core": "^9.3.681",
-    "@remnic/plugin-pi": "^9.3.681"
+    "@remnic/plugin-pi": "^9.3.682",
+    "@remnic/server": "^9.3.682",
+    "@remnic/core": "^9.3.682"
   },
   "peerDependencies": {
-    "@remnic/bench": "^9.3.681",
-    "@remnic/export-weclone": "^9.3.681",
-    "@remnic/import-weclone": "^9.3.681",
-    "@remnic/import-chatgpt": "^9.3.681",
-    "@remnic/import-claude": "^9.3.681",
-    "@remnic/import-gemini": "^9.3.681",
-    "@remnic/import-lossless-claw": "^9.3.681",
-    "@remnic/import-mem0": "^9.3.681",
-    "@remnic/import-supermemory": "^9.3.681",
-    "@remnic/connector-limitless": "^9.3.681",
-    "@remnic/connector-bee": "^9.3.681",
-    "@remnic/connector-omi": "^9.3.681"
+    "@remnic/bench": "^9.3.682",
+    "@remnic/export-weclone": "^9.3.682",
+    "@remnic/import-weclone": "^9.3.682",
+    "@remnic/import-chatgpt": "^9.3.682",
+    "@remnic/import-claude": "^9.3.682",
+    "@remnic/import-gemini": "^9.3.682",
+    "@remnic/import-lossless-claw": "^9.3.682",
+    "@remnic/import-mem0": "^9.3.682",
+    "@remnic/import-supermemory": "^9.3.682",
+    "@remnic/connector-limitless": "^9.3.682",
+    "@remnic/connector-bee": "^9.3.682",
+    "@remnic/connector-omi": "^9.3.682"
   },
   "peerDependenciesMeta": {
     "@remnic/bench": {
@@ -85,18 +85,18 @@
   "devDependencies": {
     "tsup": "^8.5.1",
     "typescript": "^5.9.3",
-    "@remnic/bench": "9.3.681",
-    "@remnic/export-weclone": "9.3.681",
-    "@remnic/import-weclone": "9.3.681",
-    "@remnic/import-chatgpt": "9.3.681",
-    "@remnic/import-claude": "9.3.681",
-    "@remnic/import-gemini": "9.3.681",
-    "@remnic/import-lossless-claw": "9.3.681",
-    "@remnic/import-mem0": "9.3.681",
-    "@remnic/connector-limitless": "9.3.681",
-    "@remnic/connector-bee": "9.3.681",
-    "@remnic/connector-omi": "9.3.681",
-    "@remnic/import-supermemory": "9.3.681"
+    "@remnic/bench": "9.3.682",
+    "@remnic/export-weclone": "9.3.682",
+    "@remnic/import-weclone": "9.3.682",
+    "@remnic/import-claude": "9.3.682",
+    "@remnic/import-chatgpt": "9.3.682",
+    "@remnic/import-lossless-claw": "9.3.682",
+    "@remnic/import-supermemory": "9.3.682",
+    "@remnic/import-mem0": "9.3.682",
+    "@remnic/import-gemini": "9.3.682",
+    "@remnic/connector-bee": "9.3.682",
+    "@remnic/connector-limitless": "9.3.682",
+    "@remnic/connector-omi": "9.3.682"
   },
   "license": "MIT",
   "repository": {