@remnic/cli 9.3.681 → 9.3.682

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +60 -8
  2. package/package.json +28 -28
package/dist/index.js CHANGED
@@ -588,7 +588,7 @@ var PUBLISHED_BENCHMARK_NAMES = Object.freeze([
588
588
  "membench"
589
589
  ]);
590
590
  function isBenchRuntimeProfile(value) {
591
- return value === "baseline" || value === "real" || value === "openclaw-chain";
591
+ return value === "baseline" || value === "real" || value === "openclaw-chain" || value === "local-lab";
592
592
  }
593
593
  function parseBenchRuntimeProfile(value, flagName) {
594
594
  if (isBenchRuntimeProfile(value)) {
@@ -596,11 +596,11 @@ function parseBenchRuntimeProfile(value, flagName) {
596
596
  }
597
597
  if (flagName === "--runtime-profile") {
598
598
  throw new Error(
599
- 'ERROR: --runtime-profile must be "baseline", "real", or "openclaw-chain".'
599
+ 'ERROR: --runtime-profile must be "baseline", "real", "openclaw-chain", or "local-lab".'
600
600
  );
601
601
  }
602
602
  throw new Error(
603
- 'ERROR: --matrix must contain only "baseline", "real", or "openclaw-chain".'
603
+ 'ERROR: --matrix must contain only "baseline", "real", "openclaw-chain", or "local-lab".'
604
604
  );
605
605
  }
606
606
  var BENCH_PROVIDER_ALLOWED = Object.freeze([
@@ -695,7 +695,8 @@ var BENCH_VALUE_FLAGS = Object.freeze([
695
695
  "--ama-bench-cross-judge-model",
696
696
  "--ama-bench-cross-judge-base-url",
697
697
  "--ama-bench-cross-judge-api-key",
698
- "--ama-bench-cross-judge-codex-reasoning-effort"
698
+ "--ama-bench-cross-judge-codex-reasoning-effort",
699
+ "--local-lab-manifest"
699
700
  ]);
700
701
  var BENCH_BOOLEAN_FLAGS = Object.freeze([
701
702
  "--quick",
@@ -767,7 +768,8 @@ var RUN_VALUE_FLAGS = Object.freeze([
767
768
  "--ama-bench-cross-judge-model",
768
769
  "--ama-bench-cross-judge-base-url",
769
770
  "--ama-bench-cross-judge-api-key",
770
- "--ama-bench-cross-judge-codex-reasoning-effort"
771
+ "--ama-bench-cross-judge-codex-reasoning-effort",
772
+ "--local-lab-manifest"
771
773
  ]);
772
774
  var RUN_BOOLEAN_FLAGS = Object.freeze([
773
775
  "--quick",
@@ -1000,6 +1002,7 @@ function parseBenchArgs(argv) {
1000
1002
  const requestTimeoutRaw = readBenchOptionValue(args, "--request-timeout");
1001
1003
  const drainTimeoutRaw = readBenchOptionValue(args, "--drain-timeout");
1002
1004
  const judgeCacheDirRaw = readBenchOptionValue(args, "--judge-cache-dir");
1005
+ const localLabManifestRaw = readBenchOptionValue(args, "--local-lab-manifest");
1003
1006
  const max429WaitRaw = readBenchOptionValue(args, "--max-429-wait");
1004
1007
  const amaBenchJudgeProtocolRaw = readBenchOptionValue(args, "--ama-bench-judge-protocol");
1005
1008
  const amaBenchCrossJudgeProviderRaw = readBenchOptionValue(args, "--ama-bench-cross-judge-provider");
@@ -1022,7 +1025,7 @@ function parseBenchArgs(argv) {
1022
1025
  const candidates = matrixRaw.split(",").map((value) => value.trim()).filter((value) => value.length > 0);
1023
1026
  if (candidates.length === 0) {
1024
1027
  throw new Error(
1025
- 'ERROR: --matrix must contain one or more of "baseline", "real", or "openclaw-chain".'
1028
+ 'ERROR: --matrix must contain one or more of "baseline", "real", "openclaw-chain", or "local-lab".'
1026
1029
  );
1027
1030
  }
1028
1031
  matrixProfiles = candidates.map(
@@ -1404,6 +1407,7 @@ function parseBenchArgs(argv) {
1404
1407
  // Issue #1573 PR1: surface judge-cache flags into the runner options.
1405
1408
  noJudgeCache: args.includes("--no-judge-cache"),
1406
1409
  judgeCacheDir: judgeCacheDirRaw ? path4.resolve(expandTilde(judgeCacheDirRaw)) : void 0,
1410
+ localLabManifestPath: localLabManifestRaw ? path4.resolve(expandTilde(localLabManifestRaw)) : void 0,
1407
1411
  max429WaitMs,
1408
1412
  disableThinking: args.includes("--disable-thinking"),
1409
1413
  amaBenchJudgeProtocol,
@@ -3252,7 +3256,7 @@ Commands:
3252
3256
  Options:
3253
3257
  --quick Run a lightweight quick pass (maps to --lightweight --limit 1)
3254
3258
  --all Run every published benchmark
3255
- --runtime-profile <baseline|real|openclaw-chain>
3259
+ --runtime-profile <baseline|real|openclaw-chain|local-lab>
3256
3260
  Choose the benchmark runtime profile
3257
3261
  --matrix <profiles> Run a benchmark across a comma-separated profile matrix
3258
3262
  --dataset-dir <path> Override the benchmark dataset directory for full runs
@@ -3304,6 +3308,8 @@ Options:
3304
3308
  --baselines-dir <path> Override the named baseline directory
3305
3309
  --request-timeout <ms> Provider request timeout in milliseconds
3306
3310
  --drain-timeout <ms> Memory drain timeout in milliseconds (defaults to --request-timeout when unset)
3311
+ --local-lab-manifest <path>
3312
+ Path to a local-lab manifest JSON file (required for --runtime-profile local-lab)
3307
3313
  --threshold <value> Regression threshold for compare (default: 0.05)
3308
3314
  --trial-limit <n> Cap scored LoCoMo or MemoryAgentBench QA trials for staged published runs
3309
3315
  --task-filter <pattern> BEAM diagnostic filter; match task id, ability, or question text
@@ -3373,7 +3379,8 @@ function buildBenchRuntimeProfileRequest(parsed, runtimeProfile) {
3373
3379
  drainTimeout: parsed.drainTimeout,
3374
3380
  max429WaitMs: parsed.max429WaitMs,
3375
3381
  disableThinking: parsed.disableThinking,
3376
- lcmObserveConcurrency: parsed.publishedIngestConcurrency
3382
+ lcmObserveConcurrency: parsed.publishedIngestConcurrency,
3383
+ localLabManifestPath: runtimeProfile === "local-lab" ? parsed.localLabManifestPath : void 0
3377
3384
  };
3378
3385
  }
3379
3386
  var BENCH_STDOUT_REDACTED_SECRET = "[REDACTED]";
@@ -3493,6 +3500,11 @@ async function runBenchViaFallback(parsed, benchmarkId, runtimeProfile) {
3493
3500
  'Fallback benchmark runner does not support --runtime-profile "openclaw-chain". Build/install @remnic/bench to use package-backed runtime profiles.'
3494
3501
  );
3495
3502
  }
3503
+ if (runtimeProfile === "local-lab") {
3504
+ throw new Error(
3505
+ 'Fallback benchmark runner does not support --runtime-profile "local-lab". Build/install @remnic/bench to use package-backed runtime profiles with local-lab manifests.'
3506
+ );
3507
+ }
3496
3508
  const unsupportedOptions = findUnsupportedFallbackBenchOptions(parsed);
3497
3509
  if (unsupportedOptions.length > 0) {
3498
3510
  throw new Error(
@@ -4739,6 +4751,44 @@ async function loadPublishedPromotionHelpers() {
4739
4751
  }
4740
4752
  };
4741
4753
  }
4754
+ async function preflightLocalLabEndpointsIfNeeded(benchModule, plan) {
4755
+ if (plan.runtime.profile !== "local-lab" || !plan.runtime.localLab || !benchModule.preflightLocalLabRole) {
4756
+ return;
4757
+ }
4758
+ const localLab = plan.runtime.localLab;
4759
+ const preflightRole = benchModule.preflightLocalLabRole;
4760
+ const responder = localLab.responder;
4761
+ const responderResult = await preflightRole({
4762
+ provider: responder.provider,
4763
+ baseUrl: responder.baseUrl,
4764
+ model: responder.model,
4765
+ ctx: responder.ctx
4766
+ });
4767
+ if (!responderResult.ok) {
4768
+ throw new Error(
4769
+ `local-lab responder endpoint preflight failed: ${responderResult.reason}`
4770
+ );
4771
+ }
4772
+ const judge = localLab.judge;
4773
+ const stripSlash = (url) => url.endsWith("/") ? url.slice(0, -1) : url;
4774
+ const sameEndpoint = stripSlash(responder.baseUrl) === stripSlash(judge.baseUrl);
4775
+ if (!sameEndpoint) {
4776
+ throw new Error(
4777
+ "local-lab multi-endpoint sequential phase execution is not yet wired into the benchmark runner (PR3 calibration scope). The published harness runs recall\u2192answer\u2192judge per trial, which requires both models to be co-resident. Use a single-endpoint profile (both responder and judge models on one Ollama instance) or wait for PR3's runSequentialPhases integration."
4778
+ );
4779
+ }
4780
+ const judgeResult = await preflightRole({
4781
+ provider: judge.provider,
4782
+ baseUrl: judge.baseUrl,
4783
+ model: judge.model,
4784
+ ctx: judge.ctx
4785
+ });
4786
+ if (!judgeResult.ok) {
4787
+ throw new Error(
4788
+ `local-lab judge endpoint preflight failed: ${judgeResult.reason}`
4789
+ );
4790
+ }
4791
+ }
4742
4792
  async function runBenchViaPackage(parsed, benchmarkId, runtimeProfile, benchStatusPath) {
4743
4793
  const loaded = await tryLoadBenchModule();
4744
4794
  if (!loaded) return { ok: false };
@@ -4760,6 +4810,7 @@ async function runBenchViaPackage(parsed, benchmarkId, runtimeProfile, benchStat
4760
4810
  if (!plan) {
4761
4811
  return { ok: false };
4762
4812
  }
4813
+ await preflightLocalLabEndpointsIfNeeded(benchModule, plan);
4763
4814
  const outputDir = parsed.resultsDir ?? resolveBenchOutputDir();
4764
4815
  const datasetDir = resolveBenchDatasetDir(
4765
4816
  benchmarkId,
@@ -5010,6 +5061,7 @@ async function runCustomBenchViaPackage(parsed) {
5010
5061
  const writtenPaths = [];
5011
5062
  const customBenchmarkIds = [];
5012
5063
  for (const plan of plans) {
5064
+ await preflightLocalLabEndpointsIfNeeded(benchModule, plan);
5013
5065
  const system = await plan.createAdapter(plan.runtime.adapterOptions);
5014
5066
  try {
5015
5067
  const result = await benchModule.runCustomBenchmarkFile(parsed.custom, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@remnic/cli",
3
- "version": "9.3.681",
3
+ "version": "9.3.682",
4
4
  "description": "CLI for Remnic memory — init, query, doctor, daemon management",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -26,23 +26,23 @@
26
26
  },
27
27
  "dependencies": {
28
28
  "yaml": "^2.4.2",
29
- "@remnic/server": "^9.3.681",
30
- "@remnic/core": "^9.3.681",
31
- "@remnic/plugin-pi": "^9.3.681"
29
+ "@remnic/plugin-pi": "^9.3.682",
30
+ "@remnic/server": "^9.3.682",
31
+ "@remnic/core": "^9.3.682"
32
32
  },
33
33
  "peerDependencies": {
34
- "@remnic/bench": "^9.3.681",
35
- "@remnic/export-weclone": "^9.3.681",
36
- "@remnic/import-weclone": "^9.3.681",
37
- "@remnic/import-chatgpt": "^9.3.681",
38
- "@remnic/import-claude": "^9.3.681",
39
- "@remnic/import-gemini": "^9.3.681",
40
- "@remnic/import-lossless-claw": "^9.3.681",
41
- "@remnic/import-mem0": "^9.3.681",
42
- "@remnic/import-supermemory": "^9.3.681",
43
- "@remnic/connector-limitless": "^9.3.681",
44
- "@remnic/connector-bee": "^9.3.681",
45
- "@remnic/connector-omi": "^9.3.681"
34
+ "@remnic/bench": "^9.3.682",
35
+ "@remnic/export-weclone": "^9.3.682",
36
+ "@remnic/import-weclone": "^9.3.682",
37
+ "@remnic/import-chatgpt": "^9.3.682",
38
+ "@remnic/import-claude": "^9.3.682",
39
+ "@remnic/import-gemini": "^9.3.682",
40
+ "@remnic/import-lossless-claw": "^9.3.682",
41
+ "@remnic/import-mem0": "^9.3.682",
42
+ "@remnic/import-supermemory": "^9.3.682",
43
+ "@remnic/connector-limitless": "^9.3.682",
44
+ "@remnic/connector-bee": "^9.3.682",
45
+ "@remnic/connector-omi": "^9.3.682"
46
46
  },
47
47
  "peerDependenciesMeta": {
48
48
  "@remnic/bench": {
@@ -85,18 +85,18 @@
85
85
  "devDependencies": {
86
86
  "tsup": "^8.5.1",
87
87
  "typescript": "^5.9.3",
88
- "@remnic/bench": "9.3.681",
89
- "@remnic/export-weclone": "9.3.681",
90
- "@remnic/import-weclone": "9.3.681",
91
- "@remnic/import-chatgpt": "9.3.681",
92
- "@remnic/import-claude": "9.3.681",
93
- "@remnic/import-gemini": "9.3.681",
94
- "@remnic/import-lossless-claw": "9.3.681",
95
- "@remnic/import-mem0": "9.3.681",
96
- "@remnic/connector-limitless": "9.3.681",
97
- "@remnic/connector-bee": "9.3.681",
98
- "@remnic/connector-omi": "9.3.681",
99
- "@remnic/import-supermemory": "9.3.681"
88
+ "@remnic/bench": "9.3.682",
89
+ "@remnic/export-weclone": "9.3.682",
90
+ "@remnic/import-weclone": "9.3.682",
91
+ "@remnic/import-claude": "9.3.682",
92
+ "@remnic/import-chatgpt": "9.3.682",
93
+ "@remnic/import-lossless-claw": "9.3.682",
94
+ "@remnic/import-supermemory": "9.3.682",
95
+ "@remnic/import-mem0": "9.3.682",
96
+ "@remnic/import-gemini": "9.3.682",
97
+ "@remnic/connector-bee": "9.3.682",
98
+ "@remnic/connector-limitless": "9.3.682",
99
+ "@remnic/connector-omi": "9.3.682"
100
100
  },
101
101
  "license": "MIT",
102
102
  "repository": {