openmates 0.12.0-alpha.12 → 0.12.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41830,6 +41830,7 @@ async function handleBenchmark(client, subcommand, rest, flags) {
41830
41830
  const runs = parseRuns(flags.runs);
41831
41831
  const extensiveSize = parseExtensiveSize(flags["extensive-size"]);
41832
41832
  const parallel = parseParallel(flags.parallel);
41833
+ const caseIds = parseCaseIds(flags.case);
41833
41834
  const dryRun = flags["dry-run"] === true;
41834
41835
  const output = typeof flags.output === "string" ? flags.output : void 0;
41835
41836
  const runId = typeof flags["run-id"] === "string" ? flags["run-id"] : randomUUID3();
@@ -41839,7 +41840,7 @@ async function handleBenchmark(client, subcommand, rest, flags) {
41839
41840
  "Benchmark runs spend real credits from the logged-in account. Rerun with --confirm-spend-credits, or use --dry-run to preview the plan."
41840
41841
  );
41841
41842
  }
41842
- const cases = expandCases(suites, runs, extensiveSize);
41843
+ const cases = filterCases(expandCases(suites, runs, extensiveSize), caseIds);
41843
41844
  const pricing = loadPricingForModels([...targetModels, judgeModel]);
41844
41845
  const estimate = estimateCredits(cases, targetModels, judgeModel, pricing);
41845
41846
  const result = makeBaseResult({
@@ -41893,6 +41894,7 @@ Options:
41893
41894
  --dry-run Preview the benchmark plan without inference or spend
41894
41895
  --compare Compare two or more target models
41895
41896
  --suite <list> Comma-separated suites: quick, extensive, all (default: quick)
41897
+ --case <id[,id...]> Run only specific case id(s) from the selected suites
41896
41898
  --extensive-size <n> Extensive cases to run: 5, 10, or 20 (default: ${DEFAULT_EXTENSIVE_SIZE})
41897
41899
  --runs <n> Repeat each selected case (default: 1)
41898
41900
  --parallel <n> Concurrent target case requests (default: ${DEFAULT_PARALLEL})
@@ -41941,6 +41943,24 @@ function parseParallel(value) {
41941
41943
  }
41942
41944
  return parsed;
41943
41945
  }
41946
/**
 * Parse the raw `--case` flag value into a de-duplicated list of case ids.
 *
 * Accepts a comma-separated string, a number, or an array of those (as some
 * flag parsers produce when the flag is repeated). Ids are trimmed, empty
 * segments are dropped, and first-seen order is preserved.
 *
 * @param {string | number | boolean | Array<string | number> | undefined} value
 *   Raw flag value. `undefined` or `false` means the flag was not supplied.
 * @returns {string[]} Unique case ids; an empty array when the flag is absent.
 * @throws {Error} When the flag was given without a usable value (e.g. `true`
 *   or `null`), or when no non-empty id remains after trimming.
 */
function parseCaseIds(value) {
  if (value === void 0 || value === false) return [];
  const rawValues = Array.isArray(value) ? value : [value];
  const caseIds = rawValues.flatMap((rawValue) => {
    // Reject anything that cannot carry an id (bare flag => true, null, objects)
    // with a clear message instead of failing inside String.prototype.split.
    if (typeof rawValue !== "string" && typeof rawValue !== "number") {
      throw new Error("--case requires a case id");
    }
    return String(rawValue)
      .split(",")
      .map((caseId) => caseId.trim())
      .filter(Boolean);
  });
  if (caseIds.length === 0) throw new Error("--case requires at least one case id");
  // A Set keeps insertion order, so duplicates collapse to their first occurrence.
  return [...new Set(caseIds)];
}
41953
/**
 * Restrict a list of benchmark cases to the requested case ids.
 *
 * @param {Array<{ id: string }>} cases - Cases to choose from.
 * @param {string[]} caseIds - Requested ids; an empty list disables filtering.
 * @returns {Array<{ id: string }>} The input array itself when `caseIds` is
 *   empty, otherwise a new array holding every case whose id was requested,
 *   in the original order.
 * @throws {Error} When any requested id does not occur in `cases`; the message
 *   lists the unknown ids and the sorted ids that are available.
 */
function filterCases(cases, caseIds) {
  if (caseIds.length === 0) return cases;
  const availableIds = new Set(cases.map((benchmarkCase) => benchmarkCase.id));
  const missing = caseIds.filter((caseId) => !availableIds.has(caseId));
  if (missing.length > 0) {
    throw new Error(
      `Unknown benchmark case id(s): ${missing.join(", ")}. Available in selected suite(s): ${[...availableIds].sort().join(", ")}`
    );
  }
  // Build the lookup once so the filter does not rescan caseIds for every case.
  const requestedIds = new Set(caseIds);
  return cases.filter((benchmarkCase) => requestedIds.has(benchmarkCase.id));
}
41944
41964
  function expandCases(suites, runs, extensiveSize) {
41945
41965
  const selected = [];
41946
41966
  if (suites.includes("quick")) selected.push(...QUICK_CASES);
@@ -42145,7 +42165,14 @@ async function uploadBenchmarkImage(client, fileEmbed) {
42145
42165
  fileEmbed.embed.status = "finished";
42146
42166
  fileEmbed.embed.contentHash = uploadResult.content_hash;
42147
42167
  fileEmbed.embed.embedId = uploadResult.embed_id;
42148
- fileEmbed.referenceBlock = createEmbedReferenceBlock(embedRef);
42168
+ fileEmbed.referenceBlock = createBenchmarkEmbedReferenceBlock(fileEmbed.embed.embedId, fileEmbed.embed.type);
42169
+ }
42170
// Builds the text block that references an embed.
// The returned string is a template literal made of a leading line break, an
// empty line, and a fenced `json` code block whose only content is the
// JSON-serialized object { type: embedType, embed_id: embedId }.
// Parameters:
//   embedId   - value stored under the "embed_id" key of the JSON payload.
//   embedType - value stored under the "type" key of the JSON payload.
// NOTE(review): the exact whitespace inside the template literal cannot be
// confirmed from this diff rendering - verify against the built file before
// reformatting this function.
+ function createBenchmarkEmbedReferenceBlock(embedId, embedType) {
42171
+ return `
42172
+
42173
+ \`\`\`json
42174
+ ${JSON.stringify({ type: embedType, embed_id: embedId })}
42175
+ \`\`\``;
42149
42176
  }
42150
42177
  async function judgeCase(params) {
42151
42178
  const startedAt = Date.now();
package/dist/cli.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  getExtForLang,
4
4
  serializeToYaml
5
- } from "./chunk-R5Z4FBJJ.js";
5
+ } from "./chunk-YHOUQRWZ.js";
6
6
  import "./chunk-AXNRPVLE.js";
7
7
  export {
8
8
  getExtForLang,
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  deriveAppUrl,
10
10
  getExtForLang,
11
11
  serializeToYaml
12
- } from "./chunk-R5Z4FBJJ.js";
12
+ } from "./chunk-YHOUQRWZ.js";
13
13
  import "./chunk-AXNRPVLE.js";
14
14
  export {
15
15
  ASSISTANT_FEEDBACK_REPORT_TITLE,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openmates",
3
- "version": "0.12.0-alpha.12",
3
+ "version": "0.12.0-alpha.13",
4
4
  "description": "OpenMates CLI and SDK",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",