@ai-sdk-tool/eval 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2025 Woonggi Min (https://github.com/minpeter)
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
package/README.md CHANGED
@@ -122,3 +122,7 @@ export const politenessBenchmark: LanguageModelV2Benchmark = {
122
122
  // benchmarks: [politenessBenchmark],
123
123
  // });
124
124
  ```
125
+
126
+ ## License
127
+
128
+ Licensed under Apache License 2.0. See the repository `LICENSE`. Include the `NOTICE` file in distributions.
package/dist/index.cjs CHANGED
@@ -281,13 +281,13 @@ var reporters = {
281
281
  };
282
282
 
283
283
  // src/evaluate.ts
284
- async function runSingleBenchmark(model, benchmark, modelKey) {
284
+ async function runSingleBenchmark(model, benchmark, modelKey, config) {
285
285
  const modelId = typeof model === "object" && model !== null && "modelId" in model && typeof model.modelId === "string" ? model.modelId : "unknown-model";
286
286
  try {
287
287
  console.log(
288
288
  `[${modelId}]${modelKey ? ` (${modelKey})` : ""} Running benchmark: ${benchmark.name}...`
289
289
  );
290
- const result = await benchmark.run(model);
290
+ const result = await benchmark.run(model, config);
291
291
  console.log(
292
292
  `[${modelId}]${modelKey ? ` (${modelKey})` : ""} Finished benchmark: ${benchmark.name}. Score: ${result.score}`
293
293
  );
@@ -316,7 +316,7 @@ async function runSingleBenchmark(model, benchmark, modelKey) {
316
316
  }
317
317
  }
318
318
  async function evaluate(options) {
319
- const { models, benchmarks, reporter = "console" } = options;
319
+ const { models, benchmarks, reporter = "console", temperature } = options;
320
320
  const modelEntries = [];
321
321
  if (Array.isArray(models)) {
322
322
  for (const m of models) modelEntries.push([void 0, m]);
@@ -335,7 +335,8 @@ async function evaluate(options) {
335
335
  const evaluationResult = await runSingleBenchmark(
336
336
  model,
337
337
  benchmark,
338
- modelKey
338
+ modelKey,
339
+ temperature !== void 0 ? { temperature } : void 0
339
340
  );
340
341
  allResults.push(evaluationResult);
341
342
  }
@@ -675,7 +676,7 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
675
676
  name,
676
677
  version: "1.0.0",
677
678
  description,
678
- async run(model) {
679
+ async run(model, config) {
679
680
  const logs = [];
680
681
  let correctCount = 0;
681
682
  let testCases = [];
@@ -733,6 +734,8 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
733
734
  const runSingleCase = async (testCase) => {
734
735
  const caseLogs = [];
735
736
  const { function: tools, question: messages } = testCase;
737
+ const temp = config?.temperature;
738
+ const temperature = typeof temp === "number" ? temp : void 0;
736
739
  try {
737
740
  const flatMessages = Array.isArray(messages) && messages.some((m) => Array.isArray(m)) ? messages.flat(1) : messages;
738
741
  const nameMap = /* @__PURE__ */ new Map();
@@ -778,6 +781,7 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
778
781
  messages: flatMessages,
779
782
  tools: toolsMap,
780
783
  toolChoice: "auto",
784
+ ...temperature !== void 0 ? { temperature } : {},
781
785
  // Pass original schema information to middleware
782
786
  providerOptions: {
783
787
  toolCallMiddleware: {
@@ -1174,7 +1178,7 @@ var jsonGenerationBenchmark = {
1174
1178
  name: "json-generation",
1175
1179
  version: "2.1.0",
1176
1180
  description: "Evaluates schema-compliant JSON generation from natural language using JSON Schema prompts.",
1177
- async run(model) {
1181
+ async run(model, config) {
1178
1182
  const logs = [];
1179
1183
  const ajv = new import_ajv.default({ allErrors: true, strict: false });
1180
1184
  let schemaValidCount = 0;
@@ -1225,7 +1229,13 @@ var jsonGenerationBenchmark = {
1225
1229
  ].join("\n\n")
1226
1230
  }
1227
1231
  ];
1228
- const { text } = await (0, import_ai2.generateText)({ model, messages });
1232
+ const temp = config?.temperature;
1233
+ const temperature = typeof temp === "number" ? temp : void 0;
1234
+ const { text } = await (0, import_ai2.generateText)({
1235
+ model,
1236
+ messages,
1237
+ ...temperature !== void 0 ? { temperature } : {}
1238
+ });
1229
1239
  let parsed;
1230
1240
  try {
1231
1241
  parsed = extractFirstJsonBlock(text);
@@ -1285,7 +1295,7 @@ var jsonGenerationSchemaOnlyBenchmark = {
1285
1295
  name: "json-generation-schema-only",
1286
1296
  version: "1.0.1",
1287
1297
  description: "Evaluates whether model outputs strictly conform to the provided JSON Schema (structure only).",
1288
- async run(model) {
1298
+ async run(model, config) {
1289
1299
  const logs = [];
1290
1300
  const ajv = new import_ajv.default({ allErrors: true, strict: false });
1291
1301
  let tests = [];
@@ -1327,7 +1337,13 @@ var jsonGenerationSchemaOnlyBenchmark = {
1327
1337
  ].join("\n\n")
1328
1338
  }
1329
1339
  ];
1330
- const { text } = await (0, import_ai2.generateText)({ model, messages });
1340
+ const temp = config?.temperature;
1341
+ const temperature = typeof temp === "number" ? temp : void 0;
1342
+ const { text } = await (0, import_ai2.generateText)({
1343
+ model,
1344
+ messages,
1345
+ ...temperature !== void 0 ? { temperature } : {}
1346
+ });
1331
1347
  let parsed;
1332
1348
  try {
1333
1349
  parsed = extractFirstJsonBlock(text);