@tangle-network/agent-eval 0.69.0 → 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +11 -0
  2. package/dist/adapters/http.d.ts +1 -1
  3. package/dist/adapters/http.js +1 -1
  4. package/dist/adapters/http.js.map +1 -1
  5. package/dist/adapters/langchain.d.ts +1 -1
  6. package/dist/adapters/otel.d.ts +2 -2
  7. package/dist/campaign/index.d.ts +19 -7
  8. package/dist/campaign/index.js +29 -5
  9. package/dist/campaign/index.js.map +1 -1
  10. package/dist/{chunk-E24XD7A2.js → chunk-6QZUCFKM.js} +2 -2
  11. package/dist/{chunk-3B7Y5AUR.js → chunk-GWGO2K6Y.js} +3 -2
  12. package/dist/chunk-GWGO2K6Y.js.map +1 -0
  13. package/dist/{chunk-Z4ZCBC7M.js → chunk-ODGETRTM.js} +4 -3
  14. package/dist/chunk-ODGETRTM.js.map +1 -0
  15. package/dist/{chunk-JFGZPUMU.js → chunk-VMAYE3LM.js} +26 -6
  16. package/dist/chunk-VMAYE3LM.js.map +1 -0
  17. package/dist/contract/index.d.ts +8 -8
  18. package/dist/contract/index.js +2 -2
  19. package/dist/hosted/index.d.ts +2 -2
  20. package/dist/{index-DSEHMwvS.d.ts → index-BGBrVS24.d.ts} +1 -1
  21. package/dist/index.d.ts +7 -2
  22. package/dist/index.js +21 -16
  23. package/dist/index.js.map +1 -1
  24. package/dist/openapi.json +1 -1
  25. package/dist/pipelines/index.js +1 -1
  26. package/dist/{provenance-CChUqexv.d.ts → provenance-C69gLUXH.d.ts} +3 -3
  27. package/dist/rl.d.ts +156 -2
  28. package/dist/rl.js +195 -6
  29. package/dist/rl.js.map +1 -1
  30. package/dist/{run-improvement-loop-BKpM5T4t.d.ts → run-improvement-loop-Bzamo6GB.d.ts} +1 -1
  31. package/dist/traces.js +1 -1
  32. package/dist/{types-c2R2kfmv.d.ts → types-CnmZ2bkP.d.ts} +7 -1
  33. package/package.json +1 -1
  34. package/dist/chunk-3B7Y5AUR.js.map +0 -1
  35. package/dist/chunk-JFGZPUMU.js.map +0 -1
  36. package/dist/chunk-Z4ZCBC7M.js.map +0 -1
  37. package/dist/{chunk-E24XD7A2.js.map → chunk-6QZUCFKM.js.map} +0 -0
package/dist/index.js CHANGED
@@ -31,7 +31,7 @@ import {
31
31
  scoreRedTeamOutput,
32
32
  surfaceContentHash,
33
33
  toolNamesForRun
34
- } from "./chunk-JFGZPUMU.js";
34
+ } from "./chunk-VMAYE3LM.js";
35
35
  import {
36
36
  BackendIntegrityError,
37
37
  assertRealBackend,
@@ -49,7 +49,7 @@ import {
49
49
  computeToolUseMetrics,
50
50
  iqr,
51
51
  welchsTTest
52
- } from "./chunk-3B7Y5AUR.js";
52
+ } from "./chunk-GWGO2K6Y.js";
53
53
  import {
54
54
  exportTrainingData,
55
55
  toNdjson
@@ -204,7 +204,7 @@ import {
204
204
  tokenizeDomainWords,
205
205
  traceAnalystFunctionGroup,
206
206
  traceAnalystOnRunComplete
207
- } from "./chunk-Z4ZCBC7M.js";
207
+ } from "./chunk-ODGETRTM.js";
208
208
  import {
209
209
  DEFAULT_REDACTION_RULES,
210
210
  REDACTION_VERSION,
@@ -2595,14 +2595,15 @@ async function executeScenario(tc, scenario, config) {
2595
2595
  const content = resp.choices?.[0]?.message?.content ?? "";
2596
2596
  messages.push({ role: "assistant", content });
2597
2597
  const codeRe = /```(\w+)?\n([\s\S]*?)```/g;
2598
- let codeMatch;
2599
- while ((codeMatch = codeRe.exec(content)) !== null) {
2598
+ let codeMatch = codeRe.exec(content);
2599
+ while (codeMatch !== null) {
2600
2600
  allCodeBlocks.push({ language: codeMatch[1] ?? "text", code: codeMatch[2] ?? "" });
2601
+ codeMatch = codeRe.exec(content);
2601
2602
  }
2602
2603
  const turnBlocks = [];
2603
- let blockMatch;
2604
2604
  const blockReLocal = new RegExp(blockRe.source, blockRe.flags);
2605
- while ((blockMatch = blockReLocal.exec(content)) !== null) {
2605
+ let blockMatch = blockReLocal.exec(content);
2606
+ while (blockMatch !== null) {
2606
2607
  const fields = {};
2607
2608
  for (const line of (blockMatch[2] ?? "").split("\n")) {
2608
2609
  const idx = line.indexOf(":");
@@ -2611,15 +2612,17 @@ async function executeScenario(tc, scenario, config) {
2611
2612
  const blockType = blockMatch[1] ?? "";
2612
2613
  allBlocks.push({ type: blockType, fields });
2613
2614
  turnBlocks.push({ type: blockType, title: fields.title ?? "" });
2615
+ blockMatch = blockReLocal.exec(content);
2614
2616
  }
2615
2617
  let hasToolCall = false;
2616
2618
  if (config.toolCallPatterns) {
2617
2619
  for (const pattern of config.toolCallPatterns) {
2618
2620
  const re = new RegExp(pattern.source, pattern.flags);
2619
- let toolMatch;
2620
- while ((toolMatch = re.exec(content)) !== null) {
2621
+ let toolMatch = re.exec(content);
2622
+ while (toolMatch !== null) {
2621
2623
  allToolCalls.push(toolMatch[0]);
2622
2624
  hasToolCall = true;
2625
+ toolMatch = re.exec(content);
2623
2626
  }
2624
2627
  }
2625
2628
  }
@@ -2941,14 +2944,15 @@ var ProductClient = class {
2941
2944
  }
2942
2945
  }
2943
2946
  const blockRe = /:::(\w+)\s*\n([\s\S]*?)\n\s*:::/g;
2944
- let match;
2945
- while ((match = blockRe.exec(text)) !== null) {
2947
+ let match = blockRe.exec(text);
2948
+ while (match !== null) {
2946
2949
  const fields = {};
2947
2950
  for (const line of match[2].split("\n")) {
2948
2951
  const idx = line.indexOf(":");
2949
2952
  if (idx > 0) fields[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
2950
2953
  }
2951
2954
  blocks.push({ type: match[1], title: fields.title ?? "" });
2955
+ match = blockRe.exec(text);
2952
2956
  }
2953
2957
  return { text, blocks };
2954
2958
  }
@@ -4680,8 +4684,8 @@ function analyzeAntiSlop(outputs, config) {
4680
4684
  const lower = output.toLowerCase();
4681
4685
  for (const phrase of config.bannedPhrases) {
4682
4686
  const needle = phrase.toLowerCase();
4683
- let idx = 0;
4684
- while ((idx = lower.indexOf(needle, idx)) !== -1) {
4687
+ let idx = lower.indexOf(needle, 0);
4688
+ while (idx !== -1) {
4685
4689
  counts.banned_phrase += 1;
4686
4690
  if (issues.length < 20) {
4687
4691
  issues.push({
@@ -4690,7 +4694,7 @@ function analyzeAntiSlop(outputs, config) {
4690
4694
  example: snippet(output, idx, phrase.length)
4691
4695
  });
4692
4696
  }
4693
- idx += needle.length;
4697
+ idx = lower.indexOf(needle, idx + needle.length);
4694
4698
  }
4695
4699
  }
4696
4700
  for (const re of config.bannedOpenings) {
@@ -8623,13 +8627,14 @@ function extractAssetUrls(html, baseUrl) {
8623
8627
  const linkRe = /<link\b[^>]*\bhref\s*=\s*["']([^"']+)["'][^>]*>/gi;
8624
8628
  const scriptRe = /<script\b[^>]*\bsrc\s*=\s*["']([^"']+)["'][^>]*>/gi;
8625
8629
  for (const re of [linkRe, scriptRe]) {
8626
- let match;
8627
- while ((match = re.exec(html)) !== null) {
8630
+ let match = re.exec(html);
8631
+ while (match !== null) {
8628
8632
  const raw = match[1];
8629
8633
  try {
8630
8634
  urls.add(new URL(raw, baseUrl).toString());
8631
8635
  } catch {
8632
8636
  }
8637
+ match = re.exec(html);
8633
8638
  }
8634
8639
  }
8635
8640
  return Array.from(urls);