@tangle-network/agent-eval 0.69.0 → 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/adapters/http.d.ts +1 -1
- package/dist/adapters/http.js +1 -1
- package/dist/adapters/http.js.map +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/otel.d.ts +2 -2
- package/dist/campaign/index.d.ts +19 -7
- package/dist/campaign/index.js +29 -5
- package/dist/campaign/index.js.map +1 -1
- package/dist/{chunk-E24XD7A2.js → chunk-6QZUCFKM.js} +2 -2
- package/dist/{chunk-3B7Y5AUR.js → chunk-GWGO2K6Y.js} +3 -2
- package/dist/chunk-GWGO2K6Y.js.map +1 -0
- package/dist/{chunk-Z4ZCBC7M.js → chunk-ODGETRTM.js} +4 -3
- package/dist/chunk-ODGETRTM.js.map +1 -0
- package/dist/{chunk-JFGZPUMU.js → chunk-VMAYE3LM.js} +26 -6
- package/dist/chunk-VMAYE3LM.js.map +1 -0
- package/dist/contract/index.d.ts +8 -8
- package/dist/contract/index.js +2 -2
- package/dist/hosted/index.d.ts +2 -2
- package/dist/{index-DSEHMwvS.d.ts → index-BGBrVS24.d.ts} +1 -1
- package/dist/index.d.ts +7 -2
- package/dist/index.js +21 -16
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/pipelines/index.js +1 -1
- package/dist/{provenance-CChUqexv.d.ts → provenance-C69gLUXH.d.ts} +3 -3
- package/dist/rl.d.ts +156 -2
- package/dist/rl.js +195 -6
- package/dist/rl.js.map +1 -1
- package/dist/{run-improvement-loop-BKpM5T4t.d.ts → run-improvement-loop-Bzamo6GB.d.ts} +1 -1
- package/dist/traces.js +1 -1
- package/dist/{types-c2R2kfmv.d.ts → types-CnmZ2bkP.d.ts} +7 -1
- package/package.json +1 -1
- package/dist/chunk-3B7Y5AUR.js.map +0 -1
- package/dist/chunk-JFGZPUMU.js.map +0 -1
- package/dist/chunk-Z4ZCBC7M.js.map +0 -1
- package/dist/{chunk-E24XD7A2.js.map → chunk-6QZUCFKM.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -31,7 +31,7 @@ import {
|
|
|
31
31
|
scoreRedTeamOutput,
|
|
32
32
|
surfaceContentHash,
|
|
33
33
|
toolNamesForRun
|
|
34
|
-
} from "./chunk-JFGZPUMU.js";
|
|
34
|
+
} from "./chunk-VMAYE3LM.js";
|
|
35
35
|
import {
|
|
36
36
|
BackendIntegrityError,
|
|
37
37
|
assertRealBackend,
|
|
@@ -49,7 +49,7 @@ import {
|
|
|
49
49
|
computeToolUseMetrics,
|
|
50
50
|
iqr,
|
|
51
51
|
welchsTTest
|
|
52
|
-
} from "./chunk-3B7Y5AUR.js";
|
|
52
|
+
} from "./chunk-GWGO2K6Y.js";
|
|
53
53
|
import {
|
|
54
54
|
exportTrainingData,
|
|
55
55
|
toNdjson
|
|
@@ -204,7 +204,7 @@ import {
|
|
|
204
204
|
tokenizeDomainWords,
|
|
205
205
|
traceAnalystFunctionGroup,
|
|
206
206
|
traceAnalystOnRunComplete
|
|
207
|
-
} from "./chunk-Z4ZCBC7M.js";
|
|
207
|
+
} from "./chunk-ODGETRTM.js";
|
|
208
208
|
import {
|
|
209
209
|
DEFAULT_REDACTION_RULES,
|
|
210
210
|
REDACTION_VERSION,
|
|
@@ -2595,14 +2595,15 @@ async function executeScenario(tc, scenario, config) {
|
|
|
2595
2595
|
const content = resp.choices?.[0]?.message?.content ?? "";
|
|
2596
2596
|
messages.push({ role: "assistant", content });
|
|
2597
2597
|
const codeRe = /```(\w+)?\n([\s\S]*?)```/g;
|
|
2598
|
-
let codeMatch;
|
|
2599
|
-
while (
|
|
2598
|
+
let codeMatch = codeRe.exec(content);
|
|
2599
|
+
while (codeMatch !== null) {
|
|
2600
2600
|
allCodeBlocks.push({ language: codeMatch[1] ?? "text", code: codeMatch[2] ?? "" });
|
|
2601
|
+
codeMatch = codeRe.exec(content);
|
|
2601
2602
|
}
|
|
2602
2603
|
const turnBlocks = [];
|
|
2603
|
-
let blockMatch;
|
|
2604
2604
|
const blockReLocal = new RegExp(blockRe.source, blockRe.flags);
|
|
2605
|
-
|
|
2605
|
+
let blockMatch = blockReLocal.exec(content);
|
|
2606
|
+
while (blockMatch !== null) {
|
|
2606
2607
|
const fields = {};
|
|
2607
2608
|
for (const line of (blockMatch[2] ?? "").split("\n")) {
|
|
2608
2609
|
const idx = line.indexOf(":");
|
|
@@ -2611,15 +2612,17 @@ async function executeScenario(tc, scenario, config) {
|
|
|
2611
2612
|
const blockType = blockMatch[1] ?? "";
|
|
2612
2613
|
allBlocks.push({ type: blockType, fields });
|
|
2613
2614
|
turnBlocks.push({ type: blockType, title: fields.title ?? "" });
|
|
2615
|
+
blockMatch = blockReLocal.exec(content);
|
|
2614
2616
|
}
|
|
2615
2617
|
let hasToolCall = false;
|
|
2616
2618
|
if (config.toolCallPatterns) {
|
|
2617
2619
|
for (const pattern of config.toolCallPatterns) {
|
|
2618
2620
|
const re = new RegExp(pattern.source, pattern.flags);
|
|
2619
|
-
let toolMatch;
|
|
2620
|
-
while (
|
|
2621
|
+
let toolMatch = re.exec(content);
|
|
2622
|
+
while (toolMatch !== null) {
|
|
2621
2623
|
allToolCalls.push(toolMatch[0]);
|
|
2622
2624
|
hasToolCall = true;
|
|
2625
|
+
toolMatch = re.exec(content);
|
|
2623
2626
|
}
|
|
2624
2627
|
}
|
|
2625
2628
|
}
|
|
@@ -2941,14 +2944,15 @@ var ProductClient = class {
|
|
|
2941
2944
|
}
|
|
2942
2945
|
}
|
|
2943
2946
|
const blockRe = /:::(\w+)\s*\n([\s\S]*?)\n\s*:::/g;
|
|
2944
|
-
let match;
|
|
2945
|
-
while (
|
|
2947
|
+
let match = blockRe.exec(text);
|
|
2948
|
+
while (match !== null) {
|
|
2946
2949
|
const fields = {};
|
|
2947
2950
|
for (const line of match[2].split("\n")) {
|
|
2948
2951
|
const idx = line.indexOf(":");
|
|
2949
2952
|
if (idx > 0) fields[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
|
|
2950
2953
|
}
|
|
2951
2954
|
blocks.push({ type: match[1], title: fields.title ?? "" });
|
|
2955
|
+
match = blockRe.exec(text);
|
|
2952
2956
|
}
|
|
2953
2957
|
return { text, blocks };
|
|
2954
2958
|
}
|
|
@@ -4680,8 +4684,8 @@ function analyzeAntiSlop(outputs, config) {
|
|
|
4680
4684
|
const lower = output.toLowerCase();
|
|
4681
4685
|
for (const phrase of config.bannedPhrases) {
|
|
4682
4686
|
const needle = phrase.toLowerCase();
|
|
4683
|
-
let idx = 0;
|
|
4684
|
-
while (
|
|
4687
|
+
let idx = lower.indexOf(needle, 0);
|
|
4688
|
+
while (idx !== -1) {
|
|
4685
4689
|
counts.banned_phrase += 1;
|
|
4686
4690
|
if (issues.length < 20) {
|
|
4687
4691
|
issues.push({
|
|
@@ -4690,7 +4694,7 @@ function analyzeAntiSlop(outputs, config) {
|
|
|
4690
4694
|
example: snippet(output, idx, phrase.length)
|
|
4691
4695
|
});
|
|
4692
4696
|
}
|
|
4693
|
-
idx
|
|
4697
|
+
idx = lower.indexOf(needle, idx + needle.length);
|
|
4694
4698
|
}
|
|
4695
4699
|
}
|
|
4696
4700
|
for (const re of config.bannedOpenings) {
|
|
@@ -8623,13 +8627,14 @@ function extractAssetUrls(html, baseUrl) {
|
|
|
8623
8627
|
const linkRe = /<link\b[^>]*\bhref\s*=\s*["']([^"']+)["'][^>]*>/gi;
|
|
8624
8628
|
const scriptRe = /<script\b[^>]*\bsrc\s*=\s*["']([^"']+)["'][^>]*>/gi;
|
|
8625
8629
|
for (const re of [linkRe, scriptRe]) {
|
|
8626
|
-
let match;
|
|
8627
|
-
while (
|
|
8630
|
+
let match = re.exec(html);
|
|
8631
|
+
while (match !== null) {
|
|
8628
8632
|
const raw = match[1];
|
|
8629
8633
|
try {
|
|
8630
8634
|
urls.add(new URL(raw, baseUrl).toString());
|
|
8631
8635
|
} catch {
|
|
8632
8636
|
}
|
|
8637
|
+
match = re.exec(html);
|
|
8633
8638
|
}
|
|
8634
8639
|
}
|
|
8635
8640
|
return Array.from(urls);
|