@tangle-network/agent-eval 0.54.0 → 0.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/campaign/index.js +3 -3
- package/dist/{chunk-J3EIOI3O.js → chunk-74Y2EMNH.js} +2 -2
- package/dist/{chunk-UBQGWD3O.js → chunk-AIXHUIHG.js} +2 -2
- package/dist/{chunk-YXTT6GSZ.js → chunk-GM476SZU.js} +3 -2
- package/dist/chunk-GM476SZU.js.map +1 -0
- package/dist/{chunk-YXD7GWJI.js → chunk-JB4UWIM6.js} +3 -3
- package/dist/{chunk-EGIPWXHL.js → chunk-OLIBRKRD.js} +2 -2
- package/dist/{chunk-H4TOS272.js → chunk-QDOSODID.js} +2 -2
- package/dist/{chunk-WP7SY7AI.js → chunk-S3SDD56V.js} +48 -1
- package/dist/chunk-S3SDD56V.js.map +1 -0
- package/dist/contract/index.d.ts +98 -1
- package/dist/contract/index.js +78 -4
- package/dist/contract/index.js.map +1 -1
- package/dist/index.d.ts +110 -5
- package/dist/index.js +139 -5
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/pipelines/index.js +2 -2
- package/dist/{release-report-B6l5fi7T.d.ts → release-report-DmPjIce3.d.ts} +44 -1
- package/dist/reporting.d.ts +1 -1
- package/dist/reporting.js +3 -3
- package/dist/{researcher-D4AZjxNa.d.ts → researcher-JP8EvnLv.d.ts} +6 -1
- package/dist/rl.d.ts +2 -2
- package/dist/rl.js +3 -3
- package/dist/{run-campaign-6UEVBPP3.js → run-campaign-ZURVWMMI.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-WP7SY7AI.js.map +0 -1
- package/dist/chunk-YXTT6GSZ.js.map +0 -1
- /package/dist/{chunk-J3EIOI3O.js.map → chunk-74Y2EMNH.js.map} +0 -0
- /package/dist/{chunk-UBQGWD3O.js.map → chunk-AIXHUIHG.js.map} +0 -0
- /package/dist/{chunk-YXD7GWJI.js.map → chunk-JB4UWIM6.js.map} +0 -0
- /package/dist/{chunk-EGIPWXHL.js.map → chunk-OLIBRKRD.js.map} +0 -0
- /package/dist/{chunk-H4TOS272.js.map → chunk-QDOSODID.js.map} +0 -0
- /package/dist/{run-campaign-6UEVBPP3.js.map → run-campaign-ZURVWMMI.js.map} +0 -0
package/dist/contract/index.js
CHANGED
|
@@ -6,12 +6,12 @@ import {
|
|
|
6
6
|
heldOutGate,
|
|
7
7
|
runEval,
|
|
8
8
|
runImprovementLoop
|
|
9
|
-
} from "../chunk-YXD7GWJI.js";
|
|
9
|
+
} from "../chunk-JB4UWIM6.js";
|
|
10
10
|
import {
|
|
11
11
|
fsCampaignStorage,
|
|
12
12
|
inMemoryCampaignStorage,
|
|
13
13
|
runCampaign
|
|
14
|
-
} from "../chunk-J3EIOI3O.js";
|
|
14
|
+
} from "../chunk-74Y2EMNH.js";
|
|
15
15
|
import {
|
|
16
16
|
createHostedClient
|
|
17
17
|
} from "../chunk-FQK2CCIM.js";
|
|
@@ -26,14 +26,14 @@ import {
|
|
|
26
26
|
} from "../chunk-3RF76KTD.js";
|
|
27
27
|
import {
|
|
28
28
|
paretoChart
|
|
29
|
-
} from "../chunk-EGIPWXHL.js";
|
|
29
|
+
} from "../chunk-OLIBRKRD.js";
|
|
30
30
|
import {
|
|
31
31
|
cohensD,
|
|
32
32
|
pairedBootstrap,
|
|
33
33
|
pairedMde,
|
|
34
34
|
pairedTTest,
|
|
35
35
|
requiredSampleSize
|
|
36
|
-
} from "../chunk-WP7SY7AI.js";
|
|
36
|
+
} from "../chunk-S3SDD56V.js";
|
|
37
37
|
import "../chunk-GGE4NNQT.js";
|
|
38
38
|
import "../chunk-47X6LRCE.js";
|
|
39
39
|
import "../chunk-5BKGXME7.js";
|
|
@@ -1029,6 +1029,78 @@ function cellsToRunRecords(cells, candidateId, runId) {
|
|
|
1029
1029
|
});
|
|
1030
1030
|
}
|
|
1031
1031
|
|
|
1032
|
+
// src/contract/intake/agent-trace.ts
|
|
1033
|
+
/**
 * Number of lines covered by a range whose `start_line` and `end_line` are
 * both inclusive.
 *
 * @param {{ start_line: number, end_line: number }} r - Range to measure.
 * @returns {number} `end_line - start_line + 1`, clamped to 0 for inverted
 *   ranges. Also 0 when either bound is missing or non-numeric.
 */
function rangeLines(r) {
  const span = r.end_line - r.start_line + 1;
  // Math.max(0, NaN) is NaN, and callers accumulate this value into a running
  // total; a single malformed range must not turn the whole total into NaN.
  if (!Number.isFinite(span)) return 0;
  return Math.max(0, span);
}
|
|
1036
|
+
/**
 * Builds a per-commit provenance index from agent-trace records.
 *
 * Records are grouped by `record.vcs.revision`; records without a revision are
 * skipped. For every commit the index entry carries:
 *   - `commitSha`: the revision string used as the grouping key
 *   - `aiModels`: sorted, de-duplicated `model_id` values of contributors
 *     whose type is "ai" or "mixed"
 *   - `tools`: sorted, de-duplicated `record.tool.name` values
 *   - `conversationCount`: total conversations seen across all files
 *   - `fileCount`: number of distinct `file.path` values
 *   - `lineCount`: sum of `rangeLines(range)` over every range
 *   - `humanInvolved`: true if any range's contributor type is "human" or "mixed"
 *
 * A range's contributor is its own `contributor`, falling back to the
 * enclosing conversation's `contributor`.
 *
 * @param {Iterable<object>} records - Agent-trace records.
 * @returns {Map<string, object>} Provenance entries keyed by commit revision,
 *   in first-seen order.
 */
function parseAgentTrace(records) {
  const perCommit = new Map();

  for (const record of records) {
    const revision = record.vcs?.revision;
    if (!revision) continue;

    if (!perCommit.has(revision)) {
      perCommit.set(revision, {
        models: new Set(),
        tools: new Set(),
        files: new Set(),
        conversationCount: 0,
        lineCount: 0,
        humanInvolved: false
      });
    }
    const bucket = perCommit.get(revision);

    if (record.tool?.name) bucket.tools.add(record.tool.name);

    for (const file of record.files ?? []) {
      bucket.files.add(file.path);

      for (const conversation of file.conversations ?? []) {
        bucket.conversationCount += 1;

        for (const range of conversation.ranges ?? []) {
          // Lines are counted for every range, attributed or not.
          bucket.lineCount += rangeLines(range);

          const who = range.contributor ?? conversation.contributor;
          if (!who) continue;

          const isMixed = who.type === "mixed";
          if (isMixed || who.type === "human") {
            bucket.humanInvolved = true;
          }
          if ((isMixed || who.type === "ai") && who.model_id) {
            bucket.models.add(who.model_id);
          }
        }
      }
    }
  }

  // Freeze the mutable accumulators into plain, sorted summary objects.
  return new Map(
    Array.from(perCommit, ([commitSha, bucket]) => [
      commitSha,
      {
        commitSha,
        aiModels: [...bucket.models].sort(),
        tools: [...bucket.tools].sort(),
        conversationCount: bucket.conversationCount,
        fileCount: bucket.files.size,
        lineCount: bucket.lineCount,
        humanInvolved: bucket.humanInvolved
      }
    ])
  );
}
|
|
1086
|
+
/**
 * Splits runs into cohorts according to the AI models recorded for each run's
 * commit in a provenance index.
 *
 * A run whose `commitSha` has no index entry, or whose entry lists no
 * `aiModels`, goes to `unattributed`. Otherwise the run is appended to the
 * cohort of every model listed for its commit, so a run can appear in more
 * than one cohort.
 *
 * @param {Iterable<{ commitSha: string }>} runs - Runs to partition.
 * @param {Map<string, { aiModels: string[] }>} index - Provenance entries
 *   keyed by commit revision.
 * @returns {{ byModel: Map<string, object[]>, unattributed: object[] }}
 */
function partitionRunsByAuthoringModel(runs, index) {
  const unattributed = [];
  const byModel = new Map();

  for (const run of runs) {
    const entry = index.get(run.commitSha);
    const hasModels = Boolean(entry) && entry.aiModels.length !== 0;

    if (hasModels) {
      for (const modelId of entry.aiModels) {
        if (!byModel.has(modelId)) byModel.set(modelId, []);
        byModel.get(modelId).push(run);
      }
    } else {
      unattributed.push(run);
    }
  }

  return { byModel, unattributed };
}
|
|
1103
|
+
|
|
1032
1104
|
// src/contract/intake/feedback-table.ts
|
|
1033
1105
|
function fromFeedbackTable(opts) {
|
|
1034
1106
|
const { ratings, meta = [], scale, emitRaterScores = true } = opts;
|
|
@@ -1204,6 +1276,8 @@ export {
|
|
|
1204
1276
|
gepaDriver,
|
|
1205
1277
|
heldOutGate,
|
|
1206
1278
|
inMemoryCampaignStorage,
|
|
1279
|
+
parseAgentTrace,
|
|
1280
|
+
partitionRunsByAuthoringModel,
|
|
1207
1281
|
runCampaign,
|
|
1208
1282
|
runEval,
|
|
1209
1283
|
runImprovementLoop,
|