@tangle-network/agent-eval 0.71.0 → 0.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/campaign/index.js +25 -12
- package/dist/campaign/index.js.map +1 -1
- package/dist/{chunk-VMAYE3LM.js → chunk-4QJN7RDX.js} +3 -3
- package/dist/chunk-SL55X4VN.js +186 -0
- package/dist/chunk-SL55X4VN.js.map +1 -0
- package/dist/{chunk-6QZUCFKM.js → chunk-UD6EF73X.js} +3 -3
- package/dist/{chunk-6XQIEUQ2.js → chunk-ZPSKPT3V.js} +5 -3
- package/dist/{chunk-6XQIEUQ2.js.map → chunk-ZPSKPT3V.js.map} +1 -1
- package/dist/contract/index.js +3 -3
- package/dist/index.js +11 -156
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/{run-campaign-BVY3RGAZ.js → run-campaign-OVEZF24D.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-PQV2TKC3.js +0 -27
- package/dist/chunk-PQV2TKC3.js.map +0 -1
- /package/dist/{chunk-VMAYE3LM.js.map → chunk-4QJN7RDX.js.map} +0 -0
- /package/dist/{chunk-6QZUCFKM.js.map → chunk-UD6EF73X.js.map} +0 -0
- /package/dist/{run-campaign-BVY3RGAZ.js.map → run-campaign-OVEZF24D.js.map} +0 -0
package/dist/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "@tangle-network/agent-eval — wire protocol",
|
|
5
|
-
"version": "0.71.0",
|
|
5
|
+
"version": "0.72.0",
|
|
6
6
|
"description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
|
|
7
7
|
"contact": {
|
|
8
8
|
"name": "Tangle Network",
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runCampaign
|
|
3
|
-
} from "./chunk-6XQIEUQ2.js";
|
|
3
|
+
} from "./chunk-ZPSKPT3V.js";
|
|
4
4
|
import "./chunk-ITBRCT73.js";
|
|
5
5
|
import "./chunk-3BFEG2F6.js";
|
|
6
6
|
import "./chunk-PZ5AY32C.js";
|
|
7
7
|
export {
|
|
8
8
|
runCampaign
|
|
9
9
|
};
|
|
10
|
-
//# sourceMappingURL=run-campaign-BVY3RGAZ.js.map
|
|
10
|
+
//# sourceMappingURL=run-campaign-OVEZF24D.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.71.0",
|
|
3
|
+
"version": "0.72.0",
|
|
4
4
|
"description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|
package/dist/chunk-PQV2TKC3.js
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
canonicalize
|
|
3
|
-
} from "./chunk-VSMTAMNK.js";
|
|
4
|
-
import {
|
|
5
|
-
ValidationError
|
|
6
|
-
} from "./chunk-3BFEG2F6.js";
|
|
7
|
-
|
|
8
|
-
// src/agent-profile.ts
|
|
9
|
-
import { createHash } from "crypto";
|
|
10
|
-
function agentProfileHash(profile) {
|
|
11
|
-
if (typeof profile.model !== "string" || profile.model.trim().length === 0) {
|
|
12
|
-
throw new ValidationError(`AgentProfile "${profile.id}" has no model \u2014 cannot hash`);
|
|
13
|
-
}
|
|
14
|
-
const behaviour = {
|
|
15
|
-
model: profile.model.trim(),
|
|
16
|
-
skills: [...profile.skills ?? []].sort(),
|
|
17
|
-
promptVersion: profile.promptVersion ?? null,
|
|
18
|
-
tools: [...profile.tools ?? []].sort(),
|
|
19
|
-
metadata: profile.metadata ?? {}
|
|
20
|
-
};
|
|
21
|
-
return createHash("sha256").update(JSON.stringify(canonicalize(behaviour))).digest("hex");
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
export {
|
|
25
|
-
agentProfileHash
|
|
26
|
-
};
|
|
27
|
-
//# sourceMappingURL=chunk-PQV2TKC3.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/agent-profile.ts"],"sourcesContent":["/**\n * @stable\n *\n * AgentProfile — the eval harness's unit of variation.\n *\n * A profile pins everything that changes agent behaviour for a benchmark\n * cell: the model, the active skills, the prompt version, the available\n * tools. Vary the profile — swap a model, add a skill — and re-run the suite\n * to benchmark the change. The scorecard keys a cell on\n * `(scenarioId, profileHash)`, so the model is not a separate axis: it lives\n * inside the profile, and two profiles with the same model but different\n * skills are different cells.\n *\n * `agentProfileHash` is the profile's behaviour identity. Two profiles that\n * produce the same agent behaviour share a hash (and a scorecard cell);\n * reordering `skills` or `tools` does not change it; the human-facing `id`\n * label does not affect it.\n */\n\nimport { createHash } from 'node:crypto'\nimport { ValidationError } from './errors'\nimport { canonicalize } from './pre-registration'\n\nexport interface AgentProfile {\n /** Human-facing label, e.g. `sonnet-legal-skills-v3`. Not part of the hash. */\n id: string\n /** Model snapshot id this profile pins, e.g. `claude-sonnet-4-6@2025-04-15`. */\n model: string\n /** Skill ids/versions active in this profile — the primary behaviour lever. */\n skills?: string[]\n /** Prompt version identifier. */\n promptVersion?: string\n /** Tool ids available to the agent. */\n tools?: string[]\n /** Any other behaviour-bearing knobs that should fingerprint into the hash. */\n metadata?: Record<string, string | number | boolean>\n}\n\n/**\n * Deterministic behaviour identity of a profile — a sha256 over the\n * behaviour-bearing fields. `skills` and `tools` are order-insensitive; the\n * `id` label is excluded. 
Throws on a profile with no `model` — an unkeyable\n * profile must fail loud rather than collapse into a blank-model cell.\n */\nexport function agentProfileHash(profile: AgentProfile): string {\n if (typeof profile.model !== 'string' || profile.model.trim().length === 0) {\n throw new ValidationError(`AgentProfile \"${profile.id}\" has no model — cannot hash`)\n }\n const behaviour = {\n model: profile.model.trim(),\n skills: [...(profile.skills ?? [])].sort(),\n promptVersion: profile.promptVersion ?? null,\n tools: [...(profile.tools ?? [])].sort(),\n metadata: profile.metadata ?? {},\n }\n return createHash('sha256')\n .update(JSON.stringify(canonicalize(behaviour)))\n .digest('hex')\n}\n"],"mappings":";;;;;;;;AAmBA,SAAS,kBAAkB;AAyBpB,SAAS,iBAAiB,SAA+B;AAC9D,MAAI,OAAO,QAAQ,UAAU,YAAY,QAAQ,MAAM,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,gBAAgB,iBAAiB,QAAQ,EAAE,mCAA8B;AAAA,EACrF;AACA,QAAM,YAAY;AAAA,IAChB,OAAO,QAAQ,MAAM,KAAK;AAAA,IAC1B,QAAQ,CAAC,GAAI,QAAQ,UAAU,CAAC,CAAE,EAAE,KAAK;AAAA,IACzC,eAAe,QAAQ,iBAAiB;AAAA,IACxC,OAAO,CAAC,GAAI,QAAQ,SAAS,CAAC,CAAE,EAAE,KAAK;AAAA,IACvC,UAAU,QAAQ,YAAY,CAAC;AAAA,EACjC;AACA,SAAO,WAAW,QAAQ,EACvB,OAAO,KAAK,UAAU,aAAa,SAAS,CAAC,CAAC,EAC9C,OAAO,KAAK;AACjB;","names":[]}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|