@tangle-network/agent-eval 0.20.12 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +177 -0
- package/README.md +43 -1
- package/dist/{chunk-KWUAAIHR.js → chunk-4W4NCYM2.js} +182 -1
- package/dist/chunk-4W4NCYM2.js.map +1 -0
- package/dist/{chunk-PKCVBYTQ.js → chunk-5IIQKMD5.js} +38 -2
- package/dist/chunk-5IIQKMD5.js.map +1 -0
- package/dist/{chunk-HNJLMAJ2.js → chunk-6KQG5HAH.js} +2 -2
- package/dist/chunk-6M774GY6.js +53 -0
- package/dist/chunk-6M774GY6.js.map +1 -0
- package/dist/{chunk-MCMV7DUL.js → chunk-ARZ6BEV6.js} +2 -2
- package/dist/chunk-IOXMGMHQ.js +1226 -0
- package/dist/chunk-IOXMGMHQ.js.map +1 -0
- package/dist/{chunk-75MCTH7P.js → chunk-KAO3Q65R.js} +198 -3
- package/dist/chunk-KAO3Q65R.js.map +1 -0
- package/dist/chunk-QUKKGHTZ.js +121 -0
- package/dist/chunk-QUKKGHTZ.js.map +1 -0
- package/dist/chunk-SQQLHODJ.js +163 -0
- package/dist/chunk-SQQLHODJ.js.map +1 -0
- package/dist/{chunk-IKFVX537.js → chunk-UAND2LOT.js} +232 -211
- package/dist/chunk-UAND2LOT.js.map +1 -0
- package/dist/{chunk-HKYRWNHV.js → chunk-USHQBPMH.js} +283 -7
- package/dist/chunk-USHQBPMH.js.map +1 -0
- package/dist/cli.js +3 -2
- package/dist/cli.js.map +1 -1
- package/dist/{control-C8NKbF3w.d.ts → control-cxwMOAsy.d.ts} +3 -2
- package/dist/control.d.ts +4 -3
- package/dist/control.js +2 -2
- package/dist/emitter-B2XqDKFU.d.ts +121 -0
- package/dist/{feedback-trajectory-BGQ_ANCN.d.ts → feedback-trajectory-CB0A32o3.d.ts} +2 -1
- package/dist/index.d.ts +16 -302
- package/dist/index.js +70 -62
- package/dist/index.js.map +1 -1
- package/dist/integrity-K2oVlF57.d.ts +210 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization-UVDNKaO6.d.ts +574 -0
- package/dist/optimization.d.ts +7 -144
- package/dist/optimization.js +9 -2
- package/dist/reporting-B82RSv9C.d.ts +593 -0
- package/dist/reporting.d.ts +5 -426
- package/dist/reporting.js +17 -6
- package/dist/{emitter-BYO2nSDA.d.ts → store-u47QaJ9G.d.ts} +1 -91
- package/dist/{multi-shot-optimization-Bvtz294B.d.ts → summary-report-D4p7RlDu.d.ts} +381 -1
- package/dist/traces.d.ts +179 -3
- package/dist/traces.js +35 -4
- package/dist/wire/index.js +3 -2
- package/docs/research-report-methodology.md +170 -0
- package/docs/wire-protocol.md +1 -1
- package/package.json +11 -13
- package/dist/chunk-75MCTH7P.js.map +0 -1
- package/dist/chunk-HKYRWNHV.js.map +0 -1
- package/dist/chunk-IKFVX537.js.map +0 -1
- package/dist/chunk-KWUAAIHR.js.map +0 -1
- package/dist/chunk-ODFINDLQ.js +0 -413
- package/dist/chunk-ODFINDLQ.js.map +0 -1
- package/dist/chunk-PKCVBYTQ.js.map +0 -1
- /package/dist/{chunk-HNJLMAJ2.js.map → chunk-6KQG5HAH.js.map} +0 -0
- /package/dist/{chunk-MCMV7DUL.js.map → chunk-ARZ6BEV6.js.map} +0 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// src/pre-registration.ts
|
|
2
|
+
/**
 * Recursively rebuild a JSON-like value so every object's keys appear in
 * sorted order, making `JSON.stringify` output independent of key insertion
 * order.
 *
 * - `null` and non-object values are returned unchanged.
 * - A value exposing a `toJSON()` method (for example a `Date`) is replaced by
 *   the canonical form of what that method returns, which is the value
 *   `JSON.stringify` would serialize. Without this, such objects have no own
 *   enumerable keys and would all collapse to `{}`.
 * - Arrays keep their element order; each element is canonicalized.
 * - Other objects are copied as plain objects whose own enumerable string
 *   keys are ordered by the default `Array.prototype.sort` comparison.
 *
 * @param {unknown} v - Value to canonicalize.
 * @returns {unknown} A primitive, a new array, or a new plain object.
 */
function canonicalize(v) {
  if (v === null || typeof v !== "object") return v;
  if (typeof v.toJSON === "function") {
    const jsonValue = v.toJSON();
    // A toJSON() that returns its receiver would recurse forever; fall
    // through and treat the value structurally in that case.
    if (jsonValue !== v) return canonicalize(jsonValue);
  }
  if (Array.isArray(v)) return v.map((item) => canonicalize(item));
  const sortedEntries = Object.keys(v)
    .sort()
    .map((key) => [key, canonicalize(v[key])]);
  // Object.fromEntries defines own data properties, so a key literally named
  // "__proto__" (as JSON.parse can produce) is kept as data. Plain assignment
  // `out["__proto__"] = x` would set the prototype and drop the key instead.
  return Object.fromEntries(sortedEntries);
}
|
|
10
|
+
/**
 * Compute the lowercase hex SHA-256 digest of the canonical JSON form of `obj`.
 *
 * The value is passed through `canonicalize`, serialized with
 * `JSON.stringify`, encoded with `TextEncoder`, and digested with the Web
 * Crypto `subtle.digest` found on `globalThis.crypto`.
 *
 * @param {unknown} obj - Value to hash.
 * @returns {Promise<string>} Hex string, two characters per digest byte.
 */
async function hashJson(obj) {
  const json = JSON.stringify(canonicalize(obj));
  const utf8 = new TextEncoder().encode(json);
  const buffer = await globalThis.crypto.subtle.digest("SHA-256", utf8);
  let hex = "";
  for (const byte of new Uint8Array(buffer)) {
    // Zero-pad so every byte contributes exactly two hex characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
16
|
+
/**
 * Attach a content hash to a manifest.
 *
 * The hash is computed by `hashJson` over the manifest with any existing
 * `contentHash` and `algo` fields removed. Those are the same fields
 * `verifyManifest` strips before re-signing, so re-signing an
 * already-signed manifest yields a result that still verifies rather than
 * one whose hash covers a stale signature.
 *
 * @param {object} m - Manifest to sign; not mutated.
 * @returns {Promise<object>} Copy of `m` with `contentHash` set to the
 *   computed hash and `algo` set to `"sha256-content"`.
 */
async function signManifest(m) {
  const { contentHash: _staleHash, algo: _staleAlgo, ...payload } = m;
  // Signature fields are intentionally discarded; they are recomputed below.
  void _staleHash;
  void _staleAlgo;
  const contentHash = await hashJson(payload);
  return { ...m, contentHash, algo: "sha256-content" };
}
|
|
20
|
+
/**
 * Check a signed manifest against its recorded content hash.
 *
 * `contentHash` and `algo` are removed, the remaining fields are re-signed
 * with `signManifest`, and the freshly computed hash is compared with the
 * recorded one.
 *
 * @param {object} m - Signed manifest to check.
 * @returns {Promise<boolean>} `true` when the recomputed hash strictly equals
 *   `m.contentHash`, otherwise `false`.
 */
async function verifyManifest(m) {
  const { contentHash: claimedHash, algo: _ignoredAlgo, ...unsigned } = m;
  // `algo` is pulled out only so it is excluded from `unsigned`.
  void _ignoredAlgo;
  const { contentHash: recomputedHash } = await signManifest(unsigned);
  return recomputedHash === claimedHash;
}
|
|
26
|
+
/**
 * Evaluate a signed hypothesis manifest against observed results.
 *
 * The manifest is first checked with `verifyManifest`. Each acceptance
 * criterion is then tested, and a tag is recorded for every one that is not
 * met:
 *
 * - `wrong_direction`: effect is not `> 0` when `manifest.direction` is
 *   `"increase"`, or not `< 0` for any other direction value.
 * - `effect_too_small`: `|effect|` is not `>= manifest.minEffect`.
 * - `not_significant`: `pValue` is not `< manifest.alpha`.
 * - `undersampled`: `n` is not `>= manifest.preRegisteredN`.
 *
 * Criteria are written as "not (acceptance condition)" so that `NaN` or
 * missing numbers fail closed. Every comparison involving `NaN` is false, so
 * testing the rejection condition directly would let a `NaN` p-value or
 * sample size slip through as confirmed.
 *
 * @param {object} manifest - Signed manifest; reads `direction`, `minEffect`,
 *   `alpha` and `preRegisteredN`.
 * @param {{ n: number, effect: number, pValue: number }} observed - Observed
 *   sample size, effect and p-value.
 * @returns {Promise<object>} Result holding the manifest, the observed values,
 *   `confirmed` (true only when no criterion failed) and `rejectionReasons`.
 * @throws {Error} When `verifyManifest(manifest)` resolves to a falsy value.
 */
async function evaluateHypothesis(manifest, observed) {
  if (!(await verifyManifest(manifest))) {
    throw new Error("evaluateHypothesis: manifest content hash mismatch (tampered)");
  }
  const { n, effect, pValue } = observed;
  const reasons = [];
  const directionOk = manifest.direction === "increase" ? effect > 0 : effect < 0;
  if (!directionOk) reasons.push("wrong_direction");
  if (!(Math.abs(effect) >= manifest.minEffect)) reasons.push("effect_too_small");
  if (!(pValue < manifest.alpha)) reasons.push("not_significant");
  if (!(n >= manifest.preRegisteredN)) reasons.push("undersampled");
  return {
    manifest,
    observedN: n,
    observedEffect: effect,
    observedPValue: pValue,
    confirmed: reasons.length === 0,
    rejectionReasons: reasons,
  };
}
|
|
45
|
+
|
|
46
|
+
export {
|
|
47
|
+
canonicalize,
|
|
48
|
+
hashJson,
|
|
49
|
+
signManifest,
|
|
50
|
+
verifyManifest,
|
|
51
|
+
evaluateHypothesis
|
|
52
|
+
};
|
|
53
|
+
//# sourceMappingURL=chunk-6M774GY6.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/pre-registration.ts"],"sourcesContent":["/**\n * Pre-registered hypotheses — declare what you're testing BEFORE the\n * run, check it AFTER. Prevents p-hacking, optional stopping, and the\n * \"we ran until it looked good\" failure mode.\n *\n * Manifest is a plain JSON-friendly object. Sign it with a content hash\n * + timestamp; the registered record becomes immutable. Post-run,\n * evaluate the manifest against observed results — the library refuses\n * to let you re-interpret a different metric as the declared one.\n */\n\nexport interface HypothesisManifest {\n id: string\n /** Human prose — goes into the audit trail. */\n hypothesis: string\n /** Metric the hypothesis claims to move. */\n metric: string\n /** 'increase' = candidate should score higher than baseline; 'decrease' = lower. */\n direction: 'increase' | 'decrease'\n /** Minimum effect size to count (same units as the metric). */\n minEffect: number\n /** Alpha threshold. */\n alpha: number\n /** Target statistical power at which sample size was pre-computed. */\n power: number\n /** Declared N per arm before running. */\n preRegisteredN: number\n /** ISO8601 timestamp the manifest was registered. */\n registeredAt: string\n /** Optional identifiers to tie into the trace corpus. */\n baselineLabel?: string\n candidateLabel?: string\n}\n\n/**\n * Identifier for the hashing scheme used to produce `contentHash`.\n *\n * `'sha256-content'` — sha256 hex over the canonicalized manifest with\n * the `contentHash` and `algo` fields stripped. 
This is what\n * `signManifest` produces today.\n *\n * Held as a string union so future schemes can be added without\n * breaking parsers; legacy SignedManifest values written before this\n * field existed will deserialize cleanly because the field is optional.\n */\nexport type SignedManifestAlgo = 'sha256-content'\n\nexport interface SignedManifest extends HypothesisManifest {\n /** sha256 hex of canonicalized manifest (everything except contentHash and algo). */\n contentHash: string\n /**\n * Algorithm string describing how `contentHash` was produced.\n *\n * Optional on the type so legacy serialized manifests (pre-`algo`)\n * still parse, but ALWAYS populated by {@link signManifest}.\n * Consumers that want to enforce a known algorithm should reject\n * manifests where this field is missing or unrecognized.\n */\n algo?: SignedManifestAlgo\n}\n\nexport interface HypothesisResult {\n manifest: SignedManifest\n observedN: number\n observedEffect: number\n observedPValue: number\n /** True iff the observed effect hits the pre-declared direction with\n * magnitude ≥ minEffect AND p < alpha. */\n confirmed: boolean\n /** Enumerated reasons the hypothesis was rejected (each a machine-tag). */\n rejectionReasons: Array<'wrong_direction' | 'effect_too_small' | 'not_significant' | 'undersampled'>\n notes?: string\n}\n\n/**\n * Deterministic JSON canonicalization — sort object keys recursively.\n *\n * Two semantically-equal objects produce byte-identical canonicalized output;\n * this is what makes a content-hash stable across encoders, key insertion\n * orders, and runtime versions. 
Exported for any consumer that needs the same\n * canonicalization guarantee outside the manifest-signing path (e.g., signing\n * an artifact bundle, hashing a dataset version, etc.).\n */\nexport function canonicalize(v: unknown): unknown {\n if (v === null || typeof v !== 'object') return v\n if (Array.isArray(v)) return v.map(canonicalize)\n const keys = Object.keys(v as Record<string, unknown>).sort()\n const out: Record<string, unknown> = {}\n for (const k of keys) out[k] = canonicalize((v as Record<string, unknown>)[k])\n return out\n}\n\n/**\n * SHA-256 hex (full 64 chars) over the canonicalized JSON encoding of `obj`.\n *\n * The same primitive `signManifest` and `verifyManifest` are built on, exposed\n * directly so consumers signing arbitrary structured content (artifact bundles,\n * production packets, dataset manifests, etc.) don't have to re-derive\n * canonicalize+sha256 from scratch.\n *\n * Stable across:\n * - object key insertion order (canonicalization sorts keys recursively)\n * - encoder choice (UTF-8 via TextEncoder, fixed)\n * - runtime (uses the Web Crypto subtle digest, present in Node ≥18 and browsers)\n *\n * Naming note: `hashJson` rather than `hashContent` because `hashContent` is\n * already taken in `prompt-registry.ts` for the truncated 12-char prompt-id\n * helper, which has different semantics (string input, short return). Both\n * coexist; `hashJson` is the right name when you mean \"canonicalize then hash.\"\n *\n * @example\n * const hash = await hashJson({ id: '1', kind: 'spec' })\n * // 'a3f1...' 
(64 hex chars)\n */\nexport async function hashJson<T>(obj: T): Promise<string> {\n const canonical = canonicalize(obj)\n const bytes = new TextEncoder().encode(JSON.stringify(canonical))\n const digest = await globalThis.crypto.subtle.digest('SHA-256', bytes)\n return Array.from(new Uint8Array(digest))\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n\n/**\n * Sign a manifest with a SHA-256 content hash.\n *\n * The hash covers the canonicalized manifest with the `contentHash`\n * and `algo` fields stripped; this lets verifiers re-sign the rest and\n * compare. Returned manifest always carries `algo: 'sha256-content'`\n * so downstream consumers can identify the scheme; legacy serialized\n * manifests without `algo` still verify because it is stripped before\n * hashing on both sides.\n */\nexport async function signManifest(m: HypothesisManifest): Promise<SignedManifest> {\n const hash = await hashJson(m)\n return { ...m, contentHash: hash, algo: 'sha256-content' }\n}\n\n/**\n * Verify that a signed manifest has not been tampered with.\n *\n * Strips `contentHash` and `algo` before re-signing so legacy manifests\n * (written before `algo` was emitted) verify identically to current\n * ones.\n */\nexport async function verifyManifest(m: SignedManifest): Promise<boolean> {\n const { contentHash, algo: _algo, ...rest } = m\n void _algo\n const resigned = await signManifest(rest)\n return resigned.contentHash === contentHash\n}\n\n/**\n * Evaluate a pre-registered hypothesis against observed results.\n * Mechanical — no re-interpretation permitted.\n */\nexport async function evaluateHypothesis(\n manifest: SignedManifest,\n observed: { n: number; effect: number; pValue: number },\n): Promise<HypothesisResult> {\n if (!(await verifyManifest(manifest))) {\n throw new Error('evaluateHypothesis: manifest content hash mismatch (tampered)')\n }\n const reasons: HypothesisResult['rejectionReasons'] = []\n const directionOk =\n manifest.direction === 
'increase' ? observed.effect > 0 : observed.effect < 0\n if (!directionOk) reasons.push('wrong_direction')\n if (Math.abs(observed.effect) < manifest.minEffect) reasons.push('effect_too_small')\n if (observed.pValue >= manifest.alpha) reasons.push('not_significant')\n if (observed.n < manifest.preRegisteredN) reasons.push('undersampled')\n return {\n manifest,\n observedN: observed.n,\n observedEffect: observed.effect,\n observedPValue: observed.pValue,\n confirmed: reasons.length === 0,\n rejectionReasons: reasons,\n }\n}\n"],"mappings":";AAmFO,SAAS,aAAa,GAAqB;AAChD,MAAI,MAAM,QAAQ,OAAO,MAAM,SAAU,QAAO;AAChD,MAAI,MAAM,QAAQ,CAAC,EAAG,QAAO,EAAE,IAAI,YAAY;AAC/C,QAAM,OAAO,OAAO,KAAK,CAA4B,EAAE,KAAK;AAC5D,QAAM,MAA+B,CAAC;AACtC,aAAW,KAAK,KAAM,KAAI,CAAC,IAAI,aAAc,EAA8B,CAAC,CAAC;AAC7E,SAAO;AACT;AAwBA,eAAsB,SAAY,KAAyB;AACzD,QAAM,YAAY,aAAa,GAAG;AAClC,QAAM,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC;AAChE,QAAM,SAAS,MAAM,WAAW,OAAO,OAAO,OAAO,WAAW,KAAK;AACrE,SAAO,MAAM,KAAK,IAAI,WAAW,MAAM,CAAC,EACrC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;AAYA,eAAsB,aAAa,GAAgD;AACjF,QAAM,OAAO,MAAM,SAAS,CAAC;AAC7B,SAAO,EAAE,GAAG,GAAG,aAAa,MAAM,MAAM,iBAAiB;AAC3D;AASA,eAAsB,eAAe,GAAqC;AACxE,QAAM,EAAE,aAAa,MAAM,OAAO,GAAG,KAAK,IAAI;AAC9C,OAAK;AACL,QAAM,WAAW,MAAM,aAAa,IAAI;AACxC,SAAO,SAAS,gBAAgB;AAClC;AAMA,eAAsB,mBACpB,UACA,UAC2B;AAC3B,MAAI,CAAE,MAAM,eAAe,QAAQ,GAAI;AACrC,UAAM,IAAI,MAAM,+DAA+D;AAAA,EACjF;AACA,QAAM,UAAgD,CAAC;AACvD,QAAM,cACJ,SAAS,cAAc,aAAa,SAAS,SAAS,IAAI,SAAS,SAAS;AAC9E,MAAI,CAAC,YAAa,SAAQ,KAAK,iBAAiB;AAChD,MAAI,KAAK,IAAI,SAAS,MAAM,IAAI,SAAS,UAAW,SAAQ,KAAK,kBAAkB;AACnF,MAAI,SAAS,UAAU,SAAS,MAAO,SAAQ,KAAK,iBAAiB;AACrE,MAAI,SAAS,IAAI,SAAS,eAAgB,SAAQ,KAAK,cAAc;AACrE,SAAO;AAAA,IACL;AAAA,IACA,WAAW,SAAS;AAAA,IACpB,gBAAgB,SAAS;AAAA,IACzB,gBAAgB,SAAS;AAAA,IACzB,WAAW,QAAQ,WAAW;AAAA,IAC9B,kBAAkB;AAAA,EACpB;AACF;","names":[]}
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
} from "./chunk-YUFXO3TU.js";
|
|
4
4
|
import {
|
|
5
5
|
TraceEmitter
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-5IIQKMD5.js";
|
|
7
7
|
|
|
8
8
|
// src/control-runtime.ts
|
|
9
9
|
var DEFAULT_BUDGET = {
|
|
@@ -1307,4 +1307,4 @@ export {
|
|
|
1307
1307
|
runProposeReviewAsControlLoop,
|
|
1308
1308
|
controlFailureClassFromVerification
|
|
1309
1309
|
};
|
|
1310
|
-
//# sourceMappingURL=chunk-
|
|
1310
|
+
//# sourceMappingURL=chunk-ARZ6BEV6.js.map
|