@remnic/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/abstraction-nodes.d.ts +52 -0
- package/dist/abstraction-nodes.js +15 -0
- package/dist/abstraction-nodes.js.map +1 -0
- package/dist/access-cli.d.ts +5 -0
- package/dist/access-cli.js +308 -0
- package/dist/access-cli.js.map +1 -0
- package/dist/access-http.d.ts +158 -0
- package/dist/access-http.js +32 -0
- package/dist/access-http.js.map +1 -0
- package/dist/access-idempotency.d.ts +31 -0
- package/dist/access-idempotency.js +11 -0
- package/dist/access-idempotency.js.map +1 -0
- package/dist/access-mcp.d.ts +76 -0
- package/dist/access-mcp.js +8 -0
- package/dist/access-mcp.js.map +1 -0
- package/dist/access-schema.d.ts +266 -0
- package/dist/access-schema.js +29 -0
- package/dist/access-schema.js.map +1 -0
- package/dist/access-service.d.ts +614 -0
- package/dist/access-service.js +32 -0
- package/dist/access-service.js.map +1 -0
- package/dist/behavior-learner.d.ts +16 -0
- package/dist/behavior-learner.js +124 -0
- package/dist/behavior-learner.js.map +1 -0
- package/dist/behavior-signals.d.ts +15 -0
- package/dist/behavior-signals.js +11 -0
- package/dist/behavior-signals.js.map +1 -0
- package/dist/bootstrap.d.ts +46 -0
- package/dist/bootstrap.js +9 -0
- package/dist/bootstrap.js.map +1 -0
- package/dist/boxes.d.ts +93 -0
- package/dist/boxes.js +14 -0
- package/dist/boxes.js.map +1 -0
- package/dist/buffer.d.ts +22 -0
- package/dist/buffer.js +9 -0
- package/dist/buffer.js.map +1 -0
- package/dist/calibration.d.ts +81 -0
- package/dist/calibration.js +239 -0
- package/dist/calibration.js.map +1 -0
- package/dist/causal-behavior.d.ts +79 -0
- package/dist/causal-behavior.js +190 -0
- package/dist/causal-behavior.js.map +1 -0
- package/dist/causal-chain.d.ts +61 -0
- package/dist/causal-chain.js +24 -0
- package/dist/causal-chain.js.map +1 -0
- package/dist/causal-consolidation.d.ts +71 -0
- package/dist/causal-consolidation.js +211 -0
- package/dist/causal-consolidation.js.map +1 -0
- package/dist/causal-retrieval.d.ts +44 -0
- package/dist/causal-retrieval.js +184 -0
- package/dist/causal-retrieval.js.map +1 -0
- package/dist/causal-trajectory-graph.d.ts +13 -0
- package/dist/causal-trajectory-graph.js +59 -0
- package/dist/causal-trajectory-graph.js.map +1 -0
- package/dist/causal-trajectory.d.ts +68 -0
- package/dist/causal-trajectory.js +18 -0
- package/dist/causal-trajectory.js.map +1 -0
- package/dist/chunk-2CJCWDMR.js +87 -0
- package/dist/chunk-2CJCWDMR.js.map +1 -0
- package/dist/chunk-2NMMFZ5T.js +216 -0
- package/dist/chunk-2NMMFZ5T.js.map +1 -0
- package/dist/chunk-2PO5ZRKV.js +103 -0
- package/dist/chunk-2PO5ZRKV.js.map +1 -0
- package/dist/chunk-3QKK7QOS.js +154 -0
- package/dist/chunk-3QKK7QOS.js.map +1 -0
- package/dist/chunk-3SLRNYNG.js +26 -0
- package/dist/chunk-3SLRNYNG.js.map +1 -0
- package/dist/chunk-4A24LIM2.js +68 -0
- package/dist/chunk-4A24LIM2.js.map +1 -0
- package/dist/chunk-6HZ6AO2P.js +164 -0
- package/dist/chunk-6HZ6AO2P.js.map +1 -0
- package/dist/chunk-763GUIOU.js +302 -0
- package/dist/chunk-763GUIOU.js.map +1 -0
- package/dist/chunk-AAI7JARD.js +173 -0
- package/dist/chunk-AAI7JARD.js.map +1 -0
- package/dist/chunk-B7LOFDVE.js +112 -0
- package/dist/chunk-B7LOFDVE.js.map +1 -0
- package/dist/chunk-BDFZXRSO.js +318 -0
- package/dist/chunk-BDFZXRSO.js.map +1 -0
- package/dist/chunk-BOUYNNYD.js +707 -0
- package/dist/chunk-BOUYNNYD.js.map +1 -0
- package/dist/chunk-BRK4ODMI.js +60 -0
- package/dist/chunk-BRK4ODMI.js.map +1 -0
- package/dist/chunk-C6QPK5GG.js +111 -0
- package/dist/chunk-C6QPK5GG.js.map +1 -0
- package/dist/chunk-C7VW7C3F.js +117 -0
- package/dist/chunk-C7VW7C3F.js.map +1 -0
- package/dist/chunk-CDW777AI.js +621 -0
- package/dist/chunk-CDW777AI.js.map +1 -0
- package/dist/chunk-CULXMQJH.js +185 -0
- package/dist/chunk-CULXMQJH.js.map +1 -0
- package/dist/chunk-CXWFUJR2.js +1203 -0
- package/dist/chunk-CXWFUJR2.js.map +1 -0
- package/dist/chunk-DGXUHMOV.js +61 -0
- package/dist/chunk-DGXUHMOV.js.map +1 -0
- package/dist/chunk-DM2T26WE.js +61 -0
- package/dist/chunk-DM2T26WE.js.map +1 -0
- package/dist/chunk-DORBM6OB.js +81 -0
- package/dist/chunk-DORBM6OB.js.map +1 -0
- package/dist/chunk-DT5TVLJE.js +32 -0
- package/dist/chunk-DT5TVLJE.js.map +1 -0
- package/dist/chunk-EEQLFRUM.js +89 -0
- package/dist/chunk-EEQLFRUM.js.map +1 -0
- package/dist/chunk-EQINRHYR.js +672 -0
- package/dist/chunk-EQINRHYR.js.map +1 -0
- package/dist/chunk-ESSMF2FR.js +146 -0
- package/dist/chunk-ESSMF2FR.js.map +1 -0
- package/dist/chunk-ETOW6ACV.js +158 -0
- package/dist/chunk-ETOW6ACV.js.map +1 -0
- package/dist/chunk-FYIYMQ5N.js +221 -0
- package/dist/chunk-FYIYMQ5N.js.map +1 -0
- package/dist/chunk-G3AG3KZN.js +78 -0
- package/dist/chunk-G3AG3KZN.js.map +1 -0
- package/dist/chunk-GJR6D6KC.js +61 -0
- package/dist/chunk-GJR6D6KC.js.map +1 -0
- package/dist/chunk-GPGBSNKM.js +380 -0
- package/dist/chunk-GPGBSNKM.js.map +1 -0
- package/dist/chunk-H63EDPFJ.js +57 -0
- package/dist/chunk-H63EDPFJ.js.map +1 -0
- package/dist/chunk-HG2NKWR2.js +185 -0
- package/dist/chunk-HG2NKWR2.js.map +1 -0
- package/dist/chunk-HL4DB7TO.js +13 -0
- package/dist/chunk-HL4DB7TO.js.map +1 -0
- package/dist/chunk-HLBYLYRD.js +346 -0
- package/dist/chunk-HLBYLYRD.js.map +1 -0
- package/dist/chunk-HLXVTBF3.js +109 -0
- package/dist/chunk-HLXVTBF3.js.map +1 -0
- package/dist/chunk-IFFFR3MR.js +68 -0
- package/dist/chunk-IFFFR3MR.js.map +1 -0
- package/dist/chunk-ISY75RLM.js +1027 -0
- package/dist/chunk-ISY75RLM.js.map +1 -0
- package/dist/chunk-IZME7KW2.js +1886 -0
- package/dist/chunk-IZME7KW2.js.map +1 -0
- package/dist/chunk-J3BT33K7.js +720 -0
- package/dist/chunk-J3BT33K7.js.map +1 -0
- package/dist/chunk-J47FNDR7.js +113 -0
- package/dist/chunk-J47FNDR7.js.map +1 -0
- package/dist/chunk-JWPLJLDU.js +63 -0
- package/dist/chunk-JWPLJLDU.js.map +1 -0
- package/dist/chunk-K6WK37A6.js +865 -0
- package/dist/chunk-K6WK37A6.js.map +1 -0
- package/dist/chunk-KL4CP4SB.js +130 -0
- package/dist/chunk-KL4CP4SB.js.map +1 -0
- package/dist/chunk-KT4NEUNF.js +315 -0
- package/dist/chunk-KT4NEUNF.js.map +1 -0
- package/dist/chunk-KWBU5S5U.js +42 -0
- package/dist/chunk-KWBU5S5U.js.map +1 -0
- package/dist/chunk-L5RPWGFK.js +59 -0
- package/dist/chunk-L5RPWGFK.js.map +1 -0
- package/dist/chunk-L7WO3MZ4.js +128 -0
- package/dist/chunk-L7WO3MZ4.js.map +1 -0
- package/dist/chunk-LIRZNNUP.js +74 -0
- package/dist/chunk-LIRZNNUP.js.map +1 -0
- package/dist/chunk-LK6SGL53.js +22 -0
- package/dist/chunk-LK6SGL53.js.map +1 -0
- package/dist/chunk-LOBRX7VD.js +200 -0
- package/dist/chunk-LOBRX7VD.js.map +1 -0
- package/dist/chunk-LPSF4OQH.js +47 -0
- package/dist/chunk-LPSF4OQH.js.map +1 -0
- package/dist/chunk-LU3GQNDQ.js +152 -0
- package/dist/chunk-LU3GQNDQ.js.map +1 -0
- package/dist/chunk-M5KEYE5E.js +350 -0
- package/dist/chunk-M5KEYE5E.js.map +1 -0
- package/dist/chunk-M62O4P4T.js +41 -0
- package/dist/chunk-M62O4P4T.js.map +1 -0
- package/dist/chunk-MARWOCVP.js +48 -0
- package/dist/chunk-MARWOCVP.js.map +1 -0
- package/dist/chunk-MDDAA2AO.js +925 -0
- package/dist/chunk-MDDAA2AO.js.map +1 -0
- package/dist/chunk-MWGVGUIS.js +198 -0
- package/dist/chunk-MWGVGUIS.js.map +1 -0
- package/dist/chunk-N5AKDXAI.js +74 -0
- package/dist/chunk-N5AKDXAI.js.map +1 -0
- package/dist/chunk-NGAVDO7E.js +115 -0
- package/dist/chunk-NGAVDO7E.js.map +1 -0
- package/dist/chunk-NTTLPF7F.js +283 -0
- package/dist/chunk-NTTLPF7F.js.map +1 -0
- package/dist/chunk-ONRU4L2N.js +240 -0
- package/dist/chunk-ONRU4L2N.js.map +1 -0
- package/dist/chunk-ORZMT74A.js +209 -0
- package/dist/chunk-ORZMT74A.js.map +1 -0
- package/dist/chunk-OTAVQCSF.js +268 -0
- package/dist/chunk-OTAVQCSF.js.map +1 -0
- package/dist/chunk-PGK3VUHN.js +160 -0
- package/dist/chunk-PGK3VUHN.js.map +1 -0
- package/dist/chunk-Q6FETXJA.js +1362 -0
- package/dist/chunk-Q6FETXJA.js.map +1 -0
- package/dist/chunk-QANCTXQF.js +271 -0
- package/dist/chunk-QANCTXQF.js.map +1 -0
- package/dist/chunk-QCCCQT3O.js +189 -0
- package/dist/chunk-QCCCQT3O.js.map +1 -0
- package/dist/chunk-QDOSNLB4.js +1048 -0
- package/dist/chunk-QDOSNLB4.js.map +1 -0
- package/dist/chunk-QFQVZOGA.js +2168 -0
- package/dist/chunk-QFQVZOGA.js.map +1 -0
- package/dist/chunk-QPKFPHOO.js +178 -0
- package/dist/chunk-QPKFPHOO.js.map +1 -0
- package/dist/chunk-QSVPYQPG.js +268 -0
- package/dist/chunk-QSVPYQPG.js.map +1 -0
- package/dist/chunk-QWUUMMIK.js +3045 -0
- package/dist/chunk-QWUUMMIK.js.map +1 -0
- package/dist/chunk-QY2BHY5O.js +2378 -0
- package/dist/chunk-QY2BHY5O.js.map +1 -0
- package/dist/chunk-SCHEKPYH.js +349 -0
- package/dist/chunk-SCHEKPYH.js.map +1 -0
- package/dist/chunk-SCU65EZI.js +15 -0
- package/dist/chunk-SCU65EZI.js.map +1 -0
- package/dist/chunk-T4WRIV2C.js +170 -0
- package/dist/chunk-T4WRIV2C.js.map +1 -0
- package/dist/chunk-TKO4HZCK.js +1852 -0
- package/dist/chunk-TKO4HZCK.js.map +1 -0
- package/dist/chunk-TP4FZJIZ.js +93 -0
- package/dist/chunk-TP4FZJIZ.js.map +1 -0
- package/dist/chunk-TPB3I2AC.js +403 -0
- package/dist/chunk-TPB3I2AC.js.map +1 -0
- package/dist/chunk-TVVVQQAK.js +1431 -0
- package/dist/chunk-TVVVQQAK.js.map +1 -0
- package/dist/chunk-U4PV25RD.js +14 -0
- package/dist/chunk-U4PV25RD.js.map +1 -0
- package/dist/chunk-UCYSTFZR.js +284 -0
- package/dist/chunk-UCYSTFZR.js.map +1 -0
- package/dist/chunk-UHGBNIOS.js +205 -0
- package/dist/chunk-UHGBNIOS.js.map +1 -0
- package/dist/chunk-UIYZ5T3I.js +108 -0
- package/dist/chunk-UIYZ5T3I.js.map +1 -0
- package/dist/chunk-UV2FO7J4.js +747 -0
- package/dist/chunk-UV2FO7J4.js.map +1 -0
- package/dist/chunk-UZB5KHKX.js +63 -0
- package/dist/chunk-UZB5KHKX.js.map +1 -0
- package/dist/chunk-V3RXWQIE.js +626 -0
- package/dist/chunk-V3RXWQIE.js.map +1 -0
- package/dist/chunk-V4YC4LUK.js +444 -0
- package/dist/chunk-V4YC4LUK.js.map +1 -0
- package/dist/chunk-VEWZZM3H.js +133 -0
- package/dist/chunk-VEWZZM3H.js.map +1 -0
- package/dist/chunk-WWIQTB2Y.js +98 -0
- package/dist/chunk-WWIQTB2Y.js.map +1 -0
- package/dist/chunk-X7XN6YU4.js +24 -0
- package/dist/chunk-X7XN6YU4.js.map +1 -0
- package/dist/chunk-XKECPATV.js +202 -0
- package/dist/chunk-XKECPATV.js.map +1 -0
- package/dist/chunk-XYIK4LF6.js +75 -0
- package/dist/chunk-XYIK4LF6.js.map +1 -0
- package/dist/chunk-Y27UJK6V.js +39 -0
- package/dist/chunk-Y27UJK6V.js.map +1 -0
- package/dist/chunk-Y4Z4I6WK.js +9 -0
- package/dist/chunk-Y4Z4I6WK.js.map +1 -0
- package/dist/chunk-YAPUAHAY.js +10761 -0
- package/dist/chunk-YAPUAHAY.js.map +1 -0
- package/dist/chunk-YAZNBMNF.js +92 -0
- package/dist/chunk-YAZNBMNF.js.map +1 -0
- package/dist/chunk-YCN4BVDK.js +66 -0
- package/dist/chunk-YCN4BVDK.js.map +1 -0
- package/dist/chunk-YNCQ7E4M.js +388 -0
- package/dist/chunk-YNCQ7E4M.js.map +1 -0
- package/dist/chunk-YNI4S5WT.js +143 -0
- package/dist/chunk-YNI4S5WT.js.map +1 -0
- package/dist/chunk-YRMVARQP.js +406 -0
- package/dist/chunk-YRMVARQP.js.map +1 -0
- package/dist/chunk-Z5AAYHUC.js +79 -0
- package/dist/chunk-Z5AAYHUC.js.map +1 -0
- package/dist/chunk-Z5LAYHGJ.js +15 -0
- package/dist/chunk-Z5LAYHGJ.js.map +1 -0
- package/dist/chunk-ZJLY4QSU.js +823 -0
- package/dist/chunk-ZJLY4QSU.js.map +1 -0
- package/dist/chunk-ZKYI7UVO.js +276 -0
- package/dist/chunk-ZKYI7UVO.js.map +1 -0
- package/dist/chunk-ZPKBYX2F.js +297 -0
- package/dist/chunk-ZPKBYX2F.js.map +1 -0
- package/dist/chunking.d.ts +48 -0
- package/dist/chunking.js +11 -0
- package/dist/chunking.js.map +1 -0
- package/dist/cli.d.ts +1162 -0
- package/dist/cli.js +7187 -0
- package/dist/cli.js.map +1 -0
- package/dist/commitment-ledger.d.ts +83 -0
- package/dist/commitment-ledger.js +19 -0
- package/dist/commitment-ledger.js.map +1 -0
- package/dist/compression-optimizer.d.ts +37 -0
- package/dist/compression-optimizer.js +13 -0
- package/dist/compression-optimizer.js.map +1 -0
- package/dist/config.d.ts +6 -0
- package/dist/config.js +12 -0
- package/dist/config.js.map +1 -0
- package/dist/cue-anchors.d.ts +50 -0
- package/dist/cue-anchors.js +15 -0
- package/dist/cue-anchors.js.map +1 -0
- package/dist/dashboard-runtime.d.ts +46 -0
- package/dist/dashboard-runtime.js +10 -0
- package/dist/dashboard-runtime.js.map +1 -0
- package/dist/day-summary.d.ts +6 -0
- package/dist/day-summary.js +10 -0
- package/dist/day-summary.js.map +1 -0
- package/dist/delinearize.d.ts +34 -0
- package/dist/delinearize.js +11 -0
- package/dist/delinearize.js.map +1 -0
- package/dist/embedding-fallback.d.ts +22 -0
- package/dist/embedding-fallback.js +8 -0
- package/dist/embedding-fallback.js.map +1 -0
- package/dist/engine-P26JFSVY.js +19 -0
- package/dist/engine-P26JFSVY.js.map +1 -0
- package/dist/entity-retrieval.d.ts +23 -0
- package/dist/entity-retrieval.js +24 -0
- package/dist/entity-retrieval.js.map +1 -0
- package/dist/evals.d.ts +282 -0
- package/dist/evals.js +32 -0
- package/dist/evals.js.map +1 -0
- package/dist/explicit-capture.d.ts +60 -0
- package/dist/explicit-capture.js +23 -0
- package/dist/explicit-capture.js.map +1 -0
- package/dist/extraction.d.ts +141 -0
- package/dist/extraction.js +22 -0
- package/dist/extraction.js.map +1 -0
- package/dist/fallback-llm.d.ts +95 -0
- package/dist/fallback-llm.js +12 -0
- package/dist/fallback-llm.js.map +1 -0
- package/dist/graph-dashboard-diff.d.ts +12 -0
- package/dist/graph-dashboard-diff.js +8 -0
- package/dist/graph-dashboard-diff.js.map +1 -0
- package/dist/graph-dashboard-key.d.ts +5 -0
- package/dist/graph-dashboard-key.js +7 -0
- package/dist/graph-dashboard-key.js.map +1 -0
- package/dist/graph-dashboard-parser.d.ts +20 -0
- package/dist/graph-dashboard-parser.js +8 -0
- package/dist/graph-dashboard-parser.js.map +1 -0
- package/dist/graph.d.ts +157 -0
- package/dist/graph.js +27 -0
- package/dist/graph.js.map +1 -0
- package/dist/harmonic-retrieval.d.ts +27 -0
- package/dist/harmonic-retrieval.js +12 -0
- package/dist/harmonic-retrieval.js.map +1 -0
- package/dist/himem.d.ts +23 -0
- package/dist/himem.js +7 -0
- package/dist/himem.js.map +1 -0
- package/dist/hygiene.d.ts +24 -0
- package/dist/hygiene.js +9 -0
- package/dist/hygiene.js.map +1 -0
- package/dist/identity-continuity.d.ts +17 -0
- package/dist/identity-continuity.js +19 -0
- package/dist/identity-continuity.js.map +1 -0
- package/dist/importance.d.ts +25 -0
- package/dist/importance.js +11 -0
- package/dist/importance.js.map +1 -0
- package/dist/index.d.ts +923 -0
- package/dist/index.js +2512 -0
- package/dist/index.js.map +1 -0
- package/dist/intent.d.ts +8 -0
- package/dist/intent.js +13 -0
- package/dist/intent.js.map +1 -0
- package/dist/json-extract.d.ts +14 -0
- package/dist/json-extract.js +9 -0
- package/dist/json-extract.js.map +1 -0
- package/dist/json-store.d.ts +5 -0
- package/dist/json-store.js +11 -0
- package/dist/json-store.js.map +1 -0
- package/dist/legacy-hook-compat.d.ts +3 -0
- package/dist/legacy-hook-compat.js +35 -0
- package/dist/legacy-hook-compat.js.map +1 -0
- package/dist/lifecycle.d.ts +52 -0
- package/dist/lifecycle.js +21 -0
- package/dist/lifecycle.js.map +1 -0
- package/dist/local-llm.d.ts +154 -0
- package/dist/local-llm.js +10 -0
- package/dist/local-llm.js.map +1 -0
- package/dist/logger.d.ts +15 -0
- package/dist/logger.js +9 -0
- package/dist/logger.js.map +1 -0
- package/dist/memory-action-policy.d.ts +13 -0
- package/dist/memory-action-policy.js +7 -0
- package/dist/memory-action-policy.js.map +1 -0
- package/dist/memory-cache.d.ts +35 -0
- package/dist/memory-cache.js +37 -0
- package/dist/memory-cache.js.map +1 -0
- package/dist/memory-lifecycle-ledger-utils.d.ts +13 -0
- package/dist/memory-lifecycle-ledger-utils.js +23 -0
- package/dist/memory-lifecycle-ledger-utils.js.map +1 -0
- package/dist/memory-projection-format.d.ts +4 -0
- package/dist/memory-projection-format.js +9 -0
- package/dist/memory-projection-format.js.map +1 -0
- package/dist/memory-projection-store-NxMkbocT.d.ts +221 -0
- package/dist/memory-projection-store.d.ts +3 -0
- package/dist/memory-projection-store.js +31 -0
- package/dist/memory-projection-store.js.map +1 -0
- package/dist/model-registry.d.ts +60 -0
- package/dist/model-registry.js +8 -0
- package/dist/model-registry.js.map +1 -0
- package/dist/native-knowledge.d.ts +94 -0
- package/dist/native-knowledge.js +26 -0
- package/dist/native-knowledge.js.map +1 -0
- package/dist/negative.d.ts +26 -0
- package/dist/negative.js +8 -0
- package/dist/negative.js.map +1 -0
- package/dist/objective-state-writers.d.ts +22 -0
- package/dist/objective-state-writers.js +313 -0
- package/dist/objective-state-writers.js.map +1 -0
- package/dist/objective-state.d.ts +75 -0
- package/dist/objective-state.js +17 -0
- package/dist/objective-state.js.map +1 -0
- package/dist/openai-chat-compat.d.ts +13 -0
- package/dist/openai-chat-compat.js +11 -0
- package/dist/openai-chat-compat.js.map +1 -0
- package/dist/operator-toolkit.d.ts +304 -0
- package/dist/operator-toolkit.js +41 -0
- package/dist/operator-toolkit.js.map +1 -0
- package/dist/opik-exporter.d.ts +72 -0
- package/dist/opik-exporter.js +361 -0
- package/dist/opik-exporter.js.map +1 -0
- package/dist/orchestrator-zTa-Qo-1.d.ts +1104 -0
- package/dist/orchestrator.d.ts +21 -0
- package/dist/orchestrator.js +145 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/policy-runtime.d.ts +37 -0
- package/dist/policy-runtime.js +13 -0
- package/dist/policy-runtime.js.map +1 -0
- package/dist/port-C1GZFv8h.d.ts +41 -0
- package/dist/profiling.d.ts +80 -0
- package/dist/profiling.js +10 -0
- package/dist/profiling.js.map +1 -0
- package/dist/qmd-recall-cache.d.ts +29 -0
- package/dist/qmd-recall-cache.js +13 -0
- package/dist/qmd-recall-cache.js.map +1 -0
- package/dist/qmd.d.ts +105 -0
- package/dist/qmd.js +13 -0
- package/dist/qmd.js.map +1 -0
- package/dist/recall-qos.d.ts +33 -0
- package/dist/recall-qos.js +10 -0
- package/dist/recall-qos.js.map +1 -0
- package/dist/recall-query-policy.d.ts +20 -0
- package/dist/recall-query-policy.js +11 -0
- package/dist/recall-query-policy.js.map +1 -0
- package/dist/recall-state.d.ts +113 -0
- package/dist/recall-state.js +12 -0
- package/dist/recall-state.js.map +1 -0
- package/dist/recall-tokenization.d.ts +4 -0
- package/dist/recall-tokenization.js +9 -0
- package/dist/recall-tokenization.js.map +1 -0
- package/dist/reconstruct.d.ts +16 -0
- package/dist/reconstruct.js +7 -0
- package/dist/reconstruct.js.map +1 -0
- package/dist/release-changelog.d.ts +7 -0
- package/dist/release-changelog.js +30 -0
- package/dist/release-changelog.js.map +1 -0
- package/dist/relevance.d.ts +18 -0
- package/dist/relevance.js +8 -0
- package/dist/relevance.js.map +1 -0
- package/dist/rerank.d.ts +57 -0
- package/dist/rerank.js +11 -0
- package/dist/rerank.js.map +1 -0
- package/dist/resolve-provider-secret.d.ts +16 -0
- package/dist/resolve-provider-secret.js +11 -0
- package/dist/resolve-provider-secret.js.map +1 -0
- package/dist/resume-bundles.d.ts +66 -0
- package/dist/resume-bundles.js +27 -0
- package/dist/resume-bundles.js.map +1 -0
- package/dist/retrieval-agents.d.ts +129 -0
- package/dist/retrieval-agents.js +23 -0
- package/dist/retrieval-agents.js.map +1 -0
- package/dist/retrieval.d.ts +19 -0
- package/dist/retrieval.js +10 -0
- package/dist/retrieval.js.map +1 -0
- package/dist/sanitize.d.ts +9 -0
- package/dist/sanitize.js +9 -0
- package/dist/sanitize.js.map +1 -0
- package/dist/schemas.d.ts +688 -0
- package/dist/schemas.js +51 -0
- package/dist/schemas.js.map +1 -0
- package/dist/sdk-compat.d.ts +21 -0
- package/dist/sdk-compat.js +28 -0
- package/dist/sdk-compat.js.map +1 -0
- package/dist/semantic-consolidation.d.ts +42 -0
- package/dist/semantic-consolidation.js +12 -0
- package/dist/semantic-consolidation.js.map +1 -0
- package/dist/semantic-rule-promotion.d.ts +28 -0
- package/dist/semantic-rule-promotion.js +17 -0
- package/dist/semantic-rule-promotion.js.map +1 -0
- package/dist/semantic-rule-verifier.d.ts +19 -0
- package/dist/semantic-rule-verifier.js +18 -0
- package/dist/semantic-rule-verifier.js.map +1 -0
- package/dist/session-integrity.d.ts +67 -0
- package/dist/session-integrity.js +11 -0
- package/dist/session-integrity.js.map +1 -0
- package/dist/session-observer-bands.d.ts +6 -0
- package/dist/session-observer-bands.js +9 -0
- package/dist/session-observer-bands.js.map +1 -0
- package/dist/session-observer-state.d.ts +40 -0
- package/dist/session-observer-state.js +11 -0
- package/dist/session-observer-state.js.map +1 -0
- package/dist/signal.d.ts +6 -0
- package/dist/signal.js +9 -0
- package/dist/signal.js.map +1 -0
- package/dist/storage.d.ts +453 -0
- package/dist/storage.js +24 -0
- package/dist/storage.js.map +1 -0
- package/dist/store-contract.d.ts +10 -0
- package/dist/store-contract.js +21 -0
- package/dist/store-contract.js.map +1 -0
- package/dist/summarizer.d.ts +35 -0
- package/dist/summarizer.js +17 -0
- package/dist/summarizer.js.map +1 -0
- package/dist/summary-snapshot.d.ts +8 -0
- package/dist/summary-snapshot.js +13 -0
- package/dist/summary-snapshot.js.map +1 -0
- package/dist/temporal-index.d.ts +139 -0
- package/dist/temporal-index.js +29 -0
- package/dist/temporal-index.js.map +1 -0
- package/dist/threading.d.ts +62 -0
- package/dist/threading.js +8 -0
- package/dist/threading.js.map +1 -0
- package/dist/tier-migration.d.ts +44 -0
- package/dist/tier-migration.js +7 -0
- package/dist/tier-migration.js.map +1 -0
- package/dist/tier-routing.d.ts +21 -0
- package/dist/tier-routing.js +10 -0
- package/dist/tier-routing.js.map +1 -0
- package/dist/tmt.d.ts +79 -0
- package/dist/tmt.js +29 -0
- package/dist/tmt.js.map +1 -0
- package/dist/tokens.d.ts +24 -0
- package/dist/tokens.js +21 -0
- package/dist/tokens.js.map +1 -0
- package/dist/topics.d.ts +29 -0
- package/dist/topics.js +9 -0
- package/dist/topics.js.map +1 -0
- package/dist/transcript.d.ts +171 -0
- package/dist/transcript.js +9 -0
- package/dist/transcript.js.map +1 -0
- package/dist/trust-zones.d.ts +170 -0
- package/dist/trust-zones.js +32 -0
- package/dist/trust-zones.js.map +1 -0
- package/dist/types.d.ts +1243 -0
- package/dist/types.js +9 -0
- package/dist/types.js.map +1 -0
- package/dist/utility-learner.d.ts +59 -0
- package/dist/utility-learner.js +17 -0
- package/dist/utility-learner.js.map +1 -0
- package/dist/utility-runtime.d.ts +21 -0
- package/dist/utility-runtime.js +16 -0
- package/dist/utility-runtime.js.map +1 -0
- package/dist/utility-telemetry.d.ts +68 -0
- package/dist/utility-telemetry.js +17 -0
- package/dist/utility-telemetry.js.map +1 -0
- package/dist/verified-recall.d.ts +17 -0
- package/dist/verified-recall.js +19 -0
- package/dist/verified-recall.js.map +1 -0
- package/dist/version-utils.d.ts +4 -0
- package/dist/version-utils.js +7 -0
- package/dist/version-utils.js.map +1 -0
- package/dist/work-product-ledger.d.ts +65 -0
- package/dist/work-product-ledger.js +18 -0
- package/dist/work-product-ledger.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,865 @@
|
|
|
1
|
+
import {
|
|
2
|
+
listJsonFiles,
|
|
3
|
+
listNamedFiles,
|
|
4
|
+
readJsonFile
|
|
5
|
+
} from "./chunk-LPSF4OQH.js";
|
|
6
|
+
|
|
7
|
+
// src/evals.ts
|
|
8
|
+
import path from "path";
|
|
9
|
+
import { cp, mkdir, rm, stat, writeFile } from "fs/promises";
|
|
10
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Candidate to inspect.
 * @returns {boolean} `true` for non-array objects, `false` for everything else.
 */
function isRecord(value) {
  // `typeof null` is "object" and arrays are objects too, so rule both out first.
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
13
|
+
/**
 * Validates that `value` is a string with non-whitespace content.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} field - Field name used as the prefix of the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When `value` is not a string or is blank after trimming.
 */
function assertString(value, field) {
  // Non-strings collapse to "" so a single emptiness check covers both failures.
  const trimmed = typeof value === "string" ? value.trim() : "";
  if (trimmed.length === 0) {
    throw new Error(`${field} must be a non-empty string`);
  }
  return trimmed;
}
|
|
19
|
+
/**
 * Validates an optional array of non-empty strings.
 *
 * @param {unknown} value - `undefined`, or an array expected to hold only strings.
 * @param {string} field - Field name used as the prefix of error messages.
 * @returns {string[] | undefined} `undefined` when `value` is `undefined`;
 *   otherwise a new array with every entry trimmed.
 * @throws {Error} When `value` is defined but not an array, or when any entry
 *   is not a string or is blank after trimming.
 */
function optionalStringArray(value, field) {
  if (value === undefined) {
    return undefined;
  }
  if (!Array.isArray(value)) {
    throw new Error(`${field} must be an array of strings`);
  }
  const trimmedEntries = [];
  for (const entry of value) {
    // A non-string entry and a blank entry are reported with the same message.
    const text = typeof entry === "string" ? entry.trim() : "";
    if (text.length === 0) {
      throw new Error(`${field} must be an array of non-empty strings`);
    }
    trimmedEntries.push(text);
  }
  return trimmedEntries;
}
|
|
30
|
+
/**
 * Picks the directory used for eval state.
 *
 * @param {string} memoryDir - Base directory; used only when no override applies.
 * @param {unknown} [overrideDir] - Explicit directory; honoured when it is a
 *   string with non-whitespace content.
 * @returns {string} The trimmed override, or `<memoryDir>/state/evals`.
 */
function resolveEvalStoreDir(memoryDir, overrideDir) {
  const override = typeof overrideDir === "string" ? overrideDir.trim() : "";
  return override.length > 0 ? override : path.join(memoryDir, "state", "evals");
}
|
|
36
|
+
/**
 * Validates that `value` can be used as a single path component.
 *
 * Rejected inputs:
 *  - anything that is not a string (previously this surfaced as a raw
 *    TypeError from calling `.includes` on a non-string);
 *  - the empty string, because `path.join(dir, "")` resolves to `dir` itself
 *    and so does not name a child entry;
 *  - "." and "..";
 *  - any value containing "/" or "\\" (path separators);
 *  - any value containing a NUL character, which is never valid in a file name.
 *
 * @param {unknown} value - Candidate path segment.
 * @param {string} field - Field name used as the prefix of the error message.
 * @returns {string} `value`, unchanged, when it is safe.
 * @throws {Error} When `value` is not a safe path segment.
 */
function assertSafePathSegment(value, field) {
  const isUnsafe =
    typeof value !== "string" ||
    value.length === 0 ||
    value === "." ||
    value === ".." ||
    value.includes("/") ||
    value.includes("\\") ||
    value.includes("\0");
  if (isUnsafe) {
    throw new Error(`${field} must be a safe path segment`);
  }
  return value;
}
|
|
42
|
+
/**
 * Checks a benchmark id with `assertSafePathSegment`, reporting failures
 * under the field name "benchmarkId".
 *
 * @param {string} benchmarkId - Benchmark identifier to check.
 * @returns {string} Whatever `assertSafePathSegment` returns for the id.
 * @throws {Error} Propagated from `assertSafePathSegment`.
 */
function assertSafeBenchmarkId(benchmarkId) {
  const fieldName = "benchmarkId";
  return assertSafePathSegment(benchmarkId, fieldName);
}
|
|
45
|
+
/**
 * Validates a raw benchmark manifest and returns a normalised copy.
 *
 * Checks run in this order: object shape, `schemaVersion === 1`, `cases` is an
 * array, `benchmarkType` is allowed, each case, the red-team feature gate, the
 * red-team-only required fields, and finally the top-level string fields.
 *
 * @param {unknown} raw - Parsed manifest content.
 * @param {{ memoryRedTeamBenchEnabled?: boolean }} [options] - Only
 *   `memoryRedTeamBenchEnabled` is read; it must be exactly `true` for
 *   "memory-red-team" manifests to be accepted.
 * @returns {object} Manifest with trimmed strings; optional fields that are
 *   absent or blank are `undefined`. `benchmarkType` defaults to "standard".
 * @throws {Error} On the first validation failure.
 */
function validateEvalBenchmarkManifest(raw, options) {
  // Optional text fields: keep a trimmed string, treat everything else as absent.
  const trimmedOrUndefined = (candidate) => {
    if (typeof candidate !== "string") {
      return undefined;
    }
    const text = candidate.trim();
    return text.length > 0 ? text : undefined;
  };

  if (!isRecord(raw)) {
    throw new Error("benchmark manifest must be an object");
  }
  if (raw.schemaVersion !== 1) {
    throw new Error("schemaVersion must be 1");
  }
  if (!Array.isArray(raw.cases)) {
    throw new Error("cases must be an array");
  }

  const benchmarkType = trimmedOrUndefined(raw.benchmarkType) ?? "standard";
  const isRedTeam = benchmarkType === "memory-red-team";
  if (!isRedTeam && benchmarkType !== "standard") {
    throw new Error("benchmarkType must be one of standard|memory-red-team");
  }

  const cases = raw.cases.map((entry, position) => {
    if (!isRecord(entry)) {
      throw new Error(`cases[${position}] must be an object`);
    }
    const id = assertString(entry.id, `cases[${position}].id`);
    const prompt = assertString(entry.prompt, `cases[${position}].prompt`);
    const expectedSignals = optionalStringArray(
      entry.expectedSignals,
      `cases[${position}].expectedSignals`
    );
    const notes = trimmedOrUndefined(entry.notes);
    return { id, prompt, expectedSignals, notes };
  });

  // Red-team packs are opt-in and need their two descriptive fields.
  if (isRedTeam && options?.memoryRedTeamBenchEnabled !== true) {
    throw new Error("memory-red-team benchmark packs require memoryRedTeamBenchEnabled");
  }
  const attackClass = trimmedOrUndefined(raw.attackClass);
  const targetSurface = trimmedOrUndefined(raw.targetSurface);
  if (isRedTeam) {
    if (attackClass === undefined) {
      throw new Error("attackClass must be a non-empty string");
    }
    if (targetSurface === undefined) {
      throw new Error("targetSurface must be a non-empty string");
    }
  }

  const benchmarkId = assertString(raw.benchmarkId, "benchmarkId");
  const title = assertString(raw.title, "title");
  const description = trimmedOrUndefined(raw.description);
  const tags = optionalStringArray(raw.tags, "tags");
  const sourceLinks = optionalStringArray(raw.sourceLinks, "sourceLinks");

  return {
    schemaVersion: 1,
    benchmarkId,
    benchmarkType,
    title,
    description,
    tags,
    sourceLinks,
    attackClass,
    targetSurface,
    cases
  };
}
|
|
87
|
+
/**
 * Validates a raw eval run summary and returns a normalised copy.
 *
 * @param {unknown} raw - Parsed summary content.
 * @returns {object} Summary with trimmed strings, numeric case counters, and
 *   `metrics` as produced by `parseOptionalEvalRunMetrics`. Optional text
 *   fields that are absent or blank are `undefined`.
 * @throws {Error} When `raw` is not an object, `schemaVersion` is not 1,
 *   `status` is not one of running|completed|failed|partial, a counter is not
 *   a finite non-negative number after `Number()` conversion, or a required
 *   string field is missing or blank.
 */
function validateEvalRunSummary(raw) {
  const trimmedOrUndefined = (candidate) => {
    if (typeof candidate !== "string") {
      return undefined;
    }
    const text = candidate.trim();
    return text.length > 0 ? text : undefined;
  };

  if (!isRecord(raw)) {
    throw new Error("eval run summary must be an object");
  }
  if (raw.schemaVersion !== 1) {
    throw new Error("schemaVersion must be 1");
  }

  const status = assertString(raw.status, "status");
  const allowedStatuses = new Set(["running", "completed", "failed", "partial"]);
  if (!allowedStatuses.has(status)) {
    throw new Error("status must be one of running|completed|failed|partial");
  }

  // Convert all three counters first, then validate them in declaration order.
  const counts = {
    totalCases: Number(raw.totalCases),
    passedCases: Number(raw.passedCases),
    failedCases: Number(raw.failedCases)
  };
  for (const [field, count] of Object.entries(counts)) {
    const isValidCount = Number.isFinite(count) && count >= 0;
    if (!isValidCount) {
      throw new Error(`${field} must be a non-negative number`);
    }
  }

  const metrics = parseOptionalEvalRunMetrics(raw.metrics);
  const runId = assertString(raw.runId, "runId");
  const benchmarkId = assertString(raw.benchmarkId, "benchmarkId");
  const startedAt = assertString(raw.startedAt, "startedAt");

  return {
    schemaVersion: 1,
    runId,
    benchmarkId,
    status,
    startedAt,
    completedAt: trimmedOrUndefined(raw.completedAt),
    ...counts,
    metrics,
    notes: trimmedOrUndefined(raw.notes),
    gitRef: trimmedOrUndefined(raw.gitRef)
  };
}
|
|
116
|
+
/**
 * Validates a raw eval baseline snapshot and returns a normalised copy.
 *
 * @param {unknown} raw - Parsed snapshot content.
 * @returns {object} Snapshot with trimmed strings and one normalised entry per
 *   benchmark. Optional text fields that are absent or blank are `undefined`.
 * @throws {Error} When `raw` is not an object, `schemaVersion` is not 1,
 *   `benchmarks` is not an array, an entry is malformed (including a
 *   `passRate` outside 0..1), `benchmarkCount` is not a finite non-negative
 *   number or differs from `benchmarks.length`, or a required string field is
 *   missing or blank.
 */
function validateEvalBaselineSnapshot(raw) {
  const trimmedOrUndefined = (candidate) => {
    if (typeof candidate !== "string") {
      return undefined;
    }
    const text = candidate.trim();
    return text.length > 0 ? text : undefined;
  };

  if (!isRecord(raw)) {
    throw new Error("eval baseline snapshot must be an object");
  }
  if (raw.schemaVersion !== 1) {
    throw new Error("schemaVersion must be 1");
  }
  if (!Array.isArray(raw.benchmarks)) {
    throw new Error("benchmarks must be an array");
  }

  const benchmarks = raw.benchmarks.map((entry, position) => {
    if (!isRecord(entry)) {
      throw new Error(`benchmarks[${position}] must be an object`);
    }
    const passRate = Number(entry.passRate);
    const isUnitInterval = Number.isFinite(passRate) && passRate >= 0 && passRate <= 1;
    if (!isUnitInterval) {
      throw new Error(`benchmarks[${position}].passRate must be a number between 0 and 1`);
    }
    const metrics = parseOptionalEvalRunMetrics(entry.metrics);
    const benchmarkId = assertString(entry.benchmarkId, `benchmarks[${position}].benchmarkId`);
    const runId = assertString(entry.runId, `benchmarks[${position}].runId`);
    return {
      benchmarkId,
      runId,
      completedAt: trimmedOrUndefined(entry.completedAt),
      gitRef: trimmedOrUndefined(entry.gitRef),
      passRate,
      metrics
    };
  });

  // The declared count must be a sane number and agree with the actual list.
  const benchmarkCount = Number(raw.benchmarkCount);
  const isValidCount = Number.isFinite(benchmarkCount) && benchmarkCount >= 0;
  if (!isValidCount) {
    throw new Error("benchmarkCount must be a non-negative number");
  }
  if (benchmarkCount !== benchmarks.length) {
    throw new Error("benchmarkCount must match benchmarks.length");
  }

  const snapshotId = assertString(raw.snapshotId, "snapshotId");
  const createdAt = assertString(raw.createdAt, "createdAt");
  const sourceRootDir = assertString(raw.sourceRootDir, "sourceRootDir");

  return {
    schemaVersion: 1,
    snapshotId,
    createdAt,
    sourceRootDir,
    benchmarkCount,
    benchmarks,
    notes: trimmedOrUndefined(raw.notes),
    gitRef: trimmedOrUndefined(raw.gitRef)
  };
}
|
|
154
|
+
/**
 * Extract the optional numeric metrics of an eval run from untrusted input.
 *
 * Every known metric key is always present on the result. A key whose input
 * value is missing, not a number, or not finite is set to `undefined`.
 * Non-finite values (NaN, +/-Infinity) are dropped because subtracting them
 * yields NaN deltas and JSON serialization cannot represent them.
 *
 * @param {unknown} raw - Candidate metrics object.
 * @returns {object | undefined} Normalized metrics, or `undefined` when `raw`
 *   is not a record.
 */
function parseOptionalEvalRunMetrics(raw) {
  if (!isRecord(raw)) return void 0;
  const metricNames = [
    "recallPrecisionAtK",
    "actionOutcomeScore",
    "objectiveStateCoverage",
    "causalPathRecall",
    "trustViolationRate",
    "creationRecoveryScore"
  ];
  const metrics = {};
  for (const name of metricNames) {
    const value = raw[name];
    // Keep the key even when the value is rejected so the result shape is stable.
    metrics[name] = typeof value === "number" && Number.isFinite(value) ? value : void 0;
  }
  return metrics;
}
|
|
165
|
+
/**
 * Validate a raw shadow-recall record and return a normalized copy.
 *
 * Checks run in a fixed order (shape, schemaVersion, recallMode, source, the
 * six numeric counters, memoryIds, injected, timings, identityInjectionMode,
 * then the required string fields), so the first failing check decides which
 * error is thrown. Numeric counters are coerced with `Number()` before being
 * checked for finiteness and non-negativity.
 *
 * @param {unknown} raw - Candidate record.
 * @returns {object} Normalized record with `schemaVersion: 1`.
 * @throws {Error} When a field is missing or carries an invalid value.
 */
function validateEvalShadowRecallRecord(raw) {
  if (!isRecord(raw)) {
    throw new Error("eval shadow recall record must be an object");
  }
  if (raw.schemaVersion !== 1) {
    throw new Error("schemaVersion must be 1");
  }

  // Optional free-text fields: trimmed when non-blank, otherwise dropped.
  const trimmedOrUndefined = (candidate) => {
    if (typeof candidate !== "string") return void 0;
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : void 0;
  };

  const recallMode = assertString(raw.recallMode, "recallMode");
  const knownRecallModes = new Set(["no_recall", "minimal", "full", "graph_mode"]);
  if (!knownRecallModes.has(recallMode)) {
    throw new Error("recallMode must be one of no_recall|minimal|full|graph_mode");
  }

  const source = assertString(raw.source, "source");
  const knownSources = new Set(["none", "hot_qmd", "hot_embedding", "cold_fallback", "recent_scan"]);
  if (!knownSources.has(source)) {
    throw new Error("source must be one of none|hot_qmd|hot_embedding|cold_fallback|recent_scan");
  }

  // Coerce every counter first, then validate them in declaration order.
  const counterNames = [
    "promptLength",
    "retrievalQueryLength",
    "recallResultLimit",
    "recalledMemoryCount",
    "contextChars",
    "durationMs"
  ];
  const counterValues = counterNames.map((name) => Number(raw[name]));
  counterNames.forEach((name, position) => {
    const amount = counterValues[position];
    if (amount < 0 || !Number.isFinite(amount)) {
      throw new Error(`${name} must be a non-negative number`);
    }
  });
  const [
    promptLength,
    retrievalQueryLength,
    recallResultLimit,
    recalledMemoryCount,
    contextChars,
    durationMs
  ] = counterValues;

  const memoryIds = optionalStringArray(raw.memoryIds, "memoryIds") ?? [];

  if (typeof raw.injected !== "boolean") {
    throw new Error("injected must be a boolean");
  }

  let timings;
  if (raw.timings !== void 0) {
    if (!isRecord(raw.timings)) {
      throw new Error("timings must be an object of strings");
    }
    const copied = {};
    for (const [label, text] of Object.entries(raw.timings)) {
      if (typeof text !== "string") {
        throw new Error("timings must be an object of strings");
      }
      copied[label] = text;
    }
    timings = copied;
  }

  const identityInjectionMode = trimmedOrUndefined(raw.identityInjectionMode);
  const knownIdentityModes = ["recovery_only", "minimal", "full", "none"];
  if (identityInjectionMode !== void 0 && !knownIdentityModes.includes(identityInjectionMode)) {
    throw new Error("identityInjectionMode must be one of recovery_only|minimal|full|none");
  }

  const identityInjectedChars =
    typeof raw.identityInjectedChars === "number" && Number.isFinite(raw.identityInjectedChars)
      ? raw.identityInjectedChars
      : void 0;
  const identityInjectionTruncated =
    typeof raw.identityInjectionTruncated === "boolean" ? raw.identityInjectionTruncated : void 0;

  return {
    schemaVersion: 1,
    traceId: assertString(raw.traceId, "traceId"),
    recordedAt: assertString(raw.recordedAt, "recordedAt"),
    sessionKey: assertString(raw.sessionKey, "sessionKey"),
    promptHash: assertString(raw.promptHash, "promptHash"),
    promptLength,
    retrievalQueryHash: assertString(raw.retrievalQueryHash, "retrievalQueryHash"),
    retrievalQueryLength,
    recallMode,
    recallResultLimit,
    source,
    recalledMemoryCount,
    injected: raw.injected,
    contextChars,
    memoryIds,
    policyVersion: trimmedOrUndefined(raw.policyVersion),
    identityInjectionMode,
    identityInjectedChars,
    identityInjectionTruncated,
    durationMs,
    timings
  };
}
|
|
234
|
+
// Metric names for which a decrease counts as an improvement; compareMetricDeltas
// treats every metric not listed here as higher-is-better.
var LOWER_IS_BETTER_METRICS = /* @__PURE__ */ new Set(["trustViolationRate"]);
|
|
235
|
+
/**
 * Fraction of cases a run passed.
 *
 * @param {{ passedCases: number, totalCases: number }} run - Run summary.
 * @returns {number} `passedCases / totalCases`, or 0 when `totalCases` is not
 *   greater than zero.
 */
function computePassRate(run) {
  if (run.totalCases > 0) {
    return run.passedCases / run.totalCases;
  }
  return 0;
}
|
|
238
|
+
/**
 * Pick the most recent completed run for each benchmark.
 *
 * Runs are ordered newest-first by `completedAt` (falling back to `startedAt`);
 * a timestamp that cannot be parsed sorts as 0. The input array is not mutated.
 *
 * @param {Array<object>} runs - Run summaries.
 * @returns {Map<string, object>} Latest completed run keyed by `benchmarkId`.
 */
function latestCompletedRunsByBenchmark(runs) {
  const timestampOf = (run) => {
    const parsed = Date.parse(run.completedAt ?? run.startedAt);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const newestFirst = Array.from(runs)
    .filter((run) => run.status === "completed")
    .sort((left, right) => timestampOf(right) - timestampOf(left));

  const latestByBenchmark = /* @__PURE__ */ new Map();
  for (const run of newestFirst) {
    // The first run seen for a benchmark is its newest one.
    if (latestByBenchmark.has(run.benchmarkId)) continue;
    latestByBenchmark.set(run.benchmarkId, run);
  }
  return latestByBenchmark;
}
|
|
252
|
+
/**
 * Compare two metric objects key by key.
 *
 * Only keys of `baseMetrics` whose value is a number on both sides are
 * compared. A non-zero delta is classified as an improvement or a regression,
 * with the direction flipped for metrics in LOWER_IS_BETTER_METRICS.
 *
 * @param {object | undefined} baseMetrics - Baseline metrics.
 * @param {object | undefined} candidateMetrics - Candidate metrics.
 * @returns {{ deltas: object, regressions: string[], improvements: string[] }}
 *   Per-metric deltas (candidate minus base) plus summaries of the form
 *   "<metric> <base> -> <candidate>". All parts are empty when either input is
 *   missing.
 */
function compareMetricDeltas(baseMetrics, candidateMetrics) {
  const outcome = { deltas: {}, regressions: [], improvements: [] };
  if (!baseMetrics || !candidateMetrics) {
    return outcome;
  }

  Object.keys(baseMetrics).forEach((metric) => {
    const before = baseMetrics[metric];
    const after = candidateMetrics[metric];
    if (typeof before !== "number" || typeof after !== "number") return;

    const change = after - before;
    outcome.deltas[metric] = change;
    if (change === 0) return;

    const gotBetter = LOWER_IS_BETTER_METRICS.has(metric) ? change < 0 : change > 0;
    const bucket = gotBetter ? outcome.improvements : outcome.regressions;
    bucket.push(`${metric} ${before} -> ${after}`);
  });

  return outcome;
}
|
|
277
|
+
/**
 * Render a baseline delta report as a Markdown document.
 *
 * The output has a summary bullet list followed by "Regressions",
 * "Improvements" and "Benchmark Deltas" sections; an empty section is rendered
 * as a single "- none" bullet. Lines are joined with "\n" and there is no
 * trailing newline.
 *
 * @param {object} report - Delta report (the `markdownReport` field is not read).
 * @returns {string} Markdown text.
 */
function formatEvalBaselineDeltaMarkdown(report) {
  const asBullets = (entries) =>
    entries.length === 0 ? ["- none"] : entries.map((entry) => `- ${entry}`);

  const summary = [
    "# Eval Baseline Delta Report",
    "",
    `- Passed: ${report.passed ? "yes" : "no"}`,
    `- Baseline snapshot: ${report.baselineSnapshotId}`,
    `- Baseline created: ${report.baselineCreatedAt}`,
    `- Baseline source root: ${report.baselineSourceRootDir}`,
    `- Candidate root: ${report.candidateRootDir}`,
    `- Benchmarks compared: ${report.comparedBenchmarks}`
  ];

  const missing =
    report.missingCandidateBenchmarks.length > 0
      ? [`- Missing candidate benchmarks: ${report.missingCandidateBenchmarks.join(", ")}`]
      : [];

  const invalid = report.invalidArtifacts.candidate;
  const invalidLine = `- Invalid candidate artifacts: benchmarks=${invalid.benchmarks}, runs=${invalid.runs}, shadows=${invalid.shadows}, baselines=${invalid.baselines}`;

  const deltaEntries = report.deltas.map(
    (delta) =>
      `${delta.benchmarkId}: passRate ${delta.basePassRate} -> ${delta.candidatePassRate} (delta ${delta.passRateDelta})`
  );

  return [
    ...summary,
    ...missing,
    invalidLine,
    "",
    "## Regressions",
    ...asBullets(report.regressions),
    "",
    "## Improvements",
    ...asBullets(report.improvements),
    "",
    "## Benchmark Deltas",
    ...asBullets(deltaEntries)
  ].join("\n");
}
|
|
319
|
+
/**
 * Read every artifact under an eval store root and summarize it.
 *
 * Benchmark manifests, run summaries, shadow records and baseline snapshots
 * are listed, read and validated one file at a time. A file that fails to read
 * or validate is recorded as `{ path, error }` instead of aborting the scan.
 * Runs are then sorted newest-first by `completedAt` (falling back to
 * `startedAt`), shadows by `recordedAt` and baselines by `createdAt`, so index
 * 0 of each list is the "latest" entry reported in `status`.
 *
 * @param {object} options - `rootDir` plus the feature flags `enabled`,
 *   `shadowModeEnabled`, `baselineSnapshotsEnabled` and
 *   `memoryRedTeamBenchEnabled`, which are echoed or forwarded as shown below.
 * @returns {Promise<object>} `{ status, manifests, runs, shadows, baselines }`.
 */
async function collectEvalStoreSnapshot(options) {
  const { rootDir } = options;
  const benchmarkDir = path.join(rootDir, "benchmarks");
  const runsDir = path.join(rootDir, "runs");
  const shadowDir = path.join(rootDir, "shadow");
  const baselineDir = path.join(rootDir, "baselines");

  const benchmarkFiles = await listNamedFiles(benchmarkDir, "manifest.json");
  const runFiles = await listJsonFiles(runsDir);
  const shadowFiles = await listJsonFiles(shadowDir);
  const baselineFiles = await listJsonFiles(baselineDir);

  // Read and validate files sequentially, splitting them into accepted values
  // and { path, error } rejections.
  const loadArtifacts = async (filePaths, validate) => {
    const accepted = [];
    const rejected = [];
    for (const filePath of filePaths) {
      try {
        accepted.push(validate(await readJsonFile(filePath)));
      } catch (failure) {
        rejected.push({
          path: filePath,
          error: failure instanceof Error ? failure.message : String(failure)
        });
      }
    }
    return [accepted, rejected];
  };

  const [manifests, invalidBenchmarks] = await loadArtifacts(benchmarkFiles, (raw) =>
    validateEvalBenchmarkManifest(raw, {
      memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
    })
  );
  const [runs, invalidRuns] = await loadArtifacts(runFiles, (raw) => validateEvalRunSummary(raw));
  const [shadows, invalidShadows] = await loadArtifacts(shadowFiles, (raw) =>
    validateEvalShadowRecallRecord(raw)
  );
  const [baselines, invalidBaselines] = await loadArtifacts(baselineFiles, (raw) =>
    validateEvalBaselineSnapshot(raw)
  );

  const runTimestamp = (run) => {
    const parsed = Date.parse(run.completedAt ?? run.startedAt);
    return Number.isNaN(parsed) ? 0 : parsed;
  };
  runs.sort((left, right) => runTimestamp(right) - runTimestamp(left));
  shadows.sort((left, right) => right.recordedAt.localeCompare(left.recordedAt));
  baselines.sort((left, right) => right.createdAt.localeCompare(left.createdAt));

  const tags = /* @__PURE__ */ new Set();
  const attackClasses = /* @__PURE__ */ new Set();
  const sourceLinks = /* @__PURE__ */ new Set();
  const targetSurfaces = /* @__PURE__ */ new Set();
  let totalCases = 0;
  let redTeam = 0;
  for (const manifest of manifests) {
    totalCases += manifest.cases.length;
    if (manifest.benchmarkType === "memory-red-team") {
      redTeam += 1;
      // Attack class and target surface are only collected for red-team packs.
      if (manifest.attackClass) attackClasses.add(manifest.attackClass);
      if (manifest.targetSurface) targetSurfaces.add(manifest.targetSurface);
    }
    for (const tag of manifest.tags ?? []) {
      tags.add(tag);
    }
    for (const link of manifest.sourceLinks ?? []) {
      sourceLinks.add(link);
    }
  }

  const countRunsWithStatus = (status) => runs.filter((run) => run.status === status).length;
  const sortedValues = (set) => [...set].sort();
  const latestRun = runs[0];
  const latestShadow = shadows[0];
  const latestBaseline = baselines[0];

  const status = {
    enabled: options.enabled,
    shadowModeEnabled: options.shadowModeEnabled,
    rootDir,
    benchmarkDir,
    runsDir,
    benchmarks: {
      total: benchmarkFiles.length,
      valid: manifests.length,
      invalid: invalidBenchmarks.length,
      redTeam,
      totalCases,
      attackClasses: sortedValues(attackClasses),
      tags: sortedValues(tags),
      targetSurfaces: sortedValues(targetSurfaces),
      sourceLinks: sortedValues(sourceLinks)
    },
    runs: {
      total: runFiles.length,
      invalid: invalidRuns.length,
      completed: countRunsWithStatus("completed"),
      failed: countRunsWithStatus("failed"),
      partial: countRunsWithStatus("partial"),
      running: countRunsWithStatus("running"),
      latestRunId: latestRun?.runId,
      latestBenchmarkId: latestRun?.benchmarkId,
      latestCompletedAt: latestRun?.completedAt
    },
    shadows: {
      total: shadowFiles.length,
      invalid: invalidShadows.length,
      latestTraceId: latestShadow?.traceId,
      latestRecordedAt: latestShadow?.recordedAt,
      latestSessionKey: latestShadow?.sessionKey
    },
    baselines: {
      enabled: options.baselineSnapshotsEnabled === true,
      total: baselineFiles.length,
      invalid: invalidBaselines.length,
      latestSnapshotId: latestBaseline?.snapshotId,
      latestCreatedAt: latestBaseline?.createdAt,
      latestBenchmarkCount: latestBaseline?.benchmarkCount
    },
    latestRun,
    latestShadow,
    latestBaseline,
    invalidBenchmarks,
    invalidRuns,
    invalidShadows,
    invalidBaselines
  };

  return { status, manifests, runs, shadows, baselines };
}
|
|
462
|
+
/**
 * Work out where the manifest of a benchmark pack lives.
 *
 * @param {string} sourcePath - Path to a pack directory or to a manifest file.
 * @returns {Promise<{ sourceKind: "directory" | "file", manifestPath: string }>}
 *   For a directory the manifest is `<sourcePath>/manifest.json`; for a file it
 *   is `sourcePath` itself.
 * @throws {Error} When `sourcePath` is neither a file nor a directory; errors
 *   from `stat` (for example a missing path) propagate unchanged.
 */
async function resolveBenchmarkManifestPath(sourcePath) {
  const entry = await stat(sourcePath);
  const isDirectory = entry.isDirectory();
  if (!isDirectory && !entry.isFile()) {
    throw new Error("benchmark pack source must be a file or directory");
  }
  return isDirectory
    ? { sourceKind: "directory", manifestPath: path.join(sourcePath, "manifest.json") }
    : { sourceKind: "file", manifestPath: sourcePath };
}
|
|
478
|
+
/**
 * Validate a benchmark pack on disk and summarize its manifest.
 *
 * @param {string} sourcePath - Pack directory or manifest file; surrounding
 *   whitespace is ignored.
 * @param {{ memoryRedTeamBenchEnabled?: boolean }} [options] - Forwarded to the
 *   manifest validator.
 * @returns {Promise<object>} Summary with `sourcePath`, `manifestPath`,
 *   `benchmarkId`, `benchmarkType` (defaults to "standard"), `title`,
 *   `attackClass`, `targetSurface`, `totalCases`, and copies of `tags` and
 *   `sourceLinks`.
 * @throws {Error} When `sourcePath` is not a non-blank string, or when
 *   resolving, reading or validating the manifest fails.
 */
async function validateEvalBenchmarkPack(sourcePath, options) {
  // A non-string path gets the same descriptive error as a blank one instead of
  // a TypeError from calling .trim() on it.
  const trimmedSourcePath = typeof sourcePath === "string" ? sourcePath.trim() : "";
  if (trimmedSourcePath.length === 0) {
    throw new Error("benchmark pack path must be a non-empty string");
  }
  const { manifestPath } = await resolveBenchmarkManifestPath(trimmedSourcePath);
  const manifest = validateEvalBenchmarkManifest(await readJsonFile(manifestPath), {
    memoryRedTeamBenchEnabled: options?.memoryRedTeamBenchEnabled
  });
  return {
    sourcePath: trimmedSourcePath,
    manifestPath,
    benchmarkId: assertSafeBenchmarkId(manifest.benchmarkId),
    benchmarkType: manifest.benchmarkType ?? "standard",
    title: manifest.title,
    attackClass: manifest.attackClass,
    targetSurface: manifest.targetSurface,
    totalCases: manifest.cases.length,
    tags: [...manifest.tags ?? []],
    sourceLinks: [...manifest.sourceLinks ?? []]
  };
}
|
|
500
|
+
/**
 * Validate a benchmark pack and copy it into the eval store under
 * `<store>/benchmarks/<benchmarkId>`.
 *
 * A directory source is copied recursively; a file source is copied to
 * `manifest.json` inside the target directory. An existing target is only
 * replaced when `options.force === true`.
 *
 * @param {object} options - `sourcePath`, `memoryDir`, `evalStoreDir`, `force`
 *   and `memoryRedTeamBenchEnabled`.
 * @returns {Promise<object>} The validation summary plus `targetDir` and
 *   `overwritten` (true when an existing target was replaced).
 * @throws {Error} When validation fails, when the target exists and `force` is
 *   not set, or when the source is the target directory itself or lives inside
 *   it (replacing the target would delete the source before it is copied).
 */
async function importEvalBenchmarkPack(options) {
  const summary = await validateEvalBenchmarkPack(options.sourcePath, {
    memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
  });
  const rootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);
  const benchmarkDir = path.join(rootDir, "benchmarks");
  const targetDir = path.join(benchmarkDir, summary.benchmarkId);
  const { sourceKind, manifestPath } = await resolveBenchmarkManifestPath(summary.sourcePath);

  // Only a missing target (ENOENT) means "nothing to overwrite"; any other stat
  // failure is a real error.
  let overwritten = true;
  try {
    await stat(targetDir);
  } catch (error) {
    if (!(error instanceof Error) || !("code" in error) || error.code !== "ENOENT") {
      throw error;
    }
    overwritten = false;
  }

  if (overwritten) {
    if (options.force !== true) {
      throw new Error(`benchmark pack already exists at ${targetDir}; rerun with force to replace it`);
    }
    // Removing the target would also remove a source located at or below it,
    // leaving nothing to copy, so refuse before deleting anything.
    const sourceFromTarget = path.relative(path.resolve(targetDir), path.resolve(summary.sourcePath));
    const sourceInsideTarget =
      sourceFromTarget === "" ||
      (sourceFromTarget !== ".." &&
        !sourceFromTarget.startsWith(`..${path.sep}`) &&
        !path.isAbsolute(sourceFromTarget));
    if (sourceInsideTarget) {
      throw new Error(
        `benchmark pack source ${summary.sourcePath} is inside the import target ${targetDir}; refusing to replace it`
      );
    }
    await rm(targetDir, { recursive: true, force: true });
  }

  await mkdir(benchmarkDir, { recursive: true });
  if (sourceKind === "directory") {
    await cp(summary.sourcePath, targetDir, { recursive: true });
  } else {
    await mkdir(targetDir, { recursive: true });
    await cp(manifestPath, path.join(targetDir, "manifest.json"));
  }
  return {
    ...summary,
    targetDir,
    overwritten
  };
}
|
|
534
|
+
/**
 * Validate a shadow-recall record and persist it as pretty-printed JSON at
 * `<store>/shadow/<first 10 chars of recordedAt>/<traceId>.json`.
 *
 * @param {object} options - `memoryDir`, `evalStoreDir` and the raw `record`.
 * @returns {Promise<string>} Path of the written file.
 * @throws {Error} When the record is invalid, or when `recordedAt` / `traceId`
 *   would place the day directory or the file outside `<store>/shadow`
 *   (for example through `../` sequences).
 */
async function recordEvalShadowRecall(options) {
  const rootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);
  const validated = validateEvalShadowRecallRecord(options.record);
  const day = validated.recordedAt.slice(0, 10);
  const shadowRoot = path.join(rootDir, "shadow");
  const shadowDir = path.join(shadowRoot, day);
  const targetPath = path.join(shadowDir, `${validated.traceId}.json`);

  // Both values come from the record itself, so make sure neither the day
  // directory nor the file can escape the shadow store before touching disk.
  const escapesShadowRoot = (candidate) => {
    const fromRoot = path.relative(shadowRoot, candidate);
    return fromRoot === ".." || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot);
  };
  if (escapesShadowRoot(shadowDir) || escapesShadowRoot(targetPath)) {
    throw new Error("shadow recall record resolves outside the shadow store; check recordedAt and traceId");
  }

  await mkdir(shadowDir, { recursive: true });
  await writeFile(targetPath, JSON.stringify(validated, null, 2), "utf-8");
  return targetPath;
}
|
|
544
|
+
/**
 * Scan the eval store and return only its `status` summary.
 *
 * @param {object} options - `memoryDir` / `evalStoreDir` (used to resolve the
 *   store root) plus the flags `enabled`, `shadowModeEnabled`,
 *   `baselineSnapshotsEnabled` and `memoryRedTeamBenchEnabled`, which are
 *   forwarded to the store scan.
 * @returns {Promise<object>} The `status` object of the collected snapshot.
 */
async function getEvalHarnessStatus(options) {
  const {
    memoryDir,
    evalStoreDir,
    enabled,
    shadowModeEnabled,
    baselineSnapshotsEnabled,
    memoryRedTeamBenchEnabled
  } = options;
  const snapshot = await collectEvalStoreSnapshot({
    rootDir: resolveEvalStoreDir(memoryDir, evalStoreDir),
    enabled,
    shadowModeEnabled,
    baselineSnapshotsEnabled,
    memoryRedTeamBenchEnabled
  });
  return snapshot.status;
}
|
|
553
|
+
/**
 * Capture the latest completed run of every benchmark as a baseline snapshot
 * and write it to `<store>/baselines/<snapshotId>.json`.
 *
 * The store is scanned with all feature flags forced on. Benchmarks are listed
 * in `benchmarkId` order, and `createdAt` defaults to the current time in ISO
 * format when not supplied.
 *
 * @param {object} options - `baselineSnapshotsEnabled`, `snapshotId`,
 *   `memoryDir`, `evalStoreDir`, and optional `createdAt`, `notes`, `gitRef`.
 * @returns {Promise<{ targetPath: string, snapshot: object }>} Where the
 *   snapshot was written and its validated content.
 * @throws {Error} When snapshots are disabled or validation fails.
 */
async function createEvalBaselineSnapshot(options) {
  if (options.baselineSnapshotsEnabled !== true) {
    throw new Error("benchmark baseline snapshots are disabled");
  }
  const snapshotId = assertSafePathSegment(assertString(options.snapshotId, "snapshotId"), "snapshotId");
  const rootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);

  const store = await collectEvalStoreSnapshot({
    rootDir,
    enabled: true,
    shadowModeEnabled: true,
    baselineSnapshotsEnabled: true,
    memoryRedTeamBenchEnabled: true
  });

  const latestRuns = Array.from(latestCompletedRunsByBenchmark(store.runs).values());
  latestRuns.sort((left, right) => left.benchmarkId.localeCompare(right.benchmarkId));
  const benchmarks = latestRuns.map((run) => {
    const { benchmarkId, runId, completedAt, gitRef, metrics } = run;
    return {
      benchmarkId,
      runId,
      completedAt,
      gitRef,
      passRate: computePassRate(run),
      metrics
    };
  });

  const createdAt = options.createdAt ?? (/* @__PURE__ */ new Date()).toISOString();
  const snapshot = validateEvalBaselineSnapshot({
    schemaVersion: 1,
    snapshotId,
    createdAt,
    sourceRootDir: rootDir,
    benchmarkCount: benchmarks.length,
    benchmarks,
    notes: options.notes,
    gitRef: options.gitRef
  });

  const targetPath = path.join(rootDir, "baselines", `${snapshot.snapshotId}.json`);
  await mkdir(path.dirname(targetPath), { recursive: true });
  const serialized = JSON.stringify(snapshot, null, 2);
  await writeFile(targetPath, serialized, "utf-8");
  return { targetPath, snapshot };
}
|
|
590
|
+
/**
 * Compare the current eval store against one of its own stored baseline
 * snapshots.
 *
 * The store is scanned with all feature flags forced on, and the baseline is
 * looked up by `snapshotId` among the baselines found in that same store.
 *
 * @param {object} options - `benchmarkDeltaReporterEnabled`, `snapshotId`,
 *   `memoryDir` and `evalStoreDir`.
 * @returns {Promise<object>} The report built by buildEvalBaselineDeltaReport.
 * @throws {Error} When the reporter is disabled or the snapshot is not found.
 */
async function runEvalBaselineDeltaReport(options) {
  if (options.benchmarkDeltaReporterEnabled !== true) {
    throw new Error("benchmark delta reporter is disabled");
  }
  const snapshotId = assertSafePathSegment(assertString(options.snapshotId, "snapshotId"), "snapshotId");
  const candidateRootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);

  const candidateSnapshot = await collectEvalStoreSnapshot({
    rootDir: candidateRootDir,
    enabled: true,
    shadowModeEnabled: true,
    baselineSnapshotsEnabled: true,
    memoryRedTeamBenchEnabled: true
  });

  const baselineSnapshot = candidateSnapshot.baselines.find(
    (stored) => stored.snapshotId === snapshotId
  );
  if (baselineSnapshot) {
    return buildEvalBaselineDeltaReport({ baselineSnapshot, candidateSnapshot });
  }
  throw new Error(`benchmark baseline snapshot not found: ${snapshotId}`);
}
|
|
612
|
+
/**
 * CI gate that compares a candidate eval store against a stored baseline
 * snapshot.
 *
 * Both the base and the candidate store are scanned (concurrently, with all
 * feature flags forced on). The baseline is taken from the base store when it
 * contains `snapshotId`, otherwise from the candidate store.
 *
 * @param {object} options - `snapshotId`, `baseMemoryDir`, `baseEvalStoreDir`,
 *   `candidateMemoryDir` and `candidateEvalStoreDir`. For each side the eval
 *   store directory is also used as the first resolver argument when the
 *   memory directory is nullish.
 * @returns {Promise<object>} `baseRootDir`, `baselineResolvedFrom`
 *   ("base" or "candidate") and the fields of the delta report.
 * @throws {Error} When the snapshot exists in neither store.
 */
async function runEvalStoredBaselineCiGate(options) {
  const snapshotId = assertSafePathSegment(assertString(options.snapshotId, "snapshotId"), "snapshotId");
  const baseRootDir = resolveEvalStoreDir(
    options.baseMemoryDir ?? options.baseEvalStoreDir,
    options.baseEvalStoreDir
  );
  const candidateRootDir = resolveEvalStoreDir(
    options.candidateMemoryDir ?? options.candidateEvalStoreDir,
    options.candidateEvalStoreDir
  );

  const scan = (rootDir) =>
    collectEvalStoreSnapshot({
      rootDir,
      enabled: true,
      shadowModeEnabled: true,
      baselineSnapshotsEnabled: true,
      memoryRedTeamBenchEnabled: true
    });
  const [baseSnapshot, candidateSnapshot] = await Promise.all([
    scan(baseRootDir),
    scan(candidateRootDir)
  ]);

  const hasRequestedId = (stored) => stored.snapshotId === snapshotId;
  const fromBase = baseSnapshot.baselines.find(hasRequestedId);
  const baselineSnapshot = fromBase ?? candidateSnapshot.baselines.find(hasRequestedId);
  if (!baselineSnapshot) {
    throw new Error(`benchmark baseline snapshot not found: ${snapshotId}`);
  }

  const deltaReport = buildEvalBaselineDeltaReport({ baselineSnapshot, candidateSnapshot });
  return {
    baseRootDir,
    baselineResolvedFrom: fromBase !== void 0 ? "base" : "candidate",
    ...deltaReport
  };
}
|
|
648
|
+
/**
 * Build the delta report between a baseline snapshot and a candidate store.
 *
 * Regressions are collected in this order: invalid candidate artifacts,
 * baseline benchmarks with no completed candidate run, then per-benchmark pass
 * rate and metric regressions in `benchmarkId` order. The report passes only
 * when no regression was recorded.
 *
 * @param {{ baselineSnapshot: object, candidateSnapshot: object }} options -
 *   Validated baseline snapshot and a collected candidate store snapshot.
 * @returns {object} Report including `passed`, `regressions`, `improvements`,
 *   per-benchmark `deltas` and the rendered `markdownReport`.
 */
function buildEvalBaselineDeltaReport(options) {
  const { baselineSnapshot, candidateSnapshot } = options;
  const candidateStatus = candidateSnapshot.status;
  const regressions = [];
  const improvements = [];

  const invalidArtifactChecks = [
    [candidateStatus.invalidBenchmarks.length, "invalid benchmark manifest(s)"],
    [candidateStatus.invalidRuns.length, "invalid run summary file(s)"],
    [candidateStatus.invalidShadows.length, "invalid shadow record(s)"],
    [candidateStatus.invalidBaselines.length, "invalid baseline snapshot file(s)"]
  ];
  for (const [count, description] of invalidArtifactChecks) {
    if (count > 0) {
      regressions.push(`candidate store has ${count} ${description}`);
    }
  }

  const candidateRuns = latestCompletedRunsByBenchmark(candidateSnapshot.runs);
  const baselineBenchmarks = new Map(
    baselineSnapshot.benchmarks.map((benchmark) => [benchmark.benchmarkId, benchmark])
  );
  const sortedBenchmarkIds = [...baselineBenchmarks.keys()].sort();

  const missingCandidateBenchmarks = sortedBenchmarkIds.filter(
    (benchmarkId) => !candidateRuns.has(benchmarkId)
  );
  missingCandidateBenchmarks.forEach((benchmarkId) => {
    regressions.push(`candidate is missing latest completed benchmark run for ${benchmarkId}`);
  });

  const deltas = [];
  for (const benchmarkId of sortedBenchmarkIds) {
    const baseBenchmark = baselineBenchmarks.get(benchmarkId);
    const candidateRun = candidateRuns.get(benchmarkId);
    if (!baseBenchmark || !candidateRun) continue;

    const basePassRate = baseBenchmark.passRate;
    const candidatePassRate = computePassRate(candidateRun);
    const passRateDelta = candidatePassRate - basePassRate;
    const delta = {
      benchmarkId,
      baseRunId: baseBenchmark.runId,
      candidateRunId: candidateRun.runId,
      basePassRate,
      candidatePassRate,
      passRateDelta,
      metricDeltas: {},
      regressions: [],
      improvements: []
    };

    const passRateChange = `${basePassRate} -> ${candidatePassRate}`;
    if (passRateDelta < 0) {
      delta.regressions.push(`passRate ${passRateChange}`);
      regressions.push(`${benchmarkId} pass rate regressed (${passRateChange})`);
    } else if (passRateDelta > 0) {
      delta.improvements.push(`passRate ${passRateChange}`);
      improvements.push(`${benchmarkId} pass rate improved (${passRateChange})`);
    }

    const metricComparison = compareMetricDeltas(baseBenchmark.metrics, candidateRun.metrics);
    delta.metricDeltas = metricComparison.deltas;
    metricComparison.regressions.forEach((entry) => {
      delta.regressions.push(entry);
      regressions.push(`${benchmarkId} ${entry}`);
    });
    metricComparison.improvements.forEach((entry) => {
      delta.improvements.push(entry);
      improvements.push(`${benchmarkId} ${entry}`);
    });

    deltas.push(delta);
  }

  const report = {
    passed: regressions.length === 0,
    baselineSnapshotId: baselineSnapshot.snapshotId,
    baselineCreatedAt: baselineSnapshot.createdAt,
    baselineSourceRootDir: baselineSnapshot.sourceRootDir,
    candidateRootDir: candidateStatus.rootDir,
    comparedBenchmarks: deltas.length,
    missingCandidateBenchmarks,
    invalidArtifacts: {
      candidate: {
        benchmarks: candidateStatus.invalidBenchmarks.length,
        runs: candidateStatus.invalidRuns.length,
        shadows: candidateStatus.invalidShadows.length,
        baselines: candidateStatus.invalidBaselines.length
      }
    },
    regressions,
    improvements,
    deltas,
    markdownReport: ""
  };
  // The formatter receives the otherwise complete report; its output is stored last.
  report.markdownReport = formatEvalBaselineDeltaMarkdown(report);
  return report;
}
|
|
732
|
+
/**
 * Resolve an eval store root that must be configured.
 *
 * A non-blank `evalStoreDir` wins and is returned trimmed as-is; otherwise a
 * non-blank `memoryDir` is trimmed and passed to resolveEvalStoreDir.
 *
 * @param {{ memoryDir?: unknown, evalStoreDir?: unknown }} options - Candidate
 *   directories.
 * @param {string} label - Name used in the error message (e.g. "base").
 * @returns {string} The resolved store root.
 * @throws {Error} When neither directory is a non-blank string.
 */
function resolveRequiredEvalStoreRoot(options, label) {
  const isNonBlankString = (candidate) =>
    typeof candidate === "string" && candidate.trim().length > 0;

  const { evalStoreDir, memoryDir } = options;
  if (isNonBlankString(evalStoreDir)) {
    return evalStoreDir.trim();
  }
  if (isNonBlankString(memoryDir)) {
    return resolveEvalStoreDir(memoryDir.trim());
  }
  throw new Error(`${label} requires memoryDir or evalStoreDir`);
}
|
|
741
|
+
async function runEvalBenchmarkCiGate(options) {
|
|
742
|
+
const baseRootDir = resolveRequiredEvalStoreRoot(
|
|
743
|
+
{ memoryDir: options.baseMemoryDir, evalStoreDir: options.baseEvalStoreDir },
|
|
744
|
+
"base"
|
|
745
|
+
);
|
|
746
|
+
const candidateRootDir = resolveRequiredEvalStoreRoot(
|
|
747
|
+
{ memoryDir: options.candidateMemoryDir, evalStoreDir: options.candidateEvalStoreDir },
|
|
748
|
+
"candidate"
|
|
749
|
+
);
|
|
750
|
+
const baseSnapshot = await collectEvalStoreSnapshot({
|
|
751
|
+
rootDir: baseRootDir,
|
|
752
|
+
enabled: true,
|
|
753
|
+
shadowModeEnabled: true,
|
|
754
|
+
memoryRedTeamBenchEnabled: true
|
|
755
|
+
});
|
|
756
|
+
const candidateSnapshot = await collectEvalStoreSnapshot({
|
|
757
|
+
rootDir: candidateRootDir,
|
|
758
|
+
enabled: true,
|
|
759
|
+
shadowModeEnabled: true,
|
|
760
|
+
memoryRedTeamBenchEnabled: true
|
|
761
|
+
});
|
|
762
|
+
const regressions = [];
|
|
763
|
+
const improvements = [];
|
|
764
|
+
if (baseSnapshot.status.invalidBenchmarks.length > 0) {
|
|
765
|
+
regressions.push(`base store has ${baseSnapshot.status.invalidBenchmarks.length} invalid benchmark manifest(s)`);
|
|
766
|
+
}
|
|
767
|
+
if (baseSnapshot.status.invalidRuns.length > 0) {
|
|
768
|
+
regressions.push(`base store has ${baseSnapshot.status.invalidRuns.length} invalid run summary file(s)`);
|
|
769
|
+
}
|
|
770
|
+
if (baseSnapshot.status.invalidShadows.length > 0) {
|
|
771
|
+
regressions.push(`base store has ${baseSnapshot.status.invalidShadows.length} invalid shadow record(s)`);
|
|
772
|
+
}
|
|
773
|
+
if (candidateSnapshot.status.invalidBenchmarks.length > 0) {
|
|
774
|
+
regressions.push(`candidate store has ${candidateSnapshot.status.invalidBenchmarks.length} invalid benchmark manifest(s)`);
|
|
775
|
+
}
|
|
776
|
+
if (candidateSnapshot.status.invalidRuns.length > 0) {
|
|
777
|
+
regressions.push(`candidate store has ${candidateSnapshot.status.invalidRuns.length} invalid run summary file(s)`);
|
|
778
|
+
}
|
|
779
|
+
if (candidateSnapshot.status.invalidShadows.length > 0) {
|
|
780
|
+
regressions.push(`candidate store has ${candidateSnapshot.status.invalidShadows.length} invalid shadow record(s)`);
|
|
781
|
+
}
|
|
782
|
+
const baseRuns = latestCompletedRunsByBenchmark(baseSnapshot.runs);
|
|
783
|
+
const candidateRuns = latestCompletedRunsByBenchmark(candidateSnapshot.runs);
|
|
784
|
+
const missingCandidateBenchmarks = [...baseRuns.keys()].filter((benchmarkId) => !candidateRuns.has(benchmarkId)).sort();
|
|
785
|
+
for (const benchmarkId of missingCandidateBenchmarks) {
|
|
786
|
+
regressions.push(`candidate is missing latest completed benchmark run for ${benchmarkId}`);
|
|
787
|
+
}
|
|
788
|
+
const deltas = [];
|
|
789
|
+
for (const benchmarkId of [...baseRuns.keys()].sort()) {
|
|
790
|
+
const baseRun = baseRuns.get(benchmarkId);
|
|
791
|
+
const candidateRun = candidateRuns.get(benchmarkId);
|
|
792
|
+
if (!baseRun || !candidateRun) continue;
|
|
793
|
+
const basePassRate = computePassRate(baseRun);
|
|
794
|
+
const candidatePassRate = computePassRate(candidateRun);
|
|
795
|
+
const passRateDelta = candidatePassRate - basePassRate;
|
|
796
|
+
const delta = {
|
|
797
|
+
benchmarkId,
|
|
798
|
+
baseRunId: baseRun.runId,
|
|
799
|
+
candidateRunId: candidateRun.runId,
|
|
800
|
+
basePassRate,
|
|
801
|
+
candidatePassRate,
|
|
802
|
+
passRateDelta,
|
|
803
|
+
metricDeltas: {},
|
|
804
|
+
regressions: [],
|
|
805
|
+
improvements: []
|
|
806
|
+
};
|
|
807
|
+
if (passRateDelta < 0) {
|
|
808
|
+
delta.regressions.push(`passRate ${basePassRate} -> ${candidatePassRate}`);
|
|
809
|
+
regressions.push(`${benchmarkId} pass rate regressed (${basePassRate} -> ${candidatePassRate})`);
|
|
810
|
+
} else if (passRateDelta > 0) {
|
|
811
|
+
delta.improvements.push(`passRate ${basePassRate} -> ${candidatePassRate}`);
|
|
812
|
+
improvements.push(`${benchmarkId} pass rate improved (${basePassRate} -> ${candidatePassRate})`);
|
|
813
|
+
}
|
|
814
|
+
const metricDelta = compareMetricDeltas(baseRun.metrics, candidateRun.metrics);
|
|
815
|
+
delta.metricDeltas = metricDelta.deltas;
|
|
816
|
+
for (const regression of metricDelta.regressions) {
|
|
817
|
+
delta.regressions.push(regression);
|
|
818
|
+
regressions.push(`${benchmarkId} ${regression}`);
|
|
819
|
+
}
|
|
820
|
+
for (const improvement of metricDelta.improvements) {
|
|
821
|
+
delta.improvements.push(improvement);
|
|
822
|
+
improvements.push(`${benchmarkId} ${improvement}`);
|
|
823
|
+
}
|
|
824
|
+
deltas.push(delta);
|
|
825
|
+
}
|
|
826
|
+
return {
|
|
827
|
+
passed: regressions.length === 0,
|
|
828
|
+
baseRootDir: baseSnapshot.status.rootDir,
|
|
829
|
+
candidateRootDir: candidateSnapshot.status.rootDir,
|
|
830
|
+
comparedBenchmarks: deltas.length,
|
|
831
|
+
missingCandidateBenchmarks,
|
|
832
|
+
invalidArtifacts: {
|
|
833
|
+
base: {
|
|
834
|
+
benchmarks: baseSnapshot.status.invalidBenchmarks.length,
|
|
835
|
+
runs: baseSnapshot.status.invalidRuns.length,
|
|
836
|
+
shadows: baseSnapshot.status.invalidShadows.length
|
|
837
|
+
},
|
|
838
|
+
candidate: {
|
|
839
|
+
benchmarks: candidateSnapshot.status.invalidBenchmarks.length,
|
|
840
|
+
runs: candidateSnapshot.status.invalidRuns.length,
|
|
841
|
+
shadows: candidateSnapshot.status.invalidShadows.length
|
|
842
|
+
}
|
|
843
|
+
},
|
|
844
|
+
regressions,
|
|
845
|
+
improvements,
|
|
846
|
+
deltas
|
|
847
|
+
};
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
export {
|
|
851
|
+
resolveEvalStoreDir,
|
|
852
|
+
validateEvalBenchmarkManifest,
|
|
853
|
+
validateEvalRunSummary,
|
|
854
|
+
validateEvalBaselineSnapshot,
|
|
855
|
+
validateEvalShadowRecallRecord,
|
|
856
|
+
validateEvalBenchmarkPack,
|
|
857
|
+
importEvalBenchmarkPack,
|
|
858
|
+
recordEvalShadowRecall,
|
|
859
|
+
getEvalHarnessStatus,
|
|
860
|
+
createEvalBaselineSnapshot,
|
|
861
|
+
runEvalBaselineDeltaReport,
|
|
862
|
+
runEvalStoredBaselineCiGate,
|
|
863
|
+
runEvalBenchmarkCiGate
|
|
864
|
+
};
|
|
865
|
+
//# sourceMappingURL=chunk-K6WK37A6.js.map
|