@remnic/core 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-audit.d.ts +56 -0
- package/dist/access-audit.js +9 -0
- package/dist/access-cli.js +70 -53
- package/dist/access-cli.js.map +1 -1
- package/dist/access-http.d.ts +16 -9
- package/dist/access-http.js +26 -18
- package/dist/access-mcp.d.ts +16 -9
- package/dist/access-mcp.js +30 -8
- package/dist/access-schema.d.ts +124 -33
- package/dist/access-schema.js +5 -1
- package/dist/{access-service-HmO1Trrx.d.ts → access-service-Br8ZydTK.d.ts} +158 -63
- package/dist/access-service.d.ts +13 -6
- package/dist/access-service.js +23 -14
- package/dist/bootstrap.d.ts +6 -3
- package/dist/briefing.d.ts +1 -0
- package/dist/briefing.js +8 -6
- package/dist/buffer-surprise-report.d.ts +70 -0
- package/dist/buffer-surprise-report.js +7 -0
- package/dist/buffer-surprise-report.js.map +1 -0
- package/dist/buffer-surprise.d.ts +98 -0
- package/dist/buffer-surprise.js +11 -0
- package/dist/buffer-surprise.js.map +1 -0
- package/dist/buffer.d.ts +100 -2
- package/dist/buffer.js +1 -1
- package/dist/calibration.js +6 -6
- package/dist/causal-behavior.js +4 -4
- package/dist/causal-chain.js +2 -2
- package/dist/causal-consolidation.js +19 -18
- package/dist/causal-consolidation.js.map +1 -1
- package/dist/causal-retrieval.js +4 -4
- package/dist/causal-trajectory.js +1 -1
- package/dist/{chunk-QNJMBKFK.js → chunk-2LGMW3DJ.js} +3 -2
- package/dist/chunk-2LGMW3DJ.js.map +1 -0
- package/dist/{chunk-QDYXG4CS.js → chunk-3FPTCC3Z.js} +4 -3
- package/dist/chunk-3FPTCC3Z.js.map +1 -0
- package/dist/chunk-3GPTTA4J.js +57 -0
- package/dist/chunk-3GPTTA4J.js.map +1 -0
- package/dist/{chunk-ITRLGI2T.js → chunk-3OGMS3PE.js} +2 -2
- package/dist/{chunk-DEPL3635.js → chunk-3YGHKTBF.js} +1446 -196
- package/dist/chunk-3YGHKTBF.js.map +1 -0
- package/dist/{chunk-BLKTA7MM.js → chunk-4HQS2HPX.js} +54 -21
- package/dist/chunk-4HQS2HPX.js.map +1 -0
- package/dist/chunk-54V4BZWP.js +139 -0
- package/dist/chunk-54V4BZWP.js.map +1 -0
- package/dist/chunk-5JRF2PZA.js +67 -0
- package/dist/chunk-5JRF2PZA.js.map +1 -0
- package/dist/chunk-64NJRYU2.js +332 -0
- package/dist/chunk-64NJRYU2.js.map +1 -0
- package/dist/{chunk-OIT5QGG4.js → chunk-6AUUAZEX.js} +72 -2
- package/dist/chunk-6AUUAZEX.js.map +1 -0
- package/dist/{chunk-3QHL5ABG.js → chunk-6YJHX2DL.js} +191 -10
- package/dist/chunk-6YJHX2DL.js.map +1 -0
- package/dist/chunk-AJU4PJGY.js +126 -0
- package/dist/chunk-AJU4PJGY.js.map +1 -0
- package/dist/chunk-ASAITVLA.js +64 -0
- package/dist/chunk-ASAITVLA.js.map +1 -0
- package/dist/{chunk-44ICJRF3.js → chunk-AYXIPSZO.js} +5 -5
- package/dist/{chunk-MBJHSA7F.js → chunk-BECYBZLX.js} +265 -20
- package/dist/chunk-BECYBZLX.js.map +1 -0
- package/dist/chunk-C4SQJZAF.js +486 -0
- package/dist/chunk-C4SQJZAF.js.map +1 -0
- package/dist/{chunk-6UJ47TVX.js → chunk-CUPFXL3J.js} +2 -2
- package/dist/chunk-DF3RVK3X.js +119 -0
- package/dist/chunk-DF3RVK3X.js.map +1 -0
- package/dist/{chunk-N42IWANG.js → chunk-DG6YMRDC.js} +3 -3
- package/dist/chunk-DGVM5SFL.js +69 -0
- package/dist/chunk-DGVM5SFL.js.map +1 -0
- package/dist/{chunk-3SV6CQHO.js → chunk-DIXB44VE.js} +102 -66
- package/dist/chunk-DIXB44VE.js.map +1 -0
- package/dist/chunk-EIR5VLIH.js +90 -0
- package/dist/chunk-EIR5VLIH.js.map +1 -0
- package/dist/{chunk-GV6NLQ4X.js → chunk-F5VP6YCB.js} +374 -16
- package/dist/chunk-F5VP6YCB.js.map +1 -0
- package/dist/{chunk-6ZH4TU6I.js → chunk-FAAFWE4G.js} +2 -1
- package/dist/chunk-FAAFWE4G.js.map +1 -0
- package/dist/{chunk-7WQ6SLIE.js → chunk-FVA6TGI3.js} +2 -2
- package/dist/{chunk-PAORGQRI.js → chunk-GA5P7RST.js} +37 -23
- package/dist/chunk-GA5P7RST.js.map +1 -0
- package/dist/chunk-GDFS42HT.js +206 -0
- package/dist/chunk-GDFS42HT.js.map +1 -0
- package/dist/chunk-IISBCCWR.js +52 -0
- package/dist/chunk-IISBCCWR.js.map +1 -0
- package/dist/chunk-JBMSGZEQ.js +441 -0
- package/dist/chunk-JBMSGZEQ.js.map +1 -0
- package/dist/{chunk-J4IYOZZ5.js → chunk-JXS5PDQ7.js} +3 -1
- package/dist/chunk-JXS5PDQ7.js.map +1 -0
- package/dist/chunk-KVBLZUKV.js +173 -0
- package/dist/chunk-KVBLZUKV.js.map +1 -0
- package/dist/{chunk-4LACOVZX.js → chunk-L7IXWRYE.js} +10 -5
- package/dist/chunk-L7IXWRYE.js.map +1 -0
- package/dist/chunk-LBLXEFWK.js +51 -0
- package/dist/chunk-LBLXEFWK.js.map +1 -0
- package/dist/{chunk-WBSAYXVI.js → chunk-LOIMBRDE.js} +201 -45
- package/dist/chunk-LOIMBRDE.js.map +1 -0
- package/dist/{chunk-3WHVNEN7.js → chunk-LTCGGW2D.js} +1 -1
- package/dist/chunk-LTCGGW2D.js.map +1 -0
- package/dist/{chunk-ZVBB3T7V.js → chunk-NBVAS5MT.js} +25 -23
- package/dist/chunk-NBVAS5MT.js.map +1 -0
- package/dist/{chunk-UEYA6UC7.js → chunk-NZLQTHS5.js} +25 -2
- package/dist/chunk-NZLQTHS5.js.map +1 -0
- package/dist/{chunk-NQEVYWX6.js → chunk-OC5OXUQ4.js} +211 -7
- package/dist/chunk-OC5OXUQ4.js.map +1 -0
- package/dist/{chunk-LK6SGL53.js → chunk-OR64ZGRZ.js} +3 -2
- package/dist/chunk-OR64ZGRZ.js.map +1 -0
- package/dist/{chunk-SYUK3VLY.js → chunk-PVICZTKG.js} +117 -5
- package/dist/chunk-PVICZTKG.js.map +1 -0
- package/dist/chunk-PVPWZSSI.js +37 -0
- package/dist/chunk-PVPWZSSI.js.map +1 -0
- package/dist/{chunk-JL2PU6AI.js → chunk-R2XRID2N.js} +2 -2
- package/dist/{chunk-4NRAJUDS.js → chunk-RBBWYEFJ.js} +1 -1
- package/dist/chunk-RFYAYKTD.js +146 -0
- package/dist/chunk-RFYAYKTD.js.map +1 -0
- package/dist/chunk-SOBJ6NEY.js +18 -0
- package/dist/chunk-SOBJ6NEY.js.map +1 -0
- package/dist/{chunk-JIU55F3X.js → chunk-SPI27QT6.js} +2 -2
- package/dist/{chunk-MVTHXUBX.js → chunk-STGWEHYR.js} +479 -20
- package/dist/chunk-STGWEHYR.js.map +1 -0
- package/dist/{chunk-6LX5ORAS.js → chunk-TMYO7B5P.js} +4 -4
- package/dist/chunk-TVVEYCNW.js +65 -0
- package/dist/chunk-TVVEYCNW.js.map +1 -0
- package/dist/chunk-ULYOGL6R.js +322 -0
- package/dist/chunk-ULYOGL6R.js.map +1 -0
- package/dist/{chunk-37UIFYWO.js → chunk-UWB5LMWY.js} +108 -9
- package/dist/chunk-UWB5LMWY.js.map +1 -0
- package/dist/{chunk-47UU5PU2.js → chunk-VBVG2M5G.js} +18 -3
- package/dist/chunk-VBVG2M5G.js.map +1 -0
- package/dist/{chunk-7ECD5ATE.js → chunk-VDX363PS.js} +2 -2
- package/dist/{chunk-O5ETUNBT.js → chunk-VTU2B4VF.js} +7 -3
- package/dist/chunk-VTU2B4VF.js.map +1 -0
- package/dist/{chunk-MTLYEMJB.js → chunk-WCLICCGB.js} +18 -3
- package/dist/chunk-WCLICCGB.js.map +1 -0
- package/dist/chunk-X6GF3FX2.js +26 -0
- package/dist/chunk-X6GF3FX2.js.map +1 -0
- package/dist/{chunk-3QFQGRHO.js → chunk-XMHBH5H6.js} +4 -4
- package/dist/{chunk-DHHP2Z4X.js → chunk-XXVWLXSG.js} +2 -2
- package/dist/{chunk-XZ2TIKGC.js → chunk-Y7R2XJ5Q.js} +25 -9
- package/dist/chunk-Y7R2XJ5Q.js.map +1 -0
- package/dist/{chunk-ALXMCZEU.js → chunk-Z2E7VW55.js} +6 -3
- package/dist/chunk-Z2E7VW55.js.map +1 -0
- package/dist/chunk-ZAIM4TUE.js +488 -0
- package/dist/chunk-ZAIM4TUE.js.map +1 -0
- package/dist/chunk-ZZTOURJI.js +91 -0
- package/dist/chunk-ZZTOURJI.js.map +1 -0
- package/dist/{cli-BneVIEvh.d.ts → cli-BkeRaYfk.d.ts} +2 -2
- package/dist/cli.d.ts +13 -6
- package/dist/cli.js +42 -31
- package/dist/config.js +2 -2
- package/dist/consolidation-operator.d.ts +41 -0
- package/dist/consolidation-operator.js +11 -0
- package/dist/consolidation-operator.js.map +1 -0
- package/dist/consolidation-provenance-check.d.ts +68 -0
- package/dist/consolidation-provenance-check.js +9 -0
- package/dist/consolidation-provenance-check.js.map +1 -0
- package/dist/consolidation-undo.d.ts +123 -0
- package/dist/consolidation-undo.js +426 -0
- package/dist/consolidation-undo.js.map +1 -0
- package/dist/{contradiction-scan-GR33PONM.js → contradiction-scan-E3GJTI4F.js} +43 -7
- package/dist/contradiction-scan-E3GJTI4F.js.map +1 -0
- package/dist/cross-namespace-budget.d.ts +133 -0
- package/dist/cross-namespace-budget.js +9 -0
- package/dist/cross-namespace-budget.js.map +1 -0
- package/dist/direct-answer-wiring.js +5 -70
- package/dist/direct-answer-wiring.js.map +1 -1
- package/dist/embedding-fallback.js +2 -1
- package/dist/{engine-5TIQBYZR.js → engine-72LSIWQP.js} +8 -7
- package/dist/engine-72LSIWQP.js.map +1 -0
- package/dist/entity-retrieval.d.ts +1 -0
- package/dist/entity-retrieval.js +7 -6
- package/dist/explicit-capture.d.ts +6 -3
- package/dist/explicit-capture.js +2 -2
- package/dist/extraction-judge-telemetry.d.ts +113 -0
- package/dist/extraction-judge-telemetry.js +14 -0
- package/dist/extraction-judge-telemetry.js.map +1 -0
- package/dist/extraction-judge-training.d.ts +85 -0
- package/dist/extraction-judge-training.js +16 -0
- package/dist/extraction-judge-training.js.map +1 -0
- package/dist/extraction-judge.d.ts +124 -2
- package/dist/extraction-judge.js +11 -1
- package/dist/extraction.js +10 -9
- package/dist/fallback-llm.js +3 -3
- package/dist/graph-recall.d.ts +100 -0
- package/dist/graph-recall.js +8 -0
- package/dist/graph-recall.js.map +1 -0
- package/dist/graph-retrieval.d.ts +271 -0
- package/dist/graph-retrieval.js +21 -0
- package/dist/graph-retrieval.js.map +1 -0
- package/dist/importance.js +1 -1
- package/dist/index.d.ts +585 -20
- package/dist/index.js +542 -344
- package/dist/index.js.map +1 -1
- package/dist/local-llm.js +2 -2
- package/dist/memory-worth-bench.d.ts +51 -0
- package/dist/memory-worth-bench.js +131 -0
- package/dist/memory-worth-bench.js.map +1 -0
- package/dist/memory-worth-filter.d.ts +128 -0
- package/dist/memory-worth-filter.js +10 -0
- package/dist/memory-worth-filter.js.map +1 -0
- package/dist/memory-worth-outcomes.d.ts +118 -0
- package/dist/memory-worth-outcomes.js +9 -0
- package/dist/memory-worth-outcomes.js.map +1 -0
- package/dist/memory-worth.d.ts +102 -0
- package/dist/memory-worth.js +7 -0
- package/dist/memory-worth.js.map +1 -0
- package/dist/operator-toolkit.d.ts +40 -1
- package/dist/operator-toolkit.js +25 -16
- package/dist/{orchestrator-DRYA6_lW.d.ts → orchestrator-CmJ-NTdJ.d.ts} +233 -8
- package/dist/orchestrator.d.ts +6 -3
- package/dist/orchestrator.js +54 -44
- package/dist/page-versioning.d.ts +12 -1
- package/dist/page-versioning.js +5 -3
- package/dist/{port-C1GZFv8h.d.ts → port-BADbLZU5.d.ts} +2 -2
- package/dist/qmd-recall-cache.d.ts +1 -1
- package/dist/qmd.d.ts +5 -3
- package/dist/qmd.js +3 -3
- package/dist/reasoning-trace-recall.d.ts +90 -0
- package/dist/reasoning-trace-recall.js +13 -0
- package/dist/reasoning-trace-recall.js.map +1 -0
- package/dist/reasoning-trace-types.d.ts +54 -0
- package/dist/reasoning-trace-types.js +17 -0
- package/dist/reasoning-trace-types.js.map +1 -0
- package/dist/recall-audit-anomaly.d.ts +112 -0
- package/dist/recall-audit-anomaly.js +11 -0
- package/dist/recall-audit-anomaly.js.map +1 -0
- package/dist/recall-audit.js +5 -44
- package/dist/recall-audit.js.map +1 -1
- package/dist/recall-explain-renderer.d.ts +49 -0
- package/dist/recall-explain-renderer.js +18 -0
- package/dist/recall-explain-renderer.js.map +1 -0
- package/dist/recall-state.d.ts +12 -1
- package/dist/recall-state.js +1 -1
- package/dist/recall-xray-cli.d.ts +40 -0
- package/dist/recall-xray-cli.js +11 -0
- package/dist/recall-xray-cli.js.map +1 -0
- package/dist/recall-xray-renderer.d.ts +44 -0
- package/dist/recall-xray-renderer.js +18 -0
- package/dist/recall-xray-renderer.js.map +1 -0
- package/dist/recall-xray.d.ts +179 -0
- package/dist/recall-xray.js +13 -0
- package/dist/recall-xray.js.map +1 -0
- package/dist/resolve-provider-secret.d.ts +5 -1
- package/dist/resolve-provider-secret.js +3 -1
- package/dist/resume-bundles.js +6 -6
- package/dist/retrieval-agents.d.ts +1 -1
- package/dist/retrieval-tiers.d.ts +17 -0
- package/dist/retrieval-tiers.js +9 -0
- package/dist/retrieval-tiers.js.map +1 -0
- package/dist/schemas.d.ts +309 -53
- package/dist/schemas.js +1 -1
- package/dist/{semantic-consolidation-DrvSYRdB.d.ts → semantic-consolidation-CxJU6MJk.d.ts} +62 -1
- package/dist/semantic-consolidation.d.ts +2 -1
- package/dist/semantic-consolidation.js +22 -7
- package/dist/semantic-rule-promotion.js +7 -6
- package/dist/semantic-rule-verifier.js +7 -6
- package/dist/storage.d.ts +82 -1
- package/dist/storage.js +6 -5
- package/dist/summarizer.js +6 -6
- package/dist/temporal-supersession.d.ts +1 -0
- package/dist/tier-migration.d.ts +2 -1
- package/dist/tokens.js +2 -1
- package/dist/types.d.ts +276 -2
- package/dist/types.js +1 -1
- package/dist/verified-recall.js +7 -6
- package/package.json +1 -1
- package/dist/chunk-37UIFYWO.js.map +0 -1
- package/dist/chunk-3QHL5ABG.js.map +0 -1
- package/dist/chunk-3SV6CQHO.js.map +0 -1
- package/dist/chunk-3WHVNEN7.js.map +0 -1
- package/dist/chunk-47UU5PU2.js.map +0 -1
- package/dist/chunk-4LACOVZX.js.map +0 -1
- package/dist/chunk-6ZH4TU6I.js.map +0 -1
- package/dist/chunk-ALXMCZEU.js.map +0 -1
- package/dist/chunk-BLKTA7MM.js.map +0 -1
- package/dist/chunk-DEPL3635.js.map +0 -1
- package/dist/chunk-GV6NLQ4X.js.map +0 -1
- package/dist/chunk-J4IYOZZ5.js.map +0 -1
- package/dist/chunk-LAYN4LDC.js +0 -267
- package/dist/chunk-LAYN4LDC.js.map +0 -1
- package/dist/chunk-LK6SGL53.js.map +0 -1
- package/dist/chunk-MBJHSA7F.js.map +0 -1
- package/dist/chunk-MTLYEMJB.js.map +0 -1
- package/dist/chunk-MVTHXUBX.js.map +0 -1
- package/dist/chunk-NQEVYWX6.js.map +0 -1
- package/dist/chunk-O5ETUNBT.js.map +0 -1
- package/dist/chunk-OIT5QGG4.js.map +0 -1
- package/dist/chunk-PAORGQRI.js.map +0 -1
- package/dist/chunk-QDYXG4CS.js.map +0 -1
- package/dist/chunk-QNJMBKFK.js.map +0 -1
- package/dist/chunk-SYUK3VLY.js.map +0 -1
- package/dist/chunk-UEYA6UC7.js.map +0 -1
- package/dist/chunk-UVJFDP7P.js +0 -202
- package/dist/chunk-UVJFDP7P.js.map +0 -1
- package/dist/chunk-WBSAYXVI.js.map +0 -1
- package/dist/chunk-XZ2TIKGC.js.map +0 -1
- package/dist/chunk-ZVBB3T7V.js.map +0 -1
- package/dist/contradiction-scan-GR33PONM.js.map +0 -1
- /package/dist/{engine-5TIQBYZR.js.map → access-audit.js.map} +0 -0
- /package/dist/{chunk-ITRLGI2T.js.map → chunk-3OGMS3PE.js.map} +0 -0
- /package/dist/{chunk-44ICJRF3.js.map → chunk-AYXIPSZO.js.map} +0 -0
- /package/dist/{chunk-6UJ47TVX.js.map → chunk-CUPFXL3J.js.map} +0 -0
- /package/dist/{chunk-N42IWANG.js.map → chunk-DG6YMRDC.js.map} +0 -0
- /package/dist/{chunk-7WQ6SLIE.js.map → chunk-FVA6TGI3.js.map} +0 -0
- /package/dist/{chunk-JL2PU6AI.js.map → chunk-R2XRID2N.js.map} +0 -0
- /package/dist/{chunk-4NRAJUDS.js.map → chunk-RBBWYEFJ.js.map} +0 -0
- /package/dist/{chunk-JIU55F3X.js.map → chunk-SPI27QT6.js.map} +0 -0
- /package/dist/{chunk-6LX5ORAS.js.map → chunk-TMYO7B5P.js.map} +0 -0
- /package/dist/{chunk-7ECD5ATE.js.map → chunk-VDX363PS.js.map} +0 -0
- /package/dist/{chunk-3QFQGRHO.js.map → chunk-XMHBH5H6.js.map} +0 -0
- /package/dist/{chunk-DHHP2Z4X.js.map → chunk-XXVWLXSG.js.map} +0 -0
package/dist/local-llm.js
CHANGED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Issue #560 PR 5 — Memory Worth recall filter benchmark.
|
|
3
|
+
*
|
|
4
|
+
* Self-contained precision benchmark for `applyMemoryWorthFilter`. Seeds a
|
|
5
|
+
* synthetic corpus where a small subset of memories have known-bad outcome
|
|
6
|
+
* history, then compares top-K precision with the filter off vs. on.
|
|
7
|
+
*
|
|
8
|
+
* Why a dedicated in-package file (rather than the full `@remnic/bench`
|
|
9
|
+
* harness): the filter is a pure function over candidate scores and counter
|
|
10
|
+
* data; it doesn't need QMD, the orchestrator, or the schema-tier fixtures.
|
|
11
|
+
* Running it as a plain `tsx` script keeps the signal tight — any drift in
|
|
12
|
+
* the scorer's math shows up as a precision delta here, no integration
|
|
13
|
+
* wiring required.
|
|
14
|
+
*
|
|
15
|
+
* The `runMemoryWorthBench()` export is the programmatic entry point;
|
|
16
|
+
* `runMemoryWorthBenchCli()` is what `tsx` calls when this file is executed
|
|
17
|
+
* directly. Both return (or print) a structured result so CI can gate on
|
|
18
|
+
* it if we later want to.
|
|
19
|
+
*
|
|
20
|
+
* Verdict for PR 5: run the bench once, confirm filter-on ≥ filter-off on
|
|
21
|
+
* precision@K across every seed, and only then flip the default to `true`.
|
|
22
|
+
*/
|
|
23
|
+
/**
 * Aggregate outcome of one `runMemoryWorthBench` run: mean precision@K with
 * the Memory Worth filter off and on, plus a per-case verdict.
 */
interface MemoryWorthBenchResult {
|
|
24
|
+
/** Number of synthetic cases that were evaluated. */
cases: number;
|
|
25
|
+
/** Top-K cutoff used for precision@K. */
k: number;
|
|
26
|
+
/** Mean precision@K with the filter disabled. */
|
|
27
|
+
precisionAtK_off: number;
|
|
28
|
+
/** Mean precision@K with the filter enabled. */
|
|
29
|
+
precisionAtK_on: number;
|
|
30
|
+
/** `on - off`; positive means filter helps, zero means tied, negative means it hurts. */
|
|
31
|
+
delta: number;
|
|
32
|
+
/** `true` only when filter-on precision@K is >= filter-off on every single case. */
|
|
33
|
+
filterWinsOrTies: boolean;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Run the benchmark over N synthetic cases using a fixed PRNG seed. Returns
|
|
37
|
+
* aggregate precision numbers + a boolean verdict.
*
* @param options.cases - Number of synthetic cases to run (default 50). Must
*   be a positive integer.
* @param options.seed - Seed for the deterministic PRNG (default 0xdeadbeef).
* @param options.now - Clock value handed to the filter (default
*   2026-01-01T00:00:00.000Z).
* @returns Mean precision@K with the filter off and on, their delta, and
*   whether filter-on was at least as good as filter-off on every case.
* @throws Error when `cases` is not a positive integer.
|
|
38
|
+
*/
|
|
39
|
+
declare function runMemoryWorthBench(options?: {
|
|
40
|
+
cases?: number;
|
|
41
|
+
seed?: number;
|
|
42
|
+
now?: Date;
|
|
43
|
+
}): MemoryWorthBenchResult;
|
|
44
|
+
/**
|
|
45
|
+
* CLI entry point — run the bench and print a structured result. Exits
|
|
46
|
+
* non-zero if the filter ever loses to the no-filter baseline (so CI can
|
|
47
|
+
* gate on this in the future).
*
* Runs with the default options, writes the result as pretty-printed JSON to
* stdout, and on a loss writes a message to stderr and exits with code 1.
|
|
48
|
+
*/
|
|
49
|
+
declare function runMemoryWorthBenchCli(): void;
|
|
50
|
+
|
|
51
|
+
export { type MemoryWorthBenchResult, runMemoryWorthBench, runMemoryWorthBenchCli };
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import {
|
|
2
|
+
applyMemoryWorthFilter
|
|
3
|
+
} from "./chunk-3GPTTA4J.js";
|
|
4
|
+
import "./chunk-IISBCCWR.js";
|
|
5
|
+
|
|
6
|
+
// src/memory-worth-bench.ts
|
|
7
|
+
/**
 * Deterministic pseudo-random number generator (mulberry32), used so the
 * bench produces identical results across runs.
 *
 * @param seed - Initial state; coerced to an unsigned 32-bit integer.
 * @returns A function that advances the internal state on every call and
 *   returns a number in [0, 1).
 */
function mulberry32(seed) {
|
|
8
|
+
let t = seed >>> 0;
|
|
9
|
+
return () => {
|
|
10
|
+
// 1831565813 === 0x6d2b79f5: the per-call state increment, wrapped to 32 bits.
t = t + 1831565813 >>> 0;
|
|
11
|
+
let r = t;
|
|
12
|
+
r = Math.imul(r ^ r >>> 15, r | 1);
|
|
13
|
+
r ^= r + Math.imul(r ^ r >>> 7, r | 61);
|
|
14
|
+
// Force the mixed value to unsigned 32 bits, then scale by 2^32 into [0, 1).
return ((r ^ r >>> 14) >>> 0) / 4294967296;
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
/**
 * Build one synthetic benchmark case made of 20 candidates.
 *
 * Pool composition:
 * - 2 "trap" candidates: base score 0.95 - 0.02*i, not relevant, counters of
 *   0 successes / 10 failures.
 * - 3 "noise-high" candidates: base score 0.9 - 0.02*i, not relevant, no
 *   counters.
 * - 3 "gold" candidates: base score 0.7 - 0.05*i, relevant, counters of
 *   10 successes / 0 failures.
 * - 12 "noise-low" candidates: base score rng() * 0.5, not relevant, no
 *   counters.
 *
 * The pool is shuffled in place before it is returned.
 *
 * @param caseIndex - Index embedded in the case id and in every candidate path.
 * @param rng - Number source, expected to return values in [0, 1); consumed
 *   for the low-noise scores and for the shuffle.
 * @returns `{ id, candidates, k }` with `k` fixed at 5.
 */
function buildCase(caseIndex, rng) {
|
|
18
|
+
const candidates = [];
|
|
19
|
+
// 2 high-ranked traps: high base score, bad outcome history, NOT relevant.
for (let i = 0; i < 2; i += 1) {
|
|
20
|
+
candidates.push({
|
|
21
|
+
path: `case-${caseIndex}-trap-${i}.md`,
|
|
22
|
+
baseScore: 0.95 - i * 0.02,
|
|
23
|
+
isRelevant: false,
|
|
24
|
+
counters: { mw_success: 0, mw_fail: 10 }
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
// 3 high-ranked irrelevant items with no counter data.
for (let i = 0; i < 3; i += 1) {
|
|
28
|
+
candidates.push({
|
|
29
|
+
path: `case-${caseIndex}-noise-high-${i}.md`,
|
|
30
|
+
baseScore: 0.9 - i * 0.02,
|
|
31
|
+
isRelevant: false
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
// 3 lower-ranked true positives with a strong success history.
for (let i = 0; i < 3; i += 1) {
|
|
35
|
+
candidates.push({
|
|
36
|
+
path: `case-${caseIndex}-gold-${i}.md`,
|
|
37
|
+
baseScore: 0.7 - i * 0.05,
|
|
38
|
+
isRelevant: true,
|
|
39
|
+
counters: { mw_success: 10, mw_fail: 0 }
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
// 12 irrelevant noise candidates at random lower scores (below 0.5).
for (let i = 0; i < 12; i += 1) {
|
|
43
|
+
candidates.push({
|
|
44
|
+
path: `case-${caseIndex}-noise-low-${i}.md`,
|
|
45
|
+
baseScore: rng() * 0.5,
|
|
46
|
+
isRelevant: false
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
// Shuffle (walk from the end, swap with a random earlier-or-same index) to
// remove any input-order bias.
for (let i = candidates.length - 1; i > 0; i -= 1) {
|
|
50
|
+
const j = Math.floor(rng() * (i + 1));
|
|
51
|
+
[candidates[i], candidates[j]] = [candidates[j], candidates[i]];
|
|
52
|
+
}
|
|
53
|
+
return { id: `case-${caseIndex}`, candidates, k: 5 };
|
|
54
|
+
}
|
|
55
|
+
/**
 * Fraction of the first `k` entries of `orderedPaths` that are relevant.
 *
 * @param orderedPaths - Ranked list of paths, best first.
 * @param relevant - Collection exposing `has(path)` for the relevant paths.
 * @param k - Cutoff applied with `slice(0, k)`.
 * @returns Hits divided by the number of entries inspected; 0 when the
 *   slice is empty.
 */
function computePrecisionAtK(orderedPaths, relevant, k) {
|
|
56
|
+
const topK = orderedPaths.slice(0, k);
|
|
57
|
+
if (topK.length === 0) return 0;
|
|
58
|
+
let hits = 0;
|
|
59
|
+
for (const p of topK) if (relevant.has(p)) hits += 1;
|
|
60
|
+
// For k >= 1 the slice never exceeds k, so a list shorter than k is divided
// by the number of entries that actually exist rather than by k.
return hits / Math.min(k, topK.length);
|
|
61
|
+
}
|
|
62
|
+
/**
 * Run the benchmark over N synthetic cases using a fixed PRNG seed.
 *
 * For each case the candidates are ranked twice: once by raw base score
 * (filter off) and once in the order returned by `applyMemoryWorthFilter`
 * (filter on). Precision@K is computed for both rankings and averaged.
 *
 * @param options - Optional `{ cases, seed, now }`. Defaults: 50 cases, seed
 *   0xdeadbeef, now = 2026-01-01T00:00:00.000Z.
 * @returns Aggregate precision numbers plus `filterWinsOrTies`, which is true
 *   only when filter-on precision was >= filter-off precision on every case.
 * @throws Error when `cases` is not a positive integer.
 */
function runMemoryWorthBench(options) {
|
|
63
|
+
const requestedCases = options?.cases ?? 50;
|
|
64
|
+
// Reject non-positive-integer case counts. With 0 cases the averages below
// would be NaN while `filterWinsOrTies` (0 === 0) would still report true.
if (!Number.isFinite(requestedCases) || !Number.isInteger(requestedCases) || requestedCases < 1) {
|
|
65
|
+
throw new Error(
|
|
66
|
+
`runMemoryWorthBench: cases must be a positive integer; got ${requestedCases}`
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
const numCases = requestedCases;
|
|
70
|
+
// 3735928559 === 0xdeadbeef. One generator is shared across all cases.
const rng = mulberry32(options?.seed ?? 3735928559);
|
|
71
|
+
// Fixed default clock so repeated runs hand the filter the same `now`.
const now = options?.now ?? /* @__PURE__ */ new Date("2026-01-01T00:00:00.000Z");
|
|
72
|
+
let sumOff = 0;
|
|
73
|
+
let sumOn = 0;
|
|
74
|
+
let onWinsOrTies = 0;
|
|
75
|
+
for (let i = 0; i < numCases; i += 1) {
|
|
76
|
+
const c = buildCase(i, rng);
|
|
77
|
+
// Ground-truth set of relevant paths for this case.
const relevant = new Set(
|
|
78
|
+
c.candidates.filter((x) => x.isRelevant).map((x) => x.path)
|
|
79
|
+
);
|
|
80
|
+
// Filter OFF: sort a copy by baseScore descending.
const off = [...c.candidates].sort((a, b) => b.baseScore - a.baseScore).map((x) => x.path);
|
|
81
|
+
const pOff = computePrecisionAtK(off, relevant, c.k);
|
|
82
|
+
// Filter ON: build the path -> counters map (only for candidates that have
// counters) and apply the filter.
const counters = /* @__PURE__ */ new Map();
|
|
83
|
+
for (const cand of c.candidates) {
|
|
84
|
+
if (cand.counters) counters.set(cand.path, cand.counters);
|
|
85
|
+
}
|
|
86
|
+
const filtered = applyMemoryWorthFilter(
|
|
87
|
+
c.candidates.map((x) => ({ path: x.path, score: x.baseScore })),
|
|
88
|
+
{ counters, now }
|
|
89
|
+
);
|
|
90
|
+
// The order of the returned array is taken as the filter-on ranking.
const on = filtered.map((x) => x.path);
|
|
91
|
+
const pOn = computePrecisionAtK(on, relevant, c.k);
|
|
92
|
+
sumOff += pOff;
|
|
93
|
+
sumOn += pOn;
|
|
94
|
+
if (pOn >= pOff) onWinsOrTies += 1;
|
|
95
|
+
}
|
|
96
|
+
const avgOff = sumOff / numCases;
|
|
97
|
+
const avgOn = sumOn / numCases;
|
|
98
|
+
return {
|
|
99
|
+
cases: numCases,
|
|
100
|
+
// Hard-coded; mirrors the k that buildCase assigns to every case.
k: 5,
|
|
101
|
+
precisionAtK_off: avgOff,
|
|
102
|
+
precisionAtK_on: avgOn,
|
|
103
|
+
delta: avgOn - avgOff,
|
|
104
|
+
// True only if no single case scored lower with the filter on.
filterWinsOrTies: onWinsOrTies === numCases
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
/**
 * CLI entry point: run the bench with default options and print the result
 * as pretty-printed JSON on stdout. If filter-on scored below filter-off on
 * any case, print a message on stderr and exit the process with code 1.
 */
function runMemoryWorthBenchCli() {
|
|
108
|
+
const result = runMemoryWorthBench();
|
|
109
|
+
console.log(JSON.stringify(result, null, 2));
|
|
110
|
+
if (!result.filterWinsOrTies) {
|
|
111
|
+
console.error("memory-worth bench: filter lost to no-filter baseline on at least one case");
|
|
112
|
+
process.exit(1);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// When this module is the script being executed, run the CLI. The check
// compares this module's URL with a file:// URL derived from process.argv[1].
if (process.argv[1]) {
|
|
116
|
+
try {
|
|
117
|
+
// Loaded lazily inside the try so a failed import lands in the fallback below.
const { pathToFileURL } = await import("url");
|
|
118
|
+
// pathToFileURL yields a normalized file:// URL for the entry script path.
if (import.meta.url === pathToFileURL(process.argv[1]).href) {
|
|
119
|
+
runMemoryWorthBenchCli();
|
|
120
|
+
}
|
|
121
|
+
} catch {
|
|
122
|
+
// Fallback: naive string comparison. Worst case the bench does not
// auto-run and callers invoke the exported functions explicitly.
if (import.meta.url === `file://${process.argv[1]}`) {
|
|
123
|
+
runMemoryWorthBenchCli();
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
export {
|
|
128
|
+
runMemoryWorthBench,
|
|
129
|
+
runMemoryWorthBenchCli
|
|
130
|
+
};
|
|
131
|
+
//# sourceMappingURL=memory-worth-bench.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/memory-worth-bench.ts"],"sourcesContent":["/**\n * Issue #560 PR 5 — Memory Worth recall filter benchmark.\n *\n * Self-contained precision benchmark for `applyMemoryWorthFilter`. Seeds a\n * synthetic corpus where a small subset of memories have known-bad outcome\n * history, then compares top-K precision with the filter off vs. on.\n *\n * Why a dedicated in-package file (rather than the full `@remnic/bench`\n * harness): the filter is a pure function over candidate scores and counter\n * data; it doesn't need QMD, the orchestrator, or the schema-tier fixtures.\n * Running it as a plain `tsx` script keeps the signal tight — any drift in\n * the scorer's math shows up as a precision delta here, no integration\n * wiring required.\n *\n * The `runMemoryWorthBench()` export is the programmatic entry point;\n * `runMemoryWorthBenchCli()` is what `tsx` calls when this file is executed\n * directly. Both return (or print) a structured result so CI can gate on\n * it if we later want to.\n *\n * Verdict for PR 5: run the bench once, confirm filter-on ≥ filter-off on\n * precision@K across every seed, and only then flip the default to `true`.\n */\n\nimport {\n applyMemoryWorthFilter,\n type MemoryWorthCounters,\n} from \"./memory-worth-filter.js\";\n\n/**\n * One synthetic query + candidate pool + relevance labels.\n *\n * Candidates are scored by the pretend retrieval tier (`baseScore`); the\n * ground-truth relevance is `isRelevant`; `counters` seeds each candidate's\n * outcome history. Some \"bad\" candidates have baseline scores just above\n * the \"good\" candidates — the filter should be able to demote them.\n */\ninterface BenchCase {\n id: string;\n candidates: {\n path: string;\n baseScore: number;\n isRelevant: boolean;\n counters?: MemoryWorthCounters;\n }[];\n /** Top-K used for precision@K. 
*/\n k: number;\n}\n\n/**\n * Deterministic pseudo-random number generator (mulberry32) so the bench\n * produces identical results across runs, making precision changes easy to\n * attribute to code rather than seed drift.\n */\nfunction mulberry32(seed: number): () => number {\n let t = seed >>> 0;\n return () => {\n t = (t + 0x6d2b79f5) >>> 0;\n let r = t;\n r = Math.imul(r ^ (r >>> 15), r | 1);\n r ^= r + Math.imul(r ^ (r >>> 7), r | 61);\n return ((r ^ (r >>> 14)) >>> 0) / 4294967296;\n };\n}\n\n/**\n * Generate a synthetic test case.\n *\n * Corpus design:\n * - 20 candidates per case.\n * - Top 5 by base score include 2 \"traps\" — memories that the retriever\n * ranks highly but whose outcome history is 10/0 failures. These should\n * be sunk by the filter.\n * - 3 lower-ranked memories are genuinely relevant with 10/0 success\n * history. The filter should promote them into the top 5.\n * - Remaining 15 are noise at the neutral prior (no counter data).\n *\n * `k = 5`, so the ideal precision@K is 3/5 = 0.6 (three genuinely relevant\n * items, after filter promotion). 
Without the filter, precision@5 is 0/5\n * because the top 5 by base score are the 2 traps + 3 irrelevant neutral\n * items.\n */\nfunction buildCase(caseIndex: number, rng: () => number): BenchCase {\n const candidates: BenchCase[\"candidates\"] = [];\n // 2 high-ranked traps: high base score, bad outcome history, NOT relevant.\n for (let i = 0; i < 2; i += 1) {\n candidates.push({\n path: `case-${caseIndex}-trap-${i}.md`,\n baseScore: 0.95 - i * 0.02,\n isRelevant: false,\n counters: { mw_success: 0, mw_fail: 10 },\n });\n }\n // 3 high-ranked irrelevant neutral items.\n for (let i = 0; i < 3; i += 1) {\n candidates.push({\n path: `case-${caseIndex}-noise-high-${i}.md`,\n baseScore: 0.9 - i * 0.02,\n isRelevant: false,\n });\n }\n // 3 lower-ranked TRUE POSITIVES with strong success history — the filter\n // must float these into the top 5.\n for (let i = 0; i < 3; i += 1) {\n candidates.push({\n path: `case-${caseIndex}-gold-${i}.md`,\n baseScore: 0.7 - i * 0.05,\n isRelevant: true,\n counters: { mw_success: 10, mw_fail: 0 },\n });\n }\n // 12 irrelevant noise candidates at random lower scores.\n for (let i = 0; i < 12; i += 1) {\n candidates.push({\n path: `case-${caseIndex}-noise-low-${i}.md`,\n baseScore: rng() * 0.5,\n isRelevant: false,\n });\n }\n // Shuffle to remove any input-order bias.\n for (let i = candidates.length - 1; i > 0; i -= 1) {\n const j = Math.floor(rng() * (i + 1));\n [candidates[i], candidates[j]] = [candidates[j]!, candidates[i]!];\n }\n return { id: `case-${caseIndex}`, candidates, k: 5 };\n}\n\nfunction computePrecisionAtK(\n orderedPaths: string[],\n relevant: Set<string>,\n k: number,\n): number {\n const topK = orderedPaths.slice(0, k);\n if (topK.length === 0) return 0;\n let hits = 0;\n for (const p of topK) if (relevant.has(p)) hits += 1;\n return hits / Math.min(k, topK.length);\n}\n\nexport interface MemoryWorthBenchResult {\n cases: number;\n k: number;\n /** Mean precision@K with the filter disabled. 
*/\n precisionAtK_off: number;\n /** Mean precision@K with the filter enabled. */\n precisionAtK_on: number;\n /** `on - off`; positive means filter helps, zero means tied. */\n delta: number;\n /** Filter-on wins at least as often as it loses, case-by-case. */\n filterWinsOrTies: boolean;\n}\n\n/**\n * Run the benchmark over N synthetic cases using a fixed PRNG seed. Returns\n * aggregate precision numbers + a boolean verdict.\n */\nexport function runMemoryWorthBench(options?: {\n cases?: number;\n seed?: number;\n now?: Date;\n}): MemoryWorthBenchResult {\n const requestedCases = options?.cases ?? 50;\n // Reject non-positive-integer case counts. Passing 0 would divide by zero\n // and produce a NaN precision that the `filterWinsOrTies` boolean would\n // still mark as `true` — dangerously misleading since this result is used\n // to justify the default flip. Fractional values would inflate precision\n // because the loop rounds up (Array.from ceil) but the average divides\n // by the fractional input.\n if (\n !Number.isFinite(requestedCases) ||\n !Number.isInteger(requestedCases) ||\n requestedCases < 1\n ) {\n throw new Error(\n `runMemoryWorthBench: cases must be a positive integer; got ${requestedCases}`,\n );\n }\n const numCases = requestedCases;\n const rng = mulberry32(options?.seed ?? 0xdeadbeef);\n const now = options?.now ?? 
new Date(\"2026-01-01T00:00:00.000Z\");\n\n let sumOff = 0;\n let sumOn = 0;\n let onWinsOrTies = 0;\n\n for (let i = 0; i < numCases; i += 1) {\n const c = buildCase(i, rng);\n const relevant = new Set(\n c.candidates.filter((x) => x.isRelevant).map((x) => x.path),\n );\n\n // Filter OFF: sort by baseScore descending.\n const off = [...c.candidates]\n .sort((a, b) => b.baseScore - a.baseScore)\n .map((x) => x.path);\n const pOff = computePrecisionAtK(off, relevant, c.k);\n\n // Filter ON: build counter map and apply the filter.\n const counters = new Map<string, MemoryWorthCounters>();\n for (const cand of c.candidates) {\n if (cand.counters) counters.set(cand.path, cand.counters);\n }\n const filtered = applyMemoryWorthFilter(\n c.candidates.map((x) => ({ path: x.path, score: x.baseScore })),\n { counters, now },\n );\n const on = filtered.map((x) => x.path);\n const pOn = computePrecisionAtK(on, relevant, c.k);\n\n sumOff += pOff;\n sumOn += pOn;\n if (pOn >= pOff) onWinsOrTies += 1;\n }\n\n const avgOff = sumOff / numCases;\n const avgOn = sumOn / numCases;\n return {\n cases: numCases,\n k: 5,\n precisionAtK_off: avgOff,\n precisionAtK_on: avgOn,\n delta: avgOn - avgOff,\n filterWinsOrTies: onWinsOrTies === numCases,\n };\n}\n\n/**\n * CLI entry point — run the bench and print a structured result. Exits\n * non-zero if the filter ever loses to the no-filter baseline (so CI can\n * gate on this in the future).\n */\nexport function runMemoryWorthBenchCli(): void {\n const result = runMemoryWorthBench();\n // eslint-disable-next-line no-console\n console.log(JSON.stringify(result, null, 2));\n if (!result.filterWinsOrTies) {\n // eslint-disable-next-line no-console\n console.error(\"memory-worth bench: filter lost to no-filter baseline on at least one case\");\n process.exit(1);\n }\n}\n\n// When this file is invoked directly (e.g. `tsx memory-worth-bench.ts`),\n// run the CLI. 
Use url.pathToFileURL to produce a normalized file:// URL\n// from `process.argv[1]` — that handles Windows drive letters,\n// URL-encoded characters (spaces, etc.), and symlinked entrypoints. A\n// naïve `file://${process.argv[1]}` comparison fails on all three and\n// would silently skip runMemoryWorthBenchCli() on those platforms.\nif (process.argv[1]) {\n try {\n // Lazy import so this file can still be loaded in environments that\n // don't have node:url (browsers, Deno test runners in some modes).\n const { pathToFileURL } = await import(\"node:url\");\n if (import.meta.url === pathToFileURL(process.argv[1]).href) {\n runMemoryWorthBenchCli();\n }\n } catch {\n // If the normalization fails for any reason, fall back to the naive\n // comparison. Worse case: the bench doesn't auto-run; callers can\n // always invoke `runMemoryWorthBenchCli()` or `runMemoryWorthBench()`\n // explicitly.\n if (import.meta.url === `file://${process.argv[1]}`) {\n runMemoryWorthBenchCli();\n }\n }\n}\n"],"mappings":";;;;;;AAqDA,SAAS,WAAW,MAA4B;AAC9C,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,QAAK,IAAI,eAAgB;AACzB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;AAmBA,SAAS,UAAU,WAAmB,KAA8B;AAClE,QAAM,aAAsC,CAAC;AAE7C,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG;AAC7B,eAAW,KAAK;AAAA,MACd,MAAM,QAAQ,SAAS,SAAS,CAAC;AAAA,MACjC,WAAW,OAAO,IAAI;AAAA,MACtB,YAAY;AAAA,MACZ,UAAU,EAAE,YAAY,GAAG,SAAS,GAAG;AAAA,IACzC,CAAC;AAAA,EACH;AAEA,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG;AAC7B,eAAW,KAAK;AAAA,MACd,MAAM,QAAQ,SAAS,eAAe,CAAC;AAAA,MACvC,WAAW,MAAM,IAAI;AAAA,MACrB,YAAY;AAAA,IACd,CAAC;AAAA,EACH;AAGA,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG;AAC7B,eAAW,KAAK;AAAA,MACd,MAAM,QAAQ,SAAS,SAAS,CAAC;AAAA,MACjC,WAAW,MAAM,IAAI;AAAA,MACrB,YAAY;AAAA,MACZ,UAAU,EAAE,YAAY,IAAI,SAAS,EAAE;AAAA,IACzC,CAAC;AAAA,EACH;AAEA,WAAS,IAAI,GAAG,IAAI,IAAI,KAAK,GAAG;AAC9B,eAAW,KAAK;AAAA,MACd,MAAM,QAAQ,SAAS,cAAc,CAAC;AAAA,MACtC,WAAW,IAAI,IAAI;AAAA,MACnB,YAAY
;AAAA,IACd,CAAC;AAAA,EACH;AAEA,WAAS,IAAI,WAAW,SAAS,GAAG,IAAI,GAAG,KAAK,GAAG;AACjD,UAAM,IAAI,KAAK,MAAM,IAAI,KAAK,IAAI,EAAE;AACpC,KAAC,WAAW,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAI,WAAW,CAAC,CAAE;AAAA,EAClE;AACA,SAAO,EAAE,IAAI,QAAQ,SAAS,IAAI,YAAY,GAAG,EAAE;AACrD;AAEA,SAAS,oBACP,cACA,UACA,GACQ;AACR,QAAM,OAAO,aAAa,MAAM,GAAG,CAAC;AACpC,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,MAAI,OAAO;AACX,aAAW,KAAK,KAAM,KAAI,SAAS,IAAI,CAAC,EAAG,SAAQ;AACnD,SAAO,OAAO,KAAK,IAAI,GAAG,KAAK,MAAM;AACvC;AAmBO,SAAS,oBAAoB,SAIT;AACzB,QAAM,iBAAiB,SAAS,SAAS;AAOzC,MACE,CAAC,OAAO,SAAS,cAAc,KAC/B,CAAC,OAAO,UAAU,cAAc,KAChC,iBAAiB,GACjB;AACA,UAAM,IAAI;AAAA,MACR,8DAA8D,cAAc;AAAA,IAC9E;AAAA,EACF;AACA,QAAM,WAAW;AACjB,QAAM,MAAM,WAAW,SAAS,QAAQ,UAAU;AAClD,QAAM,MAAM,SAAS,OAAO,oBAAI,KAAK,0BAA0B;AAE/D,MAAI,SAAS;AACb,MAAI,QAAQ;AACZ,MAAI,eAAe;AAEnB,WAAS,IAAI,GAAG,IAAI,UAAU,KAAK,GAAG;AACpC,UAAM,IAAI,UAAU,GAAG,GAAG;AAC1B,UAAM,WAAW,IAAI;AAAA,MACnB,EAAE,WAAW,OAAO,CAAC,MAAM,EAAE,UAAU,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI;AAAA,IAC5D;AAGA,UAAM,MAAM,CAAC,GAAG,EAAE,UAAU,EACzB,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EACxC,IAAI,CAAC,MAAM,EAAE,IAAI;AACpB,UAAM,OAAO,oBAAoB,KAAK,UAAU,EAAE,CAAC;AAGnD,UAAM,WAAW,oBAAI,IAAiC;AACtD,eAAW,QAAQ,EAAE,YAAY;AAC/B,UAAI,KAAK,SAAU,UAAS,IAAI,KAAK,MAAM,KAAK,QAAQ;AAAA,IAC1D;AACA,UAAM,WAAW;AAAA,MACf,EAAE,WAAW,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,OAAO,EAAE,UAAU,EAAE;AAAA,MAC9D,EAAE,UAAU,IAAI;AAAA,IAClB;AACA,UAAM,KAAK,SAAS,IAAI,CAAC,MAAM,EAAE,IAAI;AACrC,UAAM,MAAM,oBAAoB,IAAI,UAAU,EAAE,CAAC;AAEjD,cAAU;AACV,aAAS;AACT,QAAI,OAAO,KAAM,iBAAgB;AAAA,EACnC;AAEA,QAAM,SAAS,SAAS;AACxB,QAAM,QAAQ,QAAQ;AACtB,SAAO;AAAA,IACL,OAAO;AAAA,IACP,GAAG;AAAA,IACH,kBAAkB;AAAA,IAClB,iBAAiB;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,kBAAkB,iBAAiB;AAAA,EACrC;AACF;AAOO,SAAS,yBAA+B;AAC7C,QAAM,SAAS,oBAAoB;AAEnC,UAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAC3C,MAAI,CAAC,OAAO,kBAAkB;AAE5B,YAAQ,MAAM,4EAA4E;AAC1F,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;AAQA,IAAI,QAAQ,KAAK,CAAC,GAAG;AACnB,MAAI;AAGF,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,QAAI,YAAY,QAAQ,cAAc,QAAQ,KAAK,
CAAC,CAAC,EAAE,MAAM;AAC3D,6BAAuB;AAAA,IACzB;AAAA,EACF,QAAQ;AAKN,QAAI,YAAY,QAAQ,UAAU,QAAQ,KAAK,CAAC,CAAC,IAAI;AACnD,6BAAuB;AAAA,IACzB;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { MemoryWorthResult } from './memory-worth.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Issue #560 PR 4 — Memory Worth recall filter.
|
|
5
|
+
*
|
|
6
|
+
* Pure helper that multiplies candidate recall scores by a Memory Worth
|
|
7
|
+
* factor (from `computeMemoryWorth`, PR 2) so memories with a history of
|
|
8
|
+
* failed sessions sink in the ranking. Reading the per-memory counters is
|
|
9
|
+
* the caller's job — this module does no I/O and depends only on PR 2's
|
|
10
|
+
* pure scorer.
|
|
11
|
+
*
|
|
12
|
+
* The filter is feature-flagged (`recallMemoryWorthFilterEnabled` on
|
|
13
|
+
* PluginConfig, default `false` in this PR) so operators can A/B it safely
|
|
14
|
+
* before PR 5 flips the default.
|
|
15
|
+
*
|
|
16
|
+
* Intentional properties:
|
|
17
|
+
* - Pure function. No side effects, no I/O. Tested directly.
|
|
18
|
+
* - Candidates with no counters (empty `counters` map entry) score exactly
|
|
19
|
+
* the same as they did pre-filter (p_success = 0.5, but we renormalize
|
|
20
|
+
* so the neutral prior stays neutral — see the "neutral prior preserves
|
|
21
|
+
* ranking among unseen memories" test).
|
|
22
|
+
* - Stable: a strictly sorted input stays in its original order among
|
|
23
|
+
* items that all score to the prior. This matters because an ad-hoc
|
|
24
|
+
* sort that returns 0 for ties on some comparators but not others
|
|
25
|
+
* produces non-deterministic ordering (CLAUDE.md rule 19).
|
|
26
|
+
* - Does not mutate inputs — returns a new array.
|
|
27
|
+
*
|
|
28
|
+
* How the multiplier is applied:
|
|
29
|
+
* `new_score = old_score * (p_success / PRIOR)`
|
|
30
|
+
* where PRIOR = 0.5. This way an uninstrumented memory (p_success = 0.5)
|
|
31
|
+
* gets a multiplier of exactly 1.0 (no penalty, no boost), a memory that
|
|
32
|
+
* always succeeds gets a multiplier approaching 2.0 (boosted) and a
|
|
33
|
+
* memory that always fails gets a multiplier approaching 0.0 (sunk).
|
|
34
|
+
* Using the ratio instead of raw `p_success` keeps the filter from
|
|
35
|
+
* accidentally halving every un-instrumented memory the moment it ships.
|
|
36
|
+
*
|
|
37
|
+
* Out of scope:
|
|
38
|
+
* - Reading counters from storage (caller does that once per recall).
|
|
39
|
+
* - Orchestrator wiring / config plumbing (separate commit in this PR).
|
|
40
|
+
* - Default flip to `true` (PR 5 once benchmark confirms a win).
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* One memory's outcome history, keyed by memory path so the filter can look
|
|
45
|
+
* up a candidate's counters in O(1). `lastAccessed` is passed through to
|
|
46
|
+
* `computeMemoryWorth` where it drives optional recency decay.
|
|
47
|
+
*/
|
|
48
|
+
interface MemoryWorthCounters {
|
|
49
|
+
mw_success?: number;
|
|
50
|
+
mw_fail?: number;
|
|
51
|
+
lastAccessed?: string | null;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* A scored recall candidate. Defined locally (rather than importing
|
|
55
|
+
* `QmdSearchResult`) so the filter can be reused by any caller that has a
|
|
56
|
+
* `{ path, score }` shape — e.g. unit tests, bench fixtures, and future
|
|
57
|
+
* non-QMD retrieval backends.
|
|
58
|
+
*/
|
|
59
|
+
interface MemoryWorthFilterCandidate {
|
|
60
|
+
path: string;
|
|
61
|
+
score: number;
|
|
62
|
+
}
|
|
63
|
+
interface MemoryWorthFilterOptions {
|
|
64
|
+
/**
|
|
65
|
+
* Map from memory path → outcome counters. Candidates whose path is not
|
|
66
|
+
* in this map score at the neutral prior (multiplier = 1.0).
|
|
67
|
+
*/
|
|
68
|
+
counters: ReadonlyMap<string, MemoryWorthCounters>;
|
|
69
|
+
/**
|
|
70
|
+
* Current time reference — passed through to `computeMemoryWorth` for
|
|
71
|
+
* decay math. Required (not defaulted) so tests and deterministic bench
|
|
72
|
+
* runs don't depend on the wall clock.
|
|
73
|
+
*/
|
|
74
|
+
now: Date;
|
|
75
|
+
/**
|
|
76
|
+
* Half-life for outcome decay, in milliseconds. Optional; when omitted,
|
|
77
|
+
* decay is disabled and raw counters are used.
|
|
78
|
+
*/
|
|
79
|
+
halfLifeMs?: number;
|
|
80
|
+
/**
|
|
81
|
+
* Re-sort the candidates by descending filtered score before returning.
|
|
82
|
+
* When `false`, the original input order is preserved (but the `.score`
|
|
83
|
+
* fields still reflect the multiplier). Default `true` because most
|
|
84
|
+
* callers want a ranked result; a few tests / bench fixtures want the
|
|
85
|
+
* order preserved so they can assert on position.
|
|
86
|
+
*/
|
|
87
|
+
reorder?: boolean;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Output of `applyMemoryWorthFilter`. `worth` surfaces the computed
|
|
91
|
+
* `{ score, p_success, confidence }` for each candidate so observability /
|
|
92
|
+
* xray layers can report why each item moved without re-deriving it.
|
|
93
|
+
*/
|
|
94
|
+
interface MemoryWorthFilterResultItem {
|
|
95
|
+
path: string;
|
|
96
|
+
/** Final score after the Memory Worth multiplier is applied. */
|
|
97
|
+
score: number;
|
|
98
|
+
/** The untouched input score — useful for telemetry and xray. */
|
|
99
|
+
originalScore: number;
|
|
100
|
+
/** The multiplier that was applied (1.0 for uninstrumented memories). */
|
|
101
|
+
multiplier: number;
|
|
102
|
+
/** The Memory Worth result (`score`, `p_success`, `confidence`). */
|
|
103
|
+
worth: MemoryWorthResult;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Apply the Memory Worth multiplier to each candidate's score and (by
|
|
107
|
+
* default) re-sort the list by descending score.
|
|
108
|
+
*
|
|
109
|
+
* When `counters` is empty, every candidate gets a multiplier of 1.0 — the
|
|
110
|
+
* function is safe to call unconditionally when the feature flag is on,
|
|
111
|
+
* even for namespaces that have zero instrumented memories.
|
|
112
|
+
*/
|
|
113
|
+
declare function applyMemoryWorthFilter(candidates: readonly MemoryWorthFilterCandidate[], options: MemoryWorthFilterOptions): MemoryWorthFilterResultItem[];
|
|
114
|
+
/**
|
|
115
|
+
* Convenience lookup helper for callers that already have an array of
|
|
116
|
+
* memory files with `path` and frontmatter fields on each. Keeps the map
|
|
117
|
+
* construction in one place so call sites don't drift.
|
|
118
|
+
*/
|
|
119
|
+
declare function buildMemoryWorthCounterMap(memories: readonly {
|
|
120
|
+
path: string;
|
|
121
|
+
frontmatter: {
|
|
122
|
+
mw_success?: number;
|
|
123
|
+
mw_fail?: number;
|
|
124
|
+
lastAccessed?: string | null;
|
|
125
|
+
};
|
|
126
|
+
}[]): Map<string, MemoryWorthCounters>;
|
|
127
|
+
|
|
128
|
+
export { type MemoryWorthCounters, type MemoryWorthFilterCandidate, type MemoryWorthFilterOptions, type MemoryWorthFilterResultItem, applyMemoryWorthFilter, buildMemoryWorthCounterMap };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { StorageManager } from './storage.js';
|
|
2
|
+
import { MemoryFrontmatter } from './types.js';
|
|
3
|
+
import './page-versioning.js';
|
|
4
|
+
import './consolidation-operator.js';
|
|
5
|
+
import './memory-projection-store-DeSXPh1j.js';
|
|
6
|
+
import 'better-sqlite3';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Issue #560 PR 3 — Memory Worth outcome signal pipeline.
|
|
10
|
+
*
|
|
11
|
+
* PR 1 added `mw_success` / `mw_fail` fields to MemoryFrontmatter. PR 2 added
|
|
12
|
+
* a pure scoring helper. This module adds the one piece tying the two
|
|
13
|
+
* together: a way for callers to record a single outcome observation against
|
|
14
|
+
* a memory, which increments the appropriate counter in frontmatter.
|
|
15
|
+
*
|
|
16
|
+
* The public entry point is `recordMemoryOutcome(storage, { memoryPath, outcome, ... })`.
|
|
17
|
+
* Callers pass the full path to the memory file (not just the ID) because in
|
|
18
|
+
* the usual outcome source — the observation ledger — the memory path is
|
|
19
|
+
* already captured in the event payload, and path-based lookup avoids a
|
|
20
|
+
* full-corpus scan.
|
|
21
|
+
*
|
|
22
|
+
* Intentional properties:
|
|
23
|
+
* - Works on a per-memory basis (no bulk API in this slice). Bulk update is
|
|
24
|
+
* an easy layer on top of this once a second caller needs it.
|
|
25
|
+
* - Reuses the existing `updateMemoryFrontmatter(id, patch)` write path so
|
|
26
|
+
* unrelated fields (confidence, importance, lifecycle hooks, etc.) are
|
|
27
|
+
* preserved. The PR 1 serializer rejects negative / non-integer counters,
|
|
28
|
+
* so we rely on that for defensive validation rather than duplicating it.
|
|
29
|
+
* - Only instruments categories in `MEMORY_WORTH_OUTCOME_ELIGIBLE_CATEGORIES`
|
|
30
|
+
* (currently `fact`, matching `MEMORY_WORTH_ELIGIBLE_CATEGORIES` in
|
|
31
|
+
* operator-toolkit.ts for the doctor audit). Non-eligible memories return
|
|
32
|
+
* `{ ok: false, reason: "ineligible_category" }` rather than throwing so
|
|
33
|
+
* the caller — typically a ledger consumer draining heterogeneous events
|
|
34
|
+
* — doesn't need to pre-filter by category.
|
|
35
|
+
* - Missing / unknown memory IDs return `{ ok: false, reason: "not_found" }`
|
|
36
|
+
* rather than throwing, because outcome events may reference memories
|
|
37
|
+
* that were archived/deleted between the session and the ledger drain.
|
|
38
|
+
* That isn't an operator-actionable error.
|
|
39
|
+
* - On success, returns the new counter values so observability surfaces
|
|
40
|
+
* can report the increment without a second read.
|
|
41
|
+
*
|
|
42
|
+
* Out of scope (later PRs):
|
|
43
|
+
* - Recall filter reading the counters (PR 4).
|
|
44
|
+
* - Benchmark + default flip (PR 5).
|
|
45
|
+
* - Automatic increments from extraction or summarization. Only the
|
|
46
|
+
* explicit `MEM_OUTCOME` ledger tag or an MCP tool call drives writes.
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Exported so downstream tests / operators can query the allowlist without
|
|
51
|
+
* re-declaring it. Returned as a frozen copy so consumers cannot mutate the
|
|
52
|
+
* module-internal set.
|
|
53
|
+
*/
|
|
54
|
+
declare function memoryWorthOutcomeEligibleCategories(): ReadonlySet<MemoryFrontmatter["category"]>;
|
|
55
|
+
/**
|
|
56
|
+
* The direction of an outcome — whether the session that consumed this
|
|
57
|
+
* memory succeeded or failed. Restricted to a string literal union so
|
|
58
|
+
* callers in TypeScript land can't pass arbitrary tags.
|
|
59
|
+
*/
|
|
60
|
+
type MemoryOutcomeKind = "success" | "failure";
|
|
61
|
+
/**
|
|
62
|
+
* Arguments to `recordMemoryOutcome`.
|
|
63
|
+
*
|
|
64
|
+
* `memoryPath` is the filesystem path to the memory; we derive the ID from
|
|
65
|
+
* the basename, matching how the operator-toolkit and recall layers map
|
|
66
|
+
* paths to IDs.
|
|
67
|
+
*/
|
|
68
|
+
interface RecordMemoryOutcomeInput {
|
|
69
|
+
/**
|
|
70
|
+
* Absolute or repo-relative path to the memory file. Typically the value
|
|
71
|
+
* of `MemoryFile.path` for a memory returned by `readAllMemories`.
|
|
72
|
+
*/
|
|
73
|
+
memoryPath: string;
|
|
74
|
+
/** Outcome direction — "success" bumps mw_success; "failure" bumps mw_fail. */
|
|
75
|
+
outcome: MemoryOutcomeKind;
|
|
76
|
+
/**
|
|
77
|
+
* Optional observation timestamp for audit / telemetry. This PR doesn't
|
|
78
|
+
* persist the timestamp (PR 4/5 will use `lastAccessed`, which already
|
|
79
|
+
* covers the recency-decay requirement), but accepting it here keeps the
|
|
80
|
+
* call shape stable so future ledger integrations don't need a breaking
|
|
81
|
+
* change.
|
|
82
|
+
*/
|
|
83
|
+
timestamp?: Date | string;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Outcome of a `recordMemoryOutcome` call.
|
|
87
|
+
*
|
|
88
|
+
* `ok: true` means the counter was incremented and flushed. The returned
|
|
89
|
+
* values reflect the post-increment state, so callers can log
|
|
90
|
+
* `"fact-xyz: 4/1 → 5/1"` without re-reading.
|
|
91
|
+
*
|
|
92
|
+
* `ok: false` carries a short machine-readable `reason` so a ledger drainer
|
|
93
|
+
* can aggregate metrics ("how many events hit not_found this hour?"). The
|
|
94
|
+
* human-readable `message` is a friendlier version for logs.
|
|
95
|
+
*/
|
|
96
|
+
type RecordMemoryOutcomeResult = {
|
|
97
|
+
ok: true;
|
|
98
|
+
memoryId: string;
|
|
99
|
+
/** New value of `mw_success` after the increment. */
|
|
100
|
+
mw_success: number;
|
|
101
|
+
/** New value of `mw_fail` after the increment. */
|
|
102
|
+
mw_fail: number;
|
|
103
|
+
} | {
|
|
104
|
+
ok: false;
|
|
105
|
+
reason: "not_found" | "ineligible_category" | "invalid_outcome" | "invalid_path";
|
|
106
|
+
message: string;
|
|
107
|
+
};
|
|
108
|
+
/**
|
|
109
|
+
* Record a single outcome observation against a memory. Increments
|
|
110
|
+
* `mw_success` or `mw_fail` on the memory's frontmatter (preserving all
|
|
111
|
+
* other fields) via the existing `updateMemoryFrontmatter` write path.
|
|
112
|
+
*
|
|
113
|
+
* See the top-of-file doc comment for policy details (eligible categories,
|
|
114
|
+
* error semantics, and what is intentionally out of scope).
|
|
115
|
+
*/
|
|
116
|
+
declare function recordMemoryOutcome(storage: StorageManager, input: RecordMemoryOutcomeInput): Promise<RecordMemoryOutcomeResult>;
|
|
117
|
+
|
|
118
|
+
export { type MemoryOutcomeKind, type RecordMemoryOutcomeInput, type RecordMemoryOutcomeResult, memoryWorthOutcomeEligibleCategories, recordMemoryOutcome };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Issue #560 PR 2 — Memory Worth scoring (pure helper).
|
|
3
|
+
*
|
|
4
|
+
* Given per-memory outcome counters (`mw_success`, `mw_fail` — added to
|
|
5
|
+
* frontmatter in PR 1), compute a scalar worth score plus interpretable
|
|
6
|
+
* metadata. The score is a Laplace-smoothed success probability with an
|
|
7
|
+
* optional recency decay, and is meant to be used as a multiplier on existing
|
|
8
|
+
* recall scores (PR 4) to sink memories that consistently lead to failed
|
|
9
|
+
* sessions and keep uninstrumented memories at a neutral baseline.
|
|
10
|
+
*
|
|
11
|
+
* Intentional properties:
|
|
12
|
+
* - Pure function. No I/O, no time-of-import side effects. Testable in
|
|
13
|
+
* isolation; callers pass `now` so tests don't depend on the wall clock.
|
|
14
|
+
* - Laplace-smoothed ratio `(s + 1) / (s + f + 2)` ensures a memory with
|
|
15
|
+
* zero observations scores exactly 0.5 — neither boosted nor penalized.
|
|
16
|
+
* A single failure on a new memory lands at 1/3, not 0, so one bad
|
|
17
|
+
* session doesn't permanently exile a fact.
|
|
18
|
+
* - Recency decay is optional. When a memory hasn't been touched in a long
|
|
19
|
+
* time, its `p_success` is pulled back toward 0.5 (the prior). Decay is
|
|
20
|
+
* exponential with an operator-configured half-life so old verdicts
|
|
21
|
+
* aren't treated as equally informative as fresh ones.
|
|
22
|
+
* - Corrupt / missing inputs fail safely to the prior. Callers upstream of
|
|
23
|
+
* this helper (see `storage.parseMemoryWorthCounterField` in PR 1) already
|
|
24
|
+
* strip negatives and non-integers, but the helper re-validates so it
|
|
25
|
+
* survives being called directly from tests / ad-hoc tooling.
|
|
26
|
+
* - Confidence is the effective number of observations (post-decay). PR 4
|
|
27
|
+
* and PR 5 use it to decide whether the Memory Worth multiplier should
|
|
28
|
+
* actually be applied vs. left at 1.0 (i.e., "not enough signal yet").
|
|
29
|
+
*
|
|
30
|
+
* Out of scope here:
|
|
31
|
+
* - Mutating frontmatter (PR 3).
|
|
32
|
+
* - Recall integration / feature flag (PR 4).
|
|
33
|
+
* - Benchmark & default-flip (PR 5).
|
|
34
|
+
*/
|
|
35
|
+
/**
|
|
36
|
+
* Input to `computeMemoryWorth`.
|
|
37
|
+
*
|
|
38
|
+
* All fields are optional so a legacy (pre-PR-1) memory can be passed through
|
|
39
|
+
* without upstream guards — it will simply score to the neutral prior.
|
|
40
|
+
*/
|
|
41
|
+
interface ComputeMemoryWorthInput {
|
|
42
|
+
/** Count of sessions where this memory was recalled and the outcome was success. */
|
|
43
|
+
mw_success?: number;
|
|
44
|
+
/** Count of sessions where this memory was recalled and the outcome was failure. */
|
|
45
|
+
mw_fail?: number;
|
|
46
|
+
/**
|
|
47
|
+
* ISO timestamp of the most recent outcome observation for this memory.
|
|
48
|
+
* When provided together with `halfLifeMs`, observations decay exponentially
|
|
49
|
+
* toward the uniform prior as they age. Absent / unparseable timestamp →
|
|
50
|
+
* decay is skipped and raw counters are used directly.
|
|
51
|
+
*/
|
|
52
|
+
lastAccessed?: string | null;
|
|
53
|
+
/**
|
|
54
|
+
* Current wall-clock reference. Required in the signature (not defaulted to
|
|
55
|
+
* `Date.now()`) so the function stays pure and tests are deterministic.
|
|
56
|
+
*/
|
|
57
|
+
now: Date;
|
|
58
|
+
/**
|
|
59
|
+
* Half-life for outcome decay, in milliseconds. When `undefined` or `<= 0`,
|
|
60
|
+
* no decay is applied (raw counts are used). When positive, counter weights
|
|
61
|
+
* are multiplied by `2^(-age / halfLifeMs)`.
|
|
62
|
+
*/
|
|
63
|
+
halfLifeMs?: number;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Output of `computeMemoryWorth`.
|
|
67
|
+
*
|
|
68
|
+
* `score` is the value recall callers multiply into their base score.
|
|
69
|
+
* `p_success` is the same number pre-clamped — exposed separately so
|
|
70
|
+
* observability surfaces can log the probability distinctly from the
|
|
71
|
+
* multiplier. `confidence` is the effective observation count after decay,
|
|
72
|
+
* useful for UIs that want to render "strong signal" vs. "tentative".
|
|
73
|
+
*/
|
|
74
|
+
interface MemoryWorthResult {
|
|
75
|
+
/**
|
|
76
|
+
* The Laplace-smoothed success probability, post-decay, clamped to
|
|
77
|
+
* `[0, 1]`. PR 4 scales the base recall score by this probability relative to the 0.5 prior.
|
|
78
|
+
*/
|
|
79
|
+
score: number;
|
|
80
|
+
/**
|
|
81
|
+
* Same as `score` conceptually, surfaced separately so telemetry /
|
|
82
|
+
* xray surfaces can report probability independently of whatever final
|
|
83
|
+
* multiplier PR 4 chooses to apply.
|
|
84
|
+
*/
|
|
85
|
+
p_success: number;
|
|
86
|
+
/**
|
|
87
|
+
* Effective observation count (`s_eff + f_eff`). With decay enabled this is
|
|
88
|
+
* fractional; without decay it equals `mw_success + mw_fail` exactly.
|
|
89
|
+
* Zero indicates no signal — callers should treat the score as a prior.
|
|
90
|
+
*/
|
|
91
|
+
confidence: number;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Score a single memory's worth based on outcome history.
|
|
95
|
+
*
|
|
96
|
+
* Returns the neutral prior (`0.5`, `confidence=0`) for uninstrumented
|
|
97
|
+
* memories so the caller can treat "no data" and "data says 50/50"
|
|
98
|
+
* identically — neither should be penalized.
|
|
99
|
+
*/
|
|
100
|
+
declare function computeMemoryWorth(input: ComputeMemoryWorthInput): MemoryWorthResult;
|
|
101
|
+
|
|
102
|
+
export { type ComputeMemoryWorthInput, type MemoryWorthResult, computeMemoryWorth };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|