@remnic/core 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1 -0
- package/dist/abort-error.d.ts +32 -0
- package/dist/abort-error.js +11 -0
- package/dist/access-cli.d.ts +13 -3
- package/dist/access-cli.js +96 -80
- package/dist/access-cli.js.map +1 -1
- package/dist/access-http.d.ts +12 -4
- package/dist/access-http.js +25 -18
- package/dist/access-mcp.d.ts +32 -4
- package/dist/access-mcp.js +16 -1
- package/dist/access-schema.d.ts +28 -28
- package/dist/access-schema.js +1 -1
- package/dist/access-service-HmO1Trrx.d.ts +732 -0
- package/dist/access-service.d.ts +15 -601
- package/dist/access-service.js +21 -15
- package/dist/active-memory-bridge.d.ts +66 -0
- package/dist/active-memory-bridge.js +11 -0
- package/dist/active-memory-bridge.js.map +1 -0
- package/dist/active-recall.d.ts +96 -0
- package/dist/active-recall.js +308 -0
- package/dist/active-recall.js.map +1 -0
- package/dist/behavior-learner.js +1 -1
- package/dist/bootstrap.d.ts +6 -3
- package/dist/bootstrap.js +2 -2
- package/dist/boxes.js +2 -2
- package/dist/briefing.d.ts +169 -0
- package/dist/briefing.js +52 -0
- package/dist/briefing.js.map +1 -0
- package/dist/buffer.d.ts +19 -5
- package/dist/buffer.js +2 -2
- package/dist/calibration.js +6 -6
- package/dist/causal-behavior.js +5 -5
- package/dist/causal-chain.js +3 -3
- package/dist/causal-consolidation.d.ts +22 -2
- package/dist/causal-consolidation.js +36 -9
- package/dist/causal-consolidation.js.map +1 -1
- package/dist/causal-retrieval.js +6 -6
- package/dist/causal-trajectory-graph.js +1 -1
- package/dist/causal-trajectory.d.ts +14 -1
- package/dist/causal-trajectory.js +5 -1
- package/dist/{chunk-KWBU5S5U.js → chunk-2ODBA7MQ.js} +9 -3
- package/dist/chunk-2ODBA7MQ.js.map +1 -0
- package/dist/{chunk-ZJLY4QSU.js → chunk-37UIFYWO.js} +130 -6
- package/dist/chunk-37UIFYWO.js.map +1 -0
- package/dist/chunk-3PG3H5TD.js +7 -0
- package/dist/chunk-3PG3H5TD.js.map +1 -0
- package/dist/{chunk-NTTLPF7F.js → chunk-3QFQGRHO.js} +5 -5
- package/dist/{chunk-QDOSNLB4.js → chunk-3QHL5ABG.js} +17 -15
- package/dist/chunk-3QHL5ABG.js.map +1 -0
- package/dist/{chunk-6UJQNRIO.js → chunk-3SV6CQHO.js} +92 -33
- package/dist/chunk-3SV6CQHO.js.map +1 -0
- package/dist/{chunk-U4PV25RD.js → chunk-3WHVNEN7.js} +1 -1
- package/dist/chunk-3WHVNEN7.js.map +1 -0
- package/dist/{chunk-XUHI52HK.js → chunk-44ICJRF3.js} +98 -10
- package/dist/chunk-44ICJRF3.js.map +1 -0
- package/dist/{chunk-HG2NKWR2.js → chunk-47UU5PU2.js} +49 -10
- package/dist/chunk-47UU5PU2.js.map +1 -0
- package/dist/chunk-4DJQYKMN.js +187 -0
- package/dist/chunk-4DJQYKMN.js.map +1 -0
- package/dist/chunk-4KAN3GZ3.js +225 -0
- package/dist/chunk-4KAN3GZ3.js.map +1 -0
- package/dist/chunk-4LACOVZX.js +813 -0
- package/dist/chunk-4LACOVZX.js.map +1 -0
- package/dist/{chunk-ORZMT74A.js → chunk-4NRAJUDS.js} +11 -1
- package/dist/chunk-4NRAJUDS.js.map +1 -0
- package/dist/{chunk-B7LOFDVE.js → chunk-4WMCPJWX.js} +8 -3
- package/dist/chunk-4WMCPJWX.js.map +1 -0
- package/dist/{chunk-G3AG3KZN.js → chunk-5IZL4DCV.js} +2 -2
- package/dist/{chunk-BRK4ODMI.js → chunk-5NPGSAVB.js} +2 -2
- package/dist/{chunk-QANCTXQF.js → chunk-6LX5ORAS.js} +3 -3
- package/dist/chunk-6MKAMLQL.js +16 -0
- package/dist/chunk-6MKAMLQL.js.map +1 -0
- package/dist/{chunk-ESSMF2FR.js → chunk-6PFRXT4K.js} +15 -6
- package/dist/chunk-6PFRXT4K.js.map +1 -0
- package/dist/{chunk-UIYZ5T3I.js → chunk-6UJ47TVX.js} +8 -8
- package/dist/chunk-6ZH4TU6I.js +245 -0
- package/dist/chunk-6ZH4TU6I.js.map +1 -0
- package/dist/{chunk-L5RPWGFK.js → chunk-7DHTMOND.js} +2 -2
- package/dist/{chunk-L7WO3MZ4.js → chunk-7ECD5ATE.js} +2 -2
- package/dist/{chunk-Q6FETXJA.js → chunk-7SEAZFFB.js} +2 -2
- package/dist/{chunk-V4YC4LUK.js → chunk-7WQ6SLIE.js} +175 -63
- package/dist/chunk-7WQ6SLIE.js.map +1 -0
- package/dist/chunk-ALXMCZEU.js +332 -0
- package/dist/chunk-ALXMCZEU.js.map +1 -0
- package/dist/{chunk-TVVVQQAK.js → chunk-BLKTA7MM.js} +58 -24
- package/dist/chunk-BLKTA7MM.js.map +1 -0
- package/dist/{chunk-SCHEKPYH.js → chunk-C2EFFULQ.js} +1 -1
- package/dist/{chunk-GJR6D6KC.js → chunk-D654IBA6.js} +2 -2
- package/dist/{chunk-OTFNI3OO.js → chunk-DEPL3635.js} +1828 -401
- package/dist/chunk-DEPL3635.js.map +1 -0
- package/dist/{chunk-UYSKNO6E.js → chunk-DHHP2Z4X.js} +15 -4
- package/dist/chunk-DHHP2Z4X.js.map +1 -0
- package/dist/{chunk-UV2FO7J4.js → chunk-E6K4NIEU.js} +2 -2
- package/dist/{chunk-T4WRIV2C.js → chunk-EABGC2TL.js} +2 -2
- package/dist/chunk-EJI5XIBB.js +232 -0
- package/dist/chunk-EJI5XIBB.js.map +1 -0
- package/dist/{chunk-ONRU4L2N.js → chunk-FEMOX5AD.js} +2 -2
- package/dist/{chunk-IFFFR3MR.js → chunk-FSFEQI74.js} +3 -3
- package/dist/chunk-G4SK7DSQ.js +121 -0
- package/dist/chunk-G4SK7DSQ.js.map +1 -0
- package/dist/{chunk-WWIQTB2Y.js → chunk-GGD5W7TB.js} +9 -2
- package/dist/chunk-GGD5W7TB.js.map +1 -0
- package/dist/{chunk-QWUUMMIK.js → chunk-GV6NLQ4X.js} +1355 -80
- package/dist/chunk-GV6NLQ4X.js.map +1 -0
- package/dist/{chunk-2PO5ZRKV.js → chunk-GZCUW5IC.js} +16 -3
- package/dist/chunk-GZCUW5IC.js.map +1 -0
- package/dist/{chunk-AAI7JARD.js → chunk-HMDCOMYU.js} +8 -11
- package/dist/chunk-HMDCOMYU.js.map +1 -0
- package/dist/chunk-IQT3XTKW.js +121 -0
- package/dist/chunk-IQT3XTKW.js.map +1 -0
- package/dist/{chunk-J3BT33K7.js → chunk-ITRLGI2T.js} +5 -5
- package/dist/{chunk-BDFZXRSO.js → chunk-J4IYOZZ5.js} +15 -2
- package/dist/chunk-J4IYOZZ5.js.map +1 -0
- package/dist/{chunk-J47FNDR7.js → chunk-JIU55F3X.js} +7 -7
- package/dist/{chunk-MDDAA2AO.js → chunk-JL2PU6AI.js} +17 -6
- package/dist/chunk-JL2PU6AI.js.map +1 -0
- package/dist/{chunk-ZKYI7UVO.js → chunk-JR4ZC3G4.js} +2 -2
- package/dist/{chunk-UCYSTFZR.js → chunk-JRNQ3RNA.js} +2 -2
- package/dist/{chunk-GPGBSNKM.js → chunk-K4FLSOR5.js} +2 -2
- package/dist/chunk-KVE7R4CG.js +320 -0
- package/dist/chunk-KVE7R4CG.js.map +1 -0
- package/dist/chunk-LAYN4LDC.js +267 -0
- package/dist/chunk-LAYN4LDC.js.map +1 -0
- package/dist/{chunk-ISY75RLM.js → chunk-MBJHSA7F.js} +344 -7
- package/dist/chunk-MBJHSA7F.js.map +1 -0
- package/dist/{chunk-PGK3VUHN.js → chunk-MTLYEMJB.js} +3 -2
- package/dist/chunk-MTLYEMJB.js.map +1 -0
- package/dist/{chunk-QY2BHY5O.js → chunk-MVTHXUBX.js} +297 -34
- package/dist/chunk-MVTHXUBX.js.map +1 -0
- package/dist/{chunk-LP47L3ZX.js → chunk-N42IWANG.js} +5 -5
- package/dist/{chunk-YNI4S5WT.js → chunk-N53K2EXC.js} +2 -2
- package/dist/{chunk-763GUIOU.js → chunk-NBNN5GOB.js} +2 -2
- package/dist/{chunk-CXWFUJR2.js → chunk-NQEVYWX6.js} +195 -5
- package/dist/chunk-NQEVYWX6.js.map +1 -0
- package/dist/{chunk-KL4CP4SB.js → chunk-O5ETUNBT.js} +17 -5
- package/dist/chunk-O5ETUNBT.js.map +1 -0
- package/dist/{chunk-OOSWAUYB.js → chunk-ODWDQNRE.js} +2 -2
- package/dist/chunk-OIT5QGG4.js +80 -0
- package/dist/chunk-OIT5QGG4.js.map +1 -0
- package/dist/{chunk-HLBYLYRD.js → chunk-PAORGQRI.js} +70 -13
- package/dist/chunk-PAORGQRI.js.map +1 -0
- package/dist/chunk-PVGDJXVK.js +21 -0
- package/dist/chunk-PVGDJXVK.js.map +1 -0
- package/dist/{chunk-OTAVQCSF.js → chunk-PYXS46O7.js} +2 -2
- package/dist/chunk-QDW3E4RD.js +108 -0
- package/dist/chunk-QDW3E4RD.js.map +1 -0
- package/dist/{chunk-YNCQ7E4M.js → chunk-QDYXG4CS.js} +4 -3
- package/dist/chunk-QDYXG4CS.js.map +1 -0
- package/dist/{chunk-HLXVTBF3.js → chunk-QNJMBKFK.js} +3 -2
- package/dist/chunk-QNJMBKFK.js.map +1 -0
- package/dist/{chunk-4A24LIM2.js → chunk-S75M5ZRK.js} +2 -2
- package/dist/chunk-SYUK3VLY.js +789 -0
- package/dist/chunk-SYUK3VLY.js.map +1 -0
- package/dist/{chunk-QCCCQT3O.js → chunk-TBBDFYXW.js} +2 -2
- package/dist/chunk-TBBDFYXW.js.map +1 -0
- package/dist/chunk-U66YHYC7.js +31 -0
- package/dist/chunk-U66YHYC7.js.map +1 -0
- package/dist/{chunk-MWGVGUIS.js → chunk-UEYA6UC7.js} +36 -4
- package/dist/chunk-UEYA6UC7.js.map +1 -0
- package/dist/{chunk-M5KEYE5E.js → chunk-URB2WSKZ.js} +2 -2
- package/dist/chunk-UVJFDP7P.js +202 -0
- package/dist/chunk-UVJFDP7P.js.map +1 -0
- package/dist/chunk-W6SL7OFG.js +180 -0
- package/dist/chunk-W6SL7OFG.js.map +1 -0
- package/dist/chunk-WBSAYXVI.js +7945 -0
- package/dist/chunk-WBSAYXVI.js.map +1 -0
- package/dist/{chunk-M5ZBBBJI.js → chunk-XZ2TIKGC.js} +39 -9
- package/dist/chunk-XZ2TIKGC.js.map +1 -0
- package/dist/chunk-Y4FHOFJ2.js +140 -0
- package/dist/chunk-Y4FHOFJ2.js.map +1 -0
- package/dist/chunk-YDBIWGNI.js +298 -0
- package/dist/chunk-YDBIWGNI.js.map +1 -0
- package/dist/chunk-YNB73F22.js +137 -0
- package/dist/chunk-YNB73F22.js.map +1 -0
- package/dist/{chunk-IZME7KW2.js → chunk-ZVBB3T7V.js} +31 -12
- package/dist/chunk-ZVBB3T7V.js.map +1 -0
- package/dist/chunking.js +1 -1
- package/dist/citations.d.ts +67 -0
- package/dist/citations.js +13 -0
- package/dist/citations.js.map +1 -0
- package/dist/cli-BneVIEvh.d.ts +1240 -0
- package/dist/cli.d.ts +32 -1147
- package/dist/cli.js +150 -7092
- package/dist/cli.js.map +1 -1
- package/dist/codex-materialize-CQlLTzke.d.ts +139 -0
- package/dist/codex-thread-key.d.ts +3 -0
- package/dist/codex-thread-key.js +7 -0
- package/dist/codex-thread-key.js.map +1 -0
- package/dist/config.js +3 -2
- package/dist/connectors/codex/instructions.md +160 -0
- package/dist/connectors/codex/resources/namespace-cheatsheet.md +48 -0
- package/dist/contradiction-review-WIUBAR52.js +21 -0
- package/dist/contradiction-review-WIUBAR52.js.map +1 -0
- package/dist/contradiction-scan-GR33PONM.js +376 -0
- package/dist/contradiction-scan-GR33PONM.js.map +1 -0
- package/dist/day-summary.d.ts +7 -2
- package/dist/day-summary.js +5 -2
- package/dist/direct-answer-wiring.d.ts +77 -0
- package/dist/direct-answer-wiring.js +75 -0
- package/dist/direct-answer-wiring.js.map +1 -0
- package/dist/direct-answer.d.ts +106 -0
- package/dist/direct-answer.js +10 -0
- package/dist/direct-answer.js.map +1 -0
- package/dist/embedding-fallback.d.ts +96 -2
- package/dist/embedding-fallback.js +6 -4
- package/dist/{engine-2A6J4XEX.js → engine-5TIQBYZR.js} +10 -7
- package/dist/engine-5TIQBYZR.js.map +1 -0
- package/dist/entity-retrieval.d.ts +3 -2
- package/dist/entity-retrieval.js +10 -7
- package/dist/entity-schema.d.ts +11 -0
- package/dist/entity-schema.js +19 -0
- package/dist/entity-schema.js.map +1 -0
- package/dist/explicit-capture.d.ts +6 -3
- package/dist/explicit-capture.js +2 -2
- package/dist/extraction-judge.d.ts +66 -0
- package/dist/extraction-judge.js +18 -0
- package/dist/extraction-judge.js.map +1 -0
- package/dist/extraction.d.ts +1 -0
- package/dist/extraction.js +12 -10
- package/dist/fallback-llm.d.ts +11 -2
- package/dist/fallback-llm.js +4 -4
- package/dist/graph.js +1 -1
- package/dist/harmonic-retrieval.js +2 -1
- package/dist/importance.d.ts +11 -1
- package/dist/importance.js +3 -1
- package/dist/index.d.ts +1027 -9
- package/dist/index.js +3303 -349
- package/dist/index.js.map +1 -1
- package/dist/intent.d.ts +2 -1
- package/dist/intent.js +3 -1
- package/dist/lifecycle.js +1 -1
- package/dist/local-llm.d.ts +10 -3
- package/dist/local-llm.js +2 -2
- package/dist/logger.d.ts +1 -1
- package/dist/logger.js +1 -1
- package/dist/memory-cache.d.ts +2 -2
- package/dist/memory-cache.js +1 -1
- package/dist/{memory-projection-store-NxMkbocT.d.ts → memory-projection-store-DeSXPh1j.d.ts} +1 -1
- package/dist/memory-projection-store.d.ts +1 -1
- package/dist/model-registry.js +2 -2
- package/dist/models-json.js +2 -2
- package/dist/native-knowledge.js +2 -2
- package/dist/negative.js +2 -2
- package/dist/operator-toolkit.js +20 -15
- package/dist/{orchestrator-zTa-Qo-1.d.ts → orchestrator-DRYA6_lW.d.ts} +273 -9
- package/dist/orchestrator.d.ts +6 -3
- package/dist/orchestrator.js +76 -63
- package/dist/page-versioning.d.ts +77 -0
- package/dist/page-versioning.js +15 -0
- package/dist/page-versioning.js.map +1 -0
- package/dist/plugin-id.d.ts +37 -0
- package/dist/plugin-id.js +11 -0
- package/dist/plugin-id.js.map +1 -0
- package/dist/policy-runtime.js +2 -2
- package/dist/profiling.js +2 -2
- package/dist/qmd.d.ts +5 -2
- package/dist/qmd.js +4 -3
- package/dist/recall-audit.d.ts +20 -0
- package/dist/recall-audit.js +50 -0
- package/dist/recall-audit.js.map +1 -0
- package/dist/recall-mmr.d.ts +152 -0
- package/dist/recall-mmr.js +17 -0
- package/dist/recall-mmr.js.map +1 -0
- package/dist/recall-qos.js +2 -2
- package/dist/recall-state.d.ts +28 -1
- package/dist/recall-state.js +2 -2
- package/dist/relevance.js +2 -2
- package/dist/resolution-QBTDHTG7.js +100 -0
- package/dist/resolution-QBTDHTG7.js.map +1 -0
- package/dist/resolve-provider-secret.d.ts +24 -1
- package/dist/resolve-provider-secret.js +4 -2
- package/dist/resume-bundles.js +6 -5
- package/dist/retrieval-agents.js +2 -2
- package/dist/retrieval.js +2 -2
- package/dist/schemas.d.ts +412 -54
- package/dist/schemas.js +3 -1
- package/dist/sdk-compat.d.ts +2 -0
- package/dist/sdk-compat.js +6 -3
- package/dist/sdk-compat.js.map +1 -1
- package/dist/semantic-chunking.d.ts +87 -0
- package/dist/semantic-chunking.js +20 -0
- package/dist/semantic-chunking.js.map +1 -0
- package/dist/semantic-consolidation-DrvSYRdB.d.ts +119 -0
- package/dist/semantic-consolidation.d.ts +4 -42
- package/dist/semantic-consolidation.js +23 -2
- package/dist/semantic-rule-promotion.js +9 -6
- package/dist/semantic-rule-verifier.js +10 -7
- package/dist/session-observer-state.js +2 -2
- package/dist/session-toggles.d.ts +22 -0
- package/dist/session-toggles.js +116 -0
- package/dist/session-toggles.js.map +1 -0
- package/dist/skills-registry.d.ts +47 -0
- package/dist/skills-registry.js +48 -0
- package/dist/skills-registry.js.map +1 -0
- package/dist/source-attribution.d.ts +169 -0
- package/dist/source-attribution.js +27 -0
- package/dist/source-attribution.js.map +1 -0
- package/dist/storage.d.ts +171 -10
- package/dist/storage.js +16 -5
- package/dist/summarizer.js +7 -7
- package/dist/temporal-supersession.d.ts +127 -0
- package/dist/temporal-supersession.js +20 -0
- package/dist/temporal-supersession.js.map +1 -0
- package/dist/threading.js +2 -2
- package/dist/tier-migration.d.ts +2 -1
- package/dist/tier-routing.js +2 -2
- package/dist/tokens.d.ts +21 -1
- package/dist/tokens.js +5 -1
- package/dist/transcript.js +2 -2
- package/dist/types-DJhqDJUV.d.ts +50 -0
- package/dist/types.d.ts +529 -3
- package/dist/types.js +1 -1
- package/dist/utility-learner.js +2 -2
- package/dist/utility-runtime.js +3 -3
- package/dist/verified-recall.js +11 -8
- package/dist/whitespace.d.ts +4 -0
- package/dist/whitespace.js +9 -0
- package/dist/whitespace.js.map +1 -0
- package/package.json +14 -8
- package/dist/chunk-2CJCWDMR.js +0 -87
- package/dist/chunk-2CJCWDMR.js.map +0 -1
- package/dist/chunk-2PO5ZRKV.js.map +0 -1
- package/dist/chunk-6UJQNRIO.js.map +0 -1
- package/dist/chunk-AAI7JARD.js.map +0 -1
- package/dist/chunk-B7LOFDVE.js.map +0 -1
- package/dist/chunk-BDFZXRSO.js.map +0 -1
- package/dist/chunk-CXWFUJR2.js.map +0 -1
- package/dist/chunk-DORBM6OB.js +0 -81
- package/dist/chunk-DORBM6OB.js.map +0 -1
- package/dist/chunk-ESSMF2FR.js.map +0 -1
- package/dist/chunk-HG2NKWR2.js.map +0 -1
- package/dist/chunk-HLBYLYRD.js.map +0 -1
- package/dist/chunk-HLXVTBF3.js.map +0 -1
- package/dist/chunk-ISY75RLM.js.map +0 -1
- package/dist/chunk-IZME7KW2.js.map +0 -1
- package/dist/chunk-KL4CP4SB.js.map +0 -1
- package/dist/chunk-KWBU5S5U.js.map +0 -1
- package/dist/chunk-M5ZBBBJI.js.map +0 -1
- package/dist/chunk-MDDAA2AO.js.map +0 -1
- package/dist/chunk-MWGVGUIS.js.map +0 -1
- package/dist/chunk-ORZMT74A.js.map +0 -1
- package/dist/chunk-OTFNI3OO.js.map +0 -1
- package/dist/chunk-PGK3VUHN.js.map +0 -1
- package/dist/chunk-QCCCQT3O.js.map +0 -1
- package/dist/chunk-QDOSNLB4.js.map +0 -1
- package/dist/chunk-QPKFPHOO.js +0 -178
- package/dist/chunk-QPKFPHOO.js.map +0 -1
- package/dist/chunk-QWUUMMIK.js.map +0 -1
- package/dist/chunk-QY2BHY5O.js.map +0 -1
- package/dist/chunk-TVVVQQAK.js.map +0 -1
- package/dist/chunk-U4PV25RD.js.map +0 -1
- package/dist/chunk-UYSKNO6E.js.map +0 -1
- package/dist/chunk-V4YC4LUK.js.map +0 -1
- package/dist/chunk-WWIQTB2Y.js.map +0 -1
- package/dist/chunk-XUHI52HK.js.map +0 -1
- package/dist/chunk-YNCQ7E4M.js.map +0 -1
- package/dist/chunk-ZJLY4QSU.js.map +0 -1
- /package/dist/{engine-2A6J4XEX.js.map → abort-error.js.map} +0 -0
- /package/dist/{chunk-NTTLPF7F.js.map → chunk-3QFQGRHO.js.map} +0 -0
- /package/dist/{chunk-G3AG3KZN.js.map → chunk-5IZL4DCV.js.map} +0 -0
- /package/dist/{chunk-BRK4ODMI.js.map → chunk-5NPGSAVB.js.map} +0 -0
- /package/dist/{chunk-QANCTXQF.js.map → chunk-6LX5ORAS.js.map} +0 -0
- /package/dist/{chunk-UIYZ5T3I.js.map → chunk-6UJ47TVX.js.map} +0 -0
- /package/dist/{chunk-L5RPWGFK.js.map → chunk-7DHTMOND.js.map} +0 -0
- /package/dist/{chunk-L7WO3MZ4.js.map → chunk-7ECD5ATE.js.map} +0 -0
- /package/dist/{chunk-Q6FETXJA.js.map → chunk-7SEAZFFB.js.map} +0 -0
- /package/dist/{chunk-SCHEKPYH.js.map → chunk-C2EFFULQ.js.map} +0 -0
- /package/dist/{chunk-GJR6D6KC.js.map → chunk-D654IBA6.js.map} +0 -0
- /package/dist/{chunk-UV2FO7J4.js.map → chunk-E6K4NIEU.js.map} +0 -0
- /package/dist/{chunk-T4WRIV2C.js.map → chunk-EABGC2TL.js.map} +0 -0
- /package/dist/{chunk-ONRU4L2N.js.map → chunk-FEMOX5AD.js.map} +0 -0
- /package/dist/{chunk-IFFFR3MR.js.map → chunk-FSFEQI74.js.map} +0 -0
- /package/dist/{chunk-J3BT33K7.js.map → chunk-ITRLGI2T.js.map} +0 -0
- /package/dist/{chunk-J47FNDR7.js.map → chunk-JIU55F3X.js.map} +0 -0
- /package/dist/{chunk-ZKYI7UVO.js.map → chunk-JR4ZC3G4.js.map} +0 -0
- /package/dist/{chunk-UCYSTFZR.js.map → chunk-JRNQ3RNA.js.map} +0 -0
- /package/dist/{chunk-GPGBSNKM.js.map → chunk-K4FLSOR5.js.map} +0 -0
- /package/dist/{chunk-LP47L3ZX.js.map → chunk-N42IWANG.js.map} +0 -0
- /package/dist/{chunk-YNI4S5WT.js.map → chunk-N53K2EXC.js.map} +0 -0
- /package/dist/{chunk-763GUIOU.js.map → chunk-NBNN5GOB.js.map} +0 -0
- /package/dist/{chunk-OOSWAUYB.js.map → chunk-ODWDQNRE.js.map} +0 -0
- /package/dist/{chunk-OTAVQCSF.js.map → chunk-PYXS46O7.js.map} +0 -0
- /package/dist/{chunk-4A24LIM2.js.map → chunk-S75M5ZRK.js.map} +0 -0
- /package/dist/{chunk-M5KEYE5E.js.map → chunk-URB2WSKZ.js.map} +0 -0
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
} from "./chunk-Z5LAYHGJ.js";
|
|
4
4
|
import {
|
|
5
5
|
log
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-2ODBA7MQ.js";
|
|
7
7
|
|
|
8
8
|
// src/session-observer-state.ts
|
|
9
9
|
import path from "path";
|
|
@@ -273,4 +273,4 @@ export {
|
|
|
273
273
|
normalizeObserverBands,
|
|
274
274
|
SessionObserverState
|
|
275
275
|
};
|
|
276
|
-
//# sourceMappingURL=chunk-
|
|
276
|
+
//# sourceMappingURL=chunk-JR4ZC3G4.js.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
log
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-2ODBA7MQ.js";
|
|
4
4
|
|
|
5
5
|
// src/threading.ts
|
|
6
6
|
import { readdir, readFile, writeFile, mkdir } from "fs/promises";
|
|
@@ -281,4 +281,4 @@ var ThreadingManager = class {
|
|
|
281
281
|
export {
|
|
282
282
|
ThreadingManager
|
|
283
283
|
};
|
|
284
|
-
//# sourceMappingURL=chunk-
|
|
284
|
+
//# sourceMappingURL=chunk-JRNQ3RNA.js.map
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
} from "./chunk-V3RXWQIE.js";
|
|
5
5
|
import {
|
|
6
6
|
log
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-2ODBA7MQ.js";
|
|
8
8
|
|
|
9
9
|
// src/retrieval-agents.ts
|
|
10
10
|
import path from "path";
|
|
@@ -377,4 +377,4 @@ export {
|
|
|
377
377
|
augmentWithDirectAndTemporal,
|
|
378
378
|
parallelRetrieval
|
|
379
379
|
};
|
|
380
|
-
//# sourceMappingURL=chunk-
|
|
380
|
+
//# sourceMappingURL=chunk-K4FLSOR5.js.map
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import {
|
|
2
|
+
chunkContent
|
|
3
|
+
} from "./chunk-4WMCPJWX.js";
|
|
4
|
+
|
|
5
|
+
// src/semantic-chunking.ts
|
|
6
|
+
// Default settings for semanticChunkContent; caller-supplied config keys are
// spread on top of these.
var DEFAULT_SEMANTIC_CHUNKING_CONFIG = {
  // Target tokens per chunk (used when an oversized segment is re-split).
  targetTokens: 200,
  // Segments below this many estimated tokens are merged with a neighbour.
  minTokens: 100,
  // Segments above this many estimated tokens are split recursively.
  maxTokens: 400,
  // Window size of the moving-average filter applied to the similarity series.
  smoothingWindowSize: 3,
  // How many standard deviations below the mean a local minimum must fall to count as a boundary.
  boundaryThresholdStdDevs: 1,
  // Number of sentences sent to the embedding function per request.
  embeddingBatchSize: 32,
  // Use the recursive chunker when embeddings cannot be obtained.
  fallbackToRecursive: true
};
|
|
15
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector; must have the same length as `a`.
 * @returns {number} Similarity clamped to [-1, 1]; 0 when the vectors are
 *   empty or either one has zero magnitude.
 * @throws {Error} If the two vectors differ in length.
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new Error(
      `cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`
    );
  }
  if (a.length === 0) return 0;
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }
  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  // A zero-magnitude vector has no direction; report "no similarity".
  if (denom === 0) return 0;
  // Rounding in the square roots and their product can push the ratio a hair
  // outside [-1, 1] (e.g. 1.0000000000000002 for parallel vectors). Clamp so
  // callers can rely on the range. NaN inputs still propagate as NaN.
  return Math.max(-1, Math.min(1, dot / denom));
}
|
|
34
|
+
/**
 * Arithmetic mean of a numeric series.
 *
 * @param {number[]} series - Values to average.
 * @returns {number} The average, or 0 for an empty series.
 */
function mean(series) {
  const count = series.length;
  if (count === 0) return 0;
  let total = 0;
  for (let idx = 0; idx < count; idx += 1) {
    total += series[idx];
  }
  return total / count;
}
|
|
40
|
+
/**
 * Population standard deviation of a numeric series (divides by N, not N - 1).
 *
 * @param {number[]} series - Values to measure.
 * @returns {number} The standard deviation, or 0 for an empty series.
 */
function stddev(series) {
  const count = series.length;
  if (count === 0) return 0;
  const center = mean(series);
  let squaredDeviationSum = 0;
  for (let idx = 0; idx < count; idx += 1) {
    const delta = series[idx] - center;
    squaredDeviationSum += delta * delta;
  }
  return Math.sqrt(squaredDeviationSum / count);
}
|
|
50
|
+
/**
 * Centered moving average over a numeric series.
 *
 * The window is shrunk at the edges: each output is the mean of the inputs
 * that actually fall inside the window, so the result has the same length as
 * the input.
 *
 * @param {number[]} series - Values to smooth.
 * @param {number} windowSize - Requested window width. Values below 1 and
 *   non-numeric/NaN values are treated as 1 (no smoothing); even widths are
 *   bumped to the next odd number so the window stays centered.
 * @returns {number[]} Smoothed series, or [] for an empty input.
 */
function movingAverage(series, windowSize) {
  if (series.length === 0) return [];
  // Normalize into a local rather than reassigning the parameter. A NaN width
  // (e.g. an explicitly undefined config value) would otherwise turn every
  // output into NaN; coercing to a number also avoids "4" + 1 === "41".
  const requested = Number(windowSize);
  let width = Number.isNaN(requested) || requested < 1 ? 1 : requested;
  if (width % 2 === 0) width += 1;
  const halfW = Math.floor(width / 2);
  const result = new Array(series.length);
  for (let i = 0; i < series.length; i++) {
    const lo = Math.max(0, i - halfW);
    const hi = Math.min(series.length - 1, i + halfW);
    let sum = 0;
    for (let j = lo; j <= hi; j++) sum += series[j];
    result[i] = sum / (hi - lo + 1);
  }
  return result;
}
|
|
65
|
+
/**
 * Indices of strict interior local minima that also fall below a threshold.
 *
 * The first and last elements are never reported, and plateaus (equal
 * neighbours) do not count as minima.
 *
 * @param {number[]} series - Values to scan.
 * @param {number} threshold - A minimum must be strictly below this value.
 * @returns {number[]} Ascending indices of qualifying minima.
 */
function findLocalMinima(series, threshold) {
  const found = [];
  // With two or fewer points there is no interior element, so the loop body
  // never runs and an empty list is returned.
  const lastInterior = series.length - 2;
  for (let idx = 1; idx <= lastInterior; idx += 1) {
    const value = series[idx];
    const belowNeighbours = value < series[idx - 1] && value < series[idx + 1];
    if (belowNeighbours && value < threshold) {
      found.push(idx);
    }
  }
  return found;
}
|
|
75
|
+
/**
 * Split text into trimmed sentences.
 *
 * A sentence ends at a run of `.`, `!` or `?` that is followed by whitespace
 * or the end of the text. Text after the last terminator is returned as a
 * final sentence.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty, trimmed sentences in document order.
 */
function splitSentences(text) {
  const sentences = [];
  const sentenceRegex = /[^.!?]*[.!?]+(?:\s+|$)/g;
  let consumed = 0;
  while (sentenceRegex.exec(text) !== null) {
    const end = sentenceRegex.lastIndex;
    // Slice from the end of the previous sentence, not from where the regex
    // matched. When punctuation is not followed by whitespace ("1.5",
    // "chunk.js", "e.g.x") the regex resumes *after* it, and taking only the
    // matched text would silently drop everything it skipped over.
    sentences.push(text.slice(consumed, end).trim());
    consumed = end;
  }
  const tail = text.slice(consumed).trim();
  if (tail) {
    sentences.push(tail);
  }
  return sentences.filter((s) => s.length > 0);
}
|
|
92
|
+
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for an empty string).
 */
function estimateTokens(text) {
  const approxCharsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / approxCharsPerToken);
}
|
|
95
|
+
/**
 * Embed sentences in sequential batches and concatenate the resulting vectors.
 *
 * Batches are awaited one at a time, in order, so the output lines up with the
 * input as long as `embedFn` returns one vector per text.
 *
 * @param {string[]} sentences - Texts to embed.
 * @param {(texts: string[]) => Promise<number[][]>} embedFn - Caller-provided
 *   embedding function; any rejection propagates to the caller.
 * @param {number} batchSize - Maximum texts per request. Fractions are
 *   truncated; values below 1 and NaN fall back to 1.
 * @returns {Promise<number[][]>} All vectors returned by `embedFn`, in order.
 */
async function batchEmbed(sentences, embedFn, batchSize) {
  // A step of 0 (or a negative one) would never advance the loop and a NaN
  // step would end it after a single empty request, so normalize first.
  const requested = Math.floor(Number(batchSize));
  const step = requested >= 1 ? requested : 1;
  const allEmbeddings = [];
  for (let i = 0; i < sentences.length; i += step) {
    const batchResult = await embedFn(sentences.slice(i, i + step));
    for (const vec of batchResult) {
      allEmbeddings.push(vec);
    }
  }
  return allEmbeddings;
}
|
|
106
|
+
/**
 * Group sentences into consecutive segments, cutting after each boundary index.
 *
 * A boundary `b` means "end a segment after sentence `b`". Boundaries are
 * processed in ascending order; ones that are out of range or that would
 * produce an empty segment (duplicates) are ignored. The input arrays are not
 * modified.
 *
 * @param {string[]} sentences - Sentences in document order.
 * @param {number[]} boundaries - Sentence indices after which to cut.
 * @returns {string[][]} Segments covering every sentence exactly once.
 */
function buildSegments(sentences, boundaries) {
  const total = sentences.length;
  const orderedCuts = Array.from(boundaries).sort((left, right) => left - right);
  const groups = [];
  let cursor = 0;
  orderedCuts.forEach((boundaryIdx) => {
    const cut = boundaryIdx + 1;
    const usable = cut > cursor && cut <= total;
    if (!usable) return;
    groups.push(sentences.slice(cursor, cut));
    cursor = cut;
  });
  // Whatever follows the last cut forms the final segment.
  if (cursor < total) {
    groups.push(sentences.slice(cursor));
  }
  return groups;
}
|
|
122
|
+
/**
 * Merge segments that are too short with their neighbours.
 *
 * Segments are accumulated front to back until the running group reaches
 * `minTokens` (as measured by estimateTokens on the space-joined text). A
 * leftover group at the end that never reached the minimum is appended to the
 * previous emitted segment, or emitted by itself when nothing was emitted.
 *
 * @param {string[][]} segments - Sentence groups in document order.
 * @param {number} minTokens - Minimum estimated tokens per emitted segment.
 * @returns {string[][]} Merged segments; the input array itself when it holds
 *   zero or one segment.
 */
function mergeShortSegments(segments, minTokens) {
  if (segments.length <= 1) return segments;
  const merged = [];
  let buffer = [];
  for (const segment of segments) {
    buffer = [...buffer, ...segment];
    // Flush only when the group is big enough. Unconditionally flushing on the
    // last iteration would emit an undersized trailing segment and leave the
    // tail-merge below unreachable.
    if (estimateTokens(buffer.join(" ")) >= minTokens) {
      merged.push(buffer);
      buffer = [];
    }
  }
  if (buffer.length > 0) {
    if (merged.length > 0) {
      // Fold the short tail into the preceding segment.
      merged[merged.length - 1] = [...merged[merged.length - 1], ...buffer];
    } else {
      // Nothing reached the minimum: everything becomes one segment.
      merged.push(buffer);
    }
  }
  return merged;
}
|
|
143
|
+
/**
 * Re-split an oversized segment with the recursive chunker.
 *
 * The segment's sentences are joined with single spaces and handed to
 * chunkContent with sentence overlap disabled. Both the target and minimum
 * sizes passed to it equal the target capped at `maxTokens`.
 *
 * @param {string[]} segment - Sentences of the oversized segment.
 * @param {number} maxTokens - Upper bound applied to the target size.
 * @param {number} targetTokens - Preferred chunk size.
 * @returns {{content: string, index: number, tokenCount: number, boundaryScore: number}[]}
 *   One entry per chunk returned by chunkContent, each with boundaryScore 0.
 */
function splitLongSegment(segment, maxTokens, targetTokens) {
  const budget = Math.min(targetTokens, maxTokens);
  const { chunks } = chunkContent(segment.join(" "), {
    targetTokens: budget,
    // min(budget, maxTokens) is always just budget, since budget <= maxTokens.
    minTokens: budget,
    overlapSentences: 0
  });
  return chunks.map(({ content, index, tokenCount }) => ({
    content,
    index,
    tokenCount,
    boundaryScore: 0
  }));
}
|
|
158
|
+
/**
 * Build the result for content that is returned as one unsplit chunk.
 *
 * @param {string} content - Original content; stored trimmed.
 * @param {number} tokenCount - Token estimate to report for the chunk.
 * @param {number} boundaryScore - Boundary score to report for the chunk.
 * @returns {object} A "semantic" result with a single chunk and no boundaries.
 */
function singleChunkResult(content, tokenCount, boundaryScore) {
  return {
    chunked: false,
    chunks: [
      {
        content: content.trim(),
        index: 0,
        tokenCount,
        boundaryScore
      }
    ],
    boundaries: [],
    method: "semantic"
  };
}

/**
 * Split content into topic-coherent chunks using sentence embeddings.
 *
 * Sentences are embedded, adjacent-sentence cosine similarities are smoothed
 * with a moving average, and local minima below
 * `mean - boundaryThresholdStdDevs * stddev` become topic boundaries. Short
 * segments are merged and segments above `maxTokens` are re-split with the
 * recursive chunker.
 *
 * @param {string} content - Text to chunk.
 * @param {(texts: string[]) => Promise<number[][]>} embedFn - Caller-provided
 *   embedding function.
 * @param {object} [config] - Overrides for DEFAULT_SEMANTIC_CHUNKING_CONFIG.
 * @returns {Promise<{chunked: boolean, chunks: object[], boundaries: number[], method: string}>}
 *   `method` is "semantic", or "recursive-fallback" when the recursive chunker
 *   produced the result.
 * @throws {Error} When embeddings cannot be used (embedFn throws, returns the
 *   wrong number of vectors, or returns vectors that cannot be compared) and
 *   `fallbackToRecursive` is disabled. An embedFn failure is attached as `cause`.
 */
async function semanticChunkContent(content, embedFn, config) {
  const cfg = {
    ...DEFAULT_SEMANTIC_CHUNKING_CONFIG,
    ...config
  };
  const batchSize = Math.max(1, cfg.embeddingBatchSize);
  if (!content || content.trim().length === 0) {
    return {
      chunked: false,
      chunks: [],
      boundaries: [],
      method: "semantic"
    };
  }
  const sentences = splitSentences(content);
  if (sentences.length <= 1) {
    return singleChunkResult(content, estimateTokens(content), 1);
  }
  const totalTokens = estimateTokens(content);
  if (totalTokens <= cfg.minTokens) {
    // Too small to be worth embedding.
    return singleChunkResult(content, totalTokens, 1);
  }
  let embeddings;
  try {
    embeddings = await batchEmbed(sentences, embedFn, batchSize);
  } catch (err) {
    if (cfg.fallbackToRecursive) {
      return buildRecursiveFallback(content, cfg);
    }
    // Keep the underlying failure reachable for whoever handles this error.
    throw new Error(
      "Semantic chunking failed: embedding function threw and fallbackToRecursive is disabled",
      { cause: err }
    );
  }
  if (embeddings.length !== sentences.length) {
    if (cfg.fallbackToRecursive) {
      return buildRecursiveFallback(content, cfg);
    }
    throw new Error(
      `Semantic chunking failed: expected ${sentences.length} embeddings but received ${embeddings.length}`
    );
  }
  const similarities = [];
  try {
    for (let i = 0; i < sentences.length - 1; i++) {
      similarities.push(cosineSimilarity(embeddings[i], embeddings[i + 1]));
    }
  } catch (err) {
    // Vectors of differing length (or non-vector entries) are unusable
    // embeddings, just like a wrong vector count: honour the fallback setting
    // instead of failing unconditionally.
    if (cfg.fallbackToRecursive) {
      return buildRecursiveFallback(content, cfg);
    }
    throw err;
  }
  if (similarities.length <= 1) {
    // A single similarity value has no local minima to detect.
    if (totalTokens > cfg.maxTokens) {
      return buildRecursiveFallback(content, cfg);
    }
    return singleChunkResult(
      content,
      totalTokens,
      similarities.length === 1 ? similarities[0] : 1
    );
  }
  const smoothed = movingAverage(similarities, cfg.smoothingWindowSize);
  const m = mean(smoothed);
  const s = stddev(smoothed);
  const threshold = m - cfg.boundaryThresholdStdDevs * s;
  const rawBoundaries = findLocalMinima(smoothed, threshold);
  let segments = buildSegments(sentences, rawBoundaries);
  segments = mergeShortSegments(segments, cfg.minTokens);
  const chunks = [];
  const finalBoundaries = [];
  // Index (into `sentences`) of the first sentence of the current segment.
  let sentenceOffset = 0;
  for (let segIdx = 0; segIdx < segments.length; segIdx++) {
    const segment = segments[segIdx];
    const segText = segment.join(" ");
    const segTokens = estimateTokens(segText);
    if (segTokens > cfg.maxTokens) {
      const subChunks = splitLongSegment(segment, cfg.maxTokens, cfg.targetTokens);
      for (const sc of subChunks) {
        chunks.push({
          ...sc,
          // Re-number: sub-chunk indices are local to the split segment.
          index: chunks.length
        });
      }
    } else {
      const trailingSentenceIdx = sentenceOffset + segment.length - 1;
      // The last segment has no trailing boundary, so it keeps the default.
      let bScore = 1;
      if (trailingSentenceIdx < similarities.length && segIdx < segments.length - 1) {
        bScore = smoothed[trailingSentenceIdx] ?? similarities[trailingSentenceIdx] ?? 1;
      }
      chunks.push({
        content: segText,
        index: chunks.length,
        tokenCount: segTokens,
        boundaryScore: bScore
      });
    }
    if (segIdx < segments.length - 1) {
      finalBoundaries.push(sentenceOffset + segment.length - 1);
    }
    sentenceOffset += segment.length;
  }
  return {
    chunked: chunks.length > 1,
    chunks,
    boundaries: finalBoundaries,
    method: "semantic"
  };
}
|
|
293
|
+
/**
 * Chunk content with the recursive chunker and shape the result like a
 * semantic-chunking result.
 *
 * The target size is capped at `cfg.maxTokens`, the minimum size is capped at
 * that target, and sentence overlap is disabled.
 *
 * @param {string} content - Text to chunk.
 * @param {{targetTokens: number, minTokens: number, maxTokens: number}} cfg -
 *   Effective chunking configuration.
 * @returns {{chunked: boolean, chunks: object[], boundaries: number[], method: string}}
 *   chunkContent's chunks, each given boundaryScore 0, with no boundaries and
 *   method "recursive-fallback".
 */
function buildRecursiveFallback(content, cfg) {
  const { targetTokens, minTokens, maxTokens } = cfg;
  const effectiveTarget = Math.min(targetTokens, maxTokens);
  const recursive = chunkContent(content, {
    targetTokens: effectiveTarget,
    minTokens: Math.min(minTokens, effectiveTarget),
    overlapSentences: 0
  });
  const chunks = recursive.chunks.map((chunk) => ({ ...chunk, boundaryScore: 0 }));
  return {
    chunked: recursive.chunked,
    chunks,
    boundaries: [],
    method: "recursive-fallback"
  };
}
|
|
310
|
+
|
|
311
|
+
export {
|
|
312
|
+
DEFAULT_SEMANTIC_CHUNKING_CONFIG,
|
|
313
|
+
cosineSimilarity,
|
|
314
|
+
mean,
|
|
315
|
+
stddev,
|
|
316
|
+
movingAverage,
|
|
317
|
+
findLocalMinima,
|
|
318
|
+
semanticChunkContent
|
|
319
|
+
};
|
|
320
|
+
//# sourceMappingURL=chunk-KVE7R4CG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/semantic-chunking.ts"],"sourcesContent":["/**\n * Semantic Chunking with Smoothing-Based Topic Boundaries (Issue #368)\n *\n * An optional alternative to the recursive chunker in chunking.ts.\n * Uses sentence embeddings + cosine similarity + smoothing to detect\n * natural topic boundaries, producing more coherent chunks.\n */\n\nimport { chunkContent, type Chunk, type ChunkResult } from \"./chunking.js\";\n\n// ---------------------------------------------------------------------------\n// Configuration\n// ---------------------------------------------------------------------------\n\nexport interface SemanticChunkingConfig {\n /** Target tokens per chunk. Default: 200. */\n targetTokens: number;\n /** Minimum tokens for a segment before merging with neighbor. Default: 100. */\n minTokens: number;\n /** Maximum tokens for a segment before recursive splitting. Default: 400. */\n maxTokens: number;\n /** Window size for the moving-average smoothing filter. Default: 3. */\n smoothingWindowSize: number;\n /** How many standard deviations below the mean constitutes a boundary. Default: 1.0. */\n boundaryThresholdStdDevs: number;\n /** Batch size for embedding requests. Default: 32. */\n embeddingBatchSize: number;\n /** Fall back to recursive chunking when embeddings are unavailable. Default: true. */\n fallbackToRecursive: boolean;\n}\n\nexport const DEFAULT_SEMANTIC_CHUNKING_CONFIG: SemanticChunkingConfig = {\n targetTokens: 200,\n minTokens: 100,\n maxTokens: 400,\n smoothingWindowSize: 3,\n boundaryThresholdStdDevs: 1.0,\n embeddingBatchSize: 32,\n fallbackToRecursive: true,\n};\n\n// ---------------------------------------------------------------------------\n// Result types\n// ---------------------------------------------------------------------------\n\nexport interface SemanticChunk extends Chunk {\n /** Optional topic hint derived from position. 
*/\n topicLabel?: string;\n /** Cosine similarity score at the trailing boundary of this chunk. */\n boundaryScore: number;\n}\n\nexport interface SemanticChunkResult {\n /** Whether content was split into multiple chunks. */\n chunked: boolean;\n /** The chunks produced. */\n chunks: SemanticChunk[];\n /** Sentence indices where topic splits occurred. */\n boundaries: number[];\n /** Which algorithm produced the result. */\n method: \"semantic\" | \"recursive-fallback\";\n}\n\n// ---------------------------------------------------------------------------\n// Embedding function signature\n// ---------------------------------------------------------------------------\n\n/** Caller-provided function that embeds an array of texts, returning vectors. */\nexport type EmbedFn = (texts: string[]) => Promise<number[][]>;\n\n// ---------------------------------------------------------------------------\n// Math utilities (exported for testing)\n// ---------------------------------------------------------------------------\n\n/**\n * Cosine similarity between two vectors.\n * Returns a value in [-1, 1]. 
Identical direction = 1, orthogonal = 0.\n *\n * NOTE: This duplicates cosineSimilarity in recall-mmr.ts and embedding-fallback.ts.\n * Consider extracting to a shared math utility in a future refactor.\n */\nexport function cosineSimilarity(a: number[], b: number[]): number {\n if (a.length !== b.length) {\n throw new Error(\n `cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`,\n );\n }\n if (a.length === 0) return 0;\n\n let dot = 0;\n let magA = 0;\n let magB = 0;\n for (let i = 0; i < a.length; i++) {\n dot += a[i] * b[i];\n magA += a[i] * a[i];\n magB += b[i] * b[i];\n }\n\n const denom = Math.sqrt(magA) * Math.sqrt(magB);\n if (denom === 0) return 0;\n return dot / denom;\n}\n\n/**\n * Arithmetic mean of a numeric series.\n */\nexport function mean(series: number[]): number {\n if (series.length === 0) return 0;\n let sum = 0;\n for (const v of series) sum += v;\n return sum / series.length;\n}\n\n/**\n * Population standard deviation of a numeric series.\n */\nexport function stddev(series: number[]): number {\n if (series.length === 0) return 0;\n const m = mean(series);\n let sumSq = 0;\n for (const v of series) {\n const d = v - m;\n sumSq += d * d;\n }\n return Math.sqrt(sumSq / series.length);\n}\n\n/**\n * Simple moving average over a 1D series.\n * The window is centered: for window size W, each output[i] averages\n * series[i - floor(W/2) .. 
i + floor(W/2)], clamped to bounds.\n *\n * Even window sizes are rounded up to the next odd value so the window\n * is symmetric around the center point (Finding 4, PR #420).\n */\nexport function movingAverage(series: number[], windowSize: number): number[] {\n if (series.length === 0) return [];\n if (windowSize < 1) windowSize = 1;\n // Round even values up to the next odd so the window is symmetric.\n if (windowSize % 2 === 0) windowSize = windowSize + 1;\n\n const halfW = Math.floor(windowSize / 2);\n const result: number[] = new Array(series.length);\n\n for (let i = 0; i < series.length; i++) {\n const lo = Math.max(0, i - halfW);\n const hi = Math.min(series.length - 1, i + halfW);\n let sum = 0;\n for (let j = lo; j <= hi; j++) sum += series[j];\n result[i] = sum / (hi - lo + 1);\n }\n return result;\n}\n\n/**\n * Find indices in the series that are local minima AND below the threshold.\n * A local minimum is a point lower than both its immediate neighbors\n * (or lower-or-equal at series boundaries).\n */\nexport function findLocalMinima(\n series: number[],\n threshold: number,\n): number[] {\n if (series.length <= 2) return [];\n\n const minima: number[] = [];\n for (let i = 1; i < series.length - 1; i++) {\n if (\n series[i] < series[i - 1] &&\n series[i] < series[i + 1] &&\n series[i] < threshold\n ) {\n minima.push(i);\n }\n }\n return minima;\n}\n\n// ---------------------------------------------------------------------------\n// Sentence tokenizer\n// ---------------------------------------------------------------------------\n\n/**\n * Split text into sentences at punctuation boundaries.\n * Preserves punctuation with the preceding sentence.\n */\nfunction splitSentences(text: string): string[] {\n const sentences: string[] = [];\n const sentenceRegex = /[^.!?]*[.!?]+(?:\\s+|$)/g;\n\n let match: RegExpExecArray | null;\n let lastIndex = 0;\n\n while ((match = sentenceRegex.exec(text)) !== null) {\n sentences.push(match[0].trim());\n lastIndex = 
sentenceRegex.lastIndex;\n }\n\n if (lastIndex < text.length) {\n const remaining = text.slice(lastIndex).trim();\n if (remaining) {\n sentences.push(remaining);\n }\n }\n\n return sentences.filter((s) => s.length > 0);\n}\n\n// ---------------------------------------------------------------------------\n// Token estimation\n// ---------------------------------------------------------------------------\n\n/** Rough token estimate: ~4 chars per token for English. */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n// ---------------------------------------------------------------------------\n// Core semantic chunking\n// ---------------------------------------------------------------------------\n\n/**\n * Batch-embed sentences using the provided embed function.\n * Respects the configured batch size.\n */\nasync function batchEmbed(\n sentences: string[],\n embedFn: EmbedFn,\n batchSize: number,\n): Promise<number[][]> {\n const allEmbeddings: number[][] = [];\n\n for (let i = 0; i < sentences.length; i += batchSize) {\n const batch = sentences.slice(i, i + batchSize);\n const batchResult = await embedFn(batch);\n for (const vec of batchResult) {\n allEmbeddings.push(vec);\n }\n }\n\n return allEmbeddings;\n}\n\n/**\n * Build segments from boundary indices.\n * boundaries are sentence indices at which splits occur (i.e., the split\n * happens AFTER the boundary index sentence).\n */\nfunction buildSegments(\n sentences: string[],\n boundaries: number[],\n): string[][] {\n const sorted = [...boundaries].sort((a, b) => a - b);\n const segments: string[][] = [];\n let start = 0;\n\n for (const b of sorted) {\n // Split after sentence at index b: segment is [start .. 
b]\n const splitPoint = b + 1;\n if (splitPoint > start && splitPoint <= sentences.length) {\n segments.push(sentences.slice(start, splitPoint));\n start = splitPoint;\n }\n }\n\n // Remaining sentences\n if (start < sentences.length) {\n segments.push(sentences.slice(start));\n }\n\n return segments;\n}\n\n/**\n * Merge short segments (below minTokens) with their neighbor.\n * Prefers merging forward; falls back to merging backward.\n */\nfunction mergeShortSegments(\n segments: string[][],\n minTokens: number,\n): string[][] {\n if (segments.length <= 1) return segments;\n\n const merged: string[][] = [];\n let buffer: string[] = [];\n\n for (let i = 0; i < segments.length; i++) {\n buffer = [...buffer, ...segments[i]];\n const tokenCount = estimateTokens(buffer.join(\" \"));\n\n if (tokenCount >= minTokens || i === segments.length - 1) {\n merged.push(buffer);\n buffer = [];\n }\n }\n\n // If the last merge left a dangling buffer, attach it to the last segment\n if (buffer.length > 0) {\n if (merged.length > 0) {\n merged[merged.length - 1] = [...merged[merged.length - 1], ...buffer];\n } else {\n merged.push(buffer);\n }\n }\n\n return merged;\n}\n\n/**\n * Split an oversized segment using recursive chunking.\n */\nfunction splitLongSegment(\n segment: string[],\n maxTokens: number,\n targetTokens: number,\n): SemanticChunk[] {\n const text = segment.join(\" \");\n // Cap targetTokens to maxTokens so recursive splitting never produces\n // segments larger than the configured maximum (Finding 2, PR #420).\n const cappedTarget = Math.min(targetTokens, maxTokens);\n const result: ChunkResult = chunkContent(text, {\n targetTokens: cappedTarget,\n minTokens: Math.min(cappedTarget, maxTokens),\n overlapSentences: 0,\n });\n\n return result.chunks.map((c) => ({\n content: c.content,\n index: c.index,\n tokenCount: c.tokenCount,\n boundaryScore: 0,\n }));\n}\n\n/**\n * Semantic chunking with smoothing-based topic boundary detection.\n *\n * @param content - Full text 
to chunk.\n * @param embedFn - Async function that embeds an array of texts.\n * @param config - Optional partial config overrides.\n * @returns SemanticChunkResult\n */\nexport async function semanticChunkContent(\n content: string,\n embedFn: EmbedFn,\n config?: Partial<SemanticChunkingConfig>,\n): Promise<SemanticChunkResult> {\n const cfg: SemanticChunkingConfig = {\n ...DEFAULT_SEMANTIC_CHUNKING_CONFIG,\n ...config,\n };\n\n // Guard against non-positive batch size which would cause an infinite loop\n const batchSize = Math.max(1, cfg.embeddingBatchSize);\n\n // --- Empty / trivially short input ---\n if (!content || content.trim().length === 0) {\n return {\n chunked: false,\n chunks: [],\n boundaries: [],\n method: \"semantic\",\n };\n }\n\n const sentences = splitSentences(content);\n\n if (sentences.length <= 1) {\n const tokenCount = estimateTokens(content);\n return {\n chunked: false,\n chunks: [\n {\n content: content.trim(),\n index: 0,\n tokenCount,\n boundaryScore: 1,\n },\n ],\n boundaries: [],\n method: \"semantic\",\n };\n }\n\n // If total tokens is short enough, return as single chunk\n const totalTokens = estimateTokens(content);\n if (totalTokens <= cfg.minTokens) {\n return {\n chunked: false,\n chunks: [\n {\n content: content.trim(),\n index: 0,\n tokenCount: totalTokens,\n boundaryScore: 1,\n },\n ],\n boundaries: [],\n method: \"semantic\",\n };\n }\n\n // --- Attempt embedding ---\n let embeddings: number[][];\n try {\n embeddings = await batchEmbed(sentences, embedFn, batchSize);\n } catch {\n // Embedding failed — fall back if configured\n if (cfg.fallbackToRecursive) {\n return buildRecursiveFallback(content, cfg);\n }\n throw new Error(\n \"Semantic chunking failed: embedding function threw and fallbackToRecursive is disabled\",\n );\n }\n\n if (embeddings.length !== sentences.length) {\n if (cfg.fallbackToRecursive) {\n return buildRecursiveFallback(content, cfg);\n }\n throw new Error(\n `Semantic chunking failed: expected 
${sentences.length} embeddings but received ${embeddings.length}`,\n );\n }\n\n // --- Compute pairwise cosine similarity ---\n const similarities: number[] = [];\n for (let i = 0; i < sentences.length - 1; i++) {\n similarities.push(cosineSimilarity(embeddings[i], embeddings[i + 1]));\n }\n\n // If only one pair (2 sentences), nothing to smooth or split meaningfully.\n // However, if the combined content exceeds maxTokens, apply recursive splitting.\n if (similarities.length <= 1) {\n if (totalTokens > cfg.maxTokens) {\n return buildRecursiveFallback(content, cfg);\n }\n return {\n chunked: false,\n chunks: [\n {\n content: content.trim(),\n index: 0,\n tokenCount: totalTokens,\n boundaryScore: similarities.length === 1 ? similarities[0] : 1,\n },\n ],\n boundaries: [],\n method: \"semantic\",\n };\n }\n\n // --- Smooth the similarity series ---\n const smoothed = movingAverage(similarities, cfg.smoothingWindowSize);\n\n // --- Detect boundaries: local minima below (mean - k * stddev) ---\n const m = mean(smoothed);\n const s = stddev(smoothed);\n const threshold = m - cfg.boundaryThresholdStdDevs * s;\n const rawBoundaries = findLocalMinima(smoothed, threshold);\n\n // --- Build segments, merge short, split long ---\n let segments = buildSegments(sentences, rawBoundaries);\n segments = mergeShortSegments(segments, cfg.minTokens);\n\n // --- Convert segments to chunks, splitting oversized ones ---\n const chunks: SemanticChunk[] = [];\n const finalBoundaries: number[] = [];\n let sentenceOffset = 0;\n\n for (let segIdx = 0; segIdx < segments.length; segIdx++) {\n const segment = segments[segIdx];\n const segText = segment.join(\" \");\n const segTokens = estimateTokens(segText);\n\n if (segTokens > cfg.maxTokens) {\n // Recursive split for oversized segment\n const subChunks = splitLongSegment(segment, cfg.maxTokens, cfg.targetTokens);\n for (const sc of subChunks) {\n chunks.push({\n ...sc,\n index: chunks.length,\n });\n }\n } else {\n // Compute boundary score: 
the similarity at the trailing edge\n const trailingSentenceIdx = sentenceOffset + segment.length - 1;\n let bScore = 1;\n if (\n trailingSentenceIdx < similarities.length &&\n segIdx < segments.length - 1\n ) {\n bScore = smoothed[trailingSentenceIdx] ?? similarities[trailingSentenceIdx] ?? 1;\n }\n\n chunks.push({\n content: segText,\n index: chunks.length,\n tokenCount: segTokens,\n boundaryScore: bScore,\n });\n }\n\n // Record boundaries (all but the last segment produce a boundary)\n if (segIdx < segments.length - 1) {\n finalBoundaries.push(sentenceOffset + segment.length - 1);\n }\n sentenceOffset += segment.length;\n }\n\n return {\n chunked: chunks.length > 1,\n chunks,\n boundaries: finalBoundaries,\n method: \"semantic\",\n };\n}\n\n// ---------------------------------------------------------------------------\n// Recursive fallback helper\n// ---------------------------------------------------------------------------\n\nfunction buildRecursiveFallback(\n content: string,\n cfg: SemanticChunkingConfig,\n): SemanticChunkResult {\n // Cap targetTokens to maxTokens so the recursive fallback path honours the\n // same constraint as splitLongSegment (PR #439 post-merge cursor[bot] finding).\n const cappedTarget = Math.min(cfg.targetTokens, cfg.maxTokens);\n const result: ChunkResult = chunkContent(content, {\n targetTokens: cappedTarget,\n minTokens: Math.min(cfg.minTokens, cappedTarget),\n overlapSentences: 0,\n });\n\n return {\n chunked: result.chunked,\n chunks: result.chunks.map((c) => ({\n ...c,\n boundaryScore: 0,\n })),\n boundaries: [],\n method: \"recursive-fallback\",\n 
};\n}\n"],"mappings":";;;;;AA+BO,IAAM,mCAA2D;AAAA,EACtE,cAAc;AAAA,EACd,WAAW;AAAA,EACX,WAAW;AAAA,EACX,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,oBAAoB;AAAA,EACpB,qBAAqB;AACvB;AA0CO,SAAS,iBAAiB,GAAa,GAAqB;AACjE,MAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,UAAM,IAAI;AAAA,MACR,6CAA6C,EAAE,MAAM,OAAO,EAAE,MAAM;AAAA,IACtE;AAAA,EACF;AACA,MAAI,EAAE,WAAW,EAAG,QAAO;AAE3B,MAAI,MAAM;AACV,MAAI,OAAO;AACX,MAAI,OAAO;AACX,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,WAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACjB,YAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;AAClB,YAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACpB;AAEA,QAAM,QAAQ,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI;AAC9C,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,MAAM;AACf;AAKO,SAAS,KAAK,QAA0B;AAC7C,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,MAAM;AACV,aAAW,KAAK,OAAQ,QAAO;AAC/B,SAAO,MAAM,OAAO;AACtB;AAKO,SAAS,OAAO,QAA0B;AAC/C,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,IAAI,KAAK,MAAM;AACrB,MAAI,QAAQ;AACZ,aAAW,KAAK,QAAQ;AACtB,UAAM,IAAI,IAAI;AACd,aAAS,IAAI;AAAA,EACf;AACA,SAAO,KAAK,KAAK,QAAQ,OAAO,MAAM;AACxC;AAUO,SAAS,cAAc,QAAkB,YAA8B;AAC5E,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AACjC,MAAI,aAAa,EAAG,cAAa;AAEjC,MAAI,aAAa,MAAM,EAAG,cAAa,aAAa;AAEpD,QAAM,QAAQ,KAAK,MAAM,aAAa,CAAC;AACvC,QAAM,SAAmB,IAAI,MAAM,OAAO,MAAM;AAEhD,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,UAAM,KAAK,KAAK,IAAI,GAAG,IAAI,KAAK;AAChC,UAAM,KAAK,KAAK,IAAI,OAAO,SAAS,GAAG,IAAI,KAAK;AAChD,QAAI,MAAM;AACV,aAAS,IAAI,IAAI,KAAK,IAAI,IAAK,QAAO,OAAO,CAAC;AAC9C,WAAO,CAAC,IAAI,OAAO,KAAK,KAAK;AAAA,EAC/B;AACA,SAAO;AACT;AAOO,SAAS,gBACd,QACA,WACU;AACV,MAAI,OAAO,UAAU,EAAG,QAAO,CAAC;AAEhC,QAAM,SAAmB,CAAC;AAC1B,WAAS,IAAI,GAAG,IAAI,OAAO,SAAS,GAAG,KAAK;AAC1C,QACE,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,KACxB,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,KACxB,OAAO,CAAC,IAAI,WACZ;AACA,aAAO,KAAK,CAAC;AAAA,IACf;AAAA,EACF;AACA,SAAO;AACT;AAUA,SAAS,eAAe,MAAwB;AAC9C,QAAM,YAAsB,CAAC;AAC7B,QAAM,gBAAgB;AAEtB,MAAI;AACJ,MAAI,YAAY;AAEhB,UAAQ,QAAQ,cAAc,KAAK,IAAI,OAAO,MAAM;AAClD,cAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B,gBAAY,cAAc;AAAA,EAC5B;AAEA,MAAI,YAAY,KAAK,QAAQ;AAC3B,UAAM,YAAY,KAAK,MAAM,SAAS,EAAE,KAAK;AAC7C,QAAI,WAAW;AACb,gBAAU,KAAK
,SAAS;AAAA,IAC1B;AAAA,EACF;AAEA,SAAO,UAAU,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAC7C;AAOA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AAUA,eAAe,WACb,WACA,SACA,WACqB;AACrB,QAAM,gBAA4B,CAAC;AAEnC,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK,WAAW;AACpD,UAAM,QAAQ,UAAU,MAAM,GAAG,IAAI,SAAS;AAC9C,UAAM,cAAc,MAAM,QAAQ,KAAK;AACvC,eAAW,OAAO,aAAa;AAC7B,oBAAc,KAAK,GAAG;AAAA,IACxB;AAAA,EACF;AAEA,SAAO;AACT;AAOA,SAAS,cACP,WACA,YACY;AACZ,QAAM,SAAS,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACnD,QAAM,WAAuB,CAAC;AAC9B,MAAI,QAAQ;AAEZ,aAAW,KAAK,QAAQ;AAEtB,UAAM,aAAa,IAAI;AACvB,QAAI,aAAa,SAAS,cAAc,UAAU,QAAQ;AACxD,eAAS,KAAK,UAAU,MAAM,OAAO,UAAU,CAAC;AAChD,cAAQ;AAAA,IACV;AAAA,EACF;AAGA,MAAI,QAAQ,UAAU,QAAQ;AAC5B,aAAS,KAAK,UAAU,MAAM,KAAK,CAAC;AAAA,EACtC;AAEA,SAAO;AACT;AAMA,SAAS,mBACP,UACA,WACY;AACZ,MAAI,SAAS,UAAU,EAAG,QAAO;AAEjC,QAAM,SAAqB,CAAC;AAC5B,MAAI,SAAmB,CAAC;AAExB,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,aAAS,CAAC,GAAG,QAAQ,GAAG,SAAS,CAAC,CAAC;AACnC,UAAM,aAAa,eAAe,OAAO,KAAK,GAAG,CAAC;AAElD,QAAI,cAAc,aAAa,MAAM,SAAS,SAAS,GAAG;AACxD,aAAO,KAAK,MAAM;AAClB,eAAS,CAAC;AAAA,IACZ;AAAA,EACF;AAGA,MAAI,OAAO,SAAS,GAAG;AACrB,QAAI,OAAO,SAAS,GAAG;AACrB,aAAO,OAAO,SAAS,CAAC,IAAI,CAAC,GAAG,OAAO,OAAO,SAAS,CAAC,GAAG,GAAG,MAAM;AAAA,IACtE,OAAO;AACL,aAAO,KAAK,MAAM;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBACP,SACA,WACA,cACiB;AACjB,QAAM,OAAO,QAAQ,KAAK,GAAG;AAG7B,QAAM,eAAe,KAAK,IAAI,cAAc,SAAS;AACrD,QAAM,SAAsB,aAAa,MAAM;AAAA,IAC7C,cAAc;AAAA,IACd,WAAW,KAAK,IAAI,cAAc,SAAS;AAAA,IAC3C,kBAAkB;AAAA,EACpB,CAAC;AAED,SAAO,OAAO,OAAO,IAAI,CAAC,OAAO;AAAA,IAC/B,SAAS,EAAE;AAAA,IACX,OAAO,EAAE;AAAA,IACT,YAAY,EAAE;AAAA,IACd,eAAe;AAAA,EACjB,EAAE;AACJ;AAUA,eAAsB,qBACpB,SACA,SACA,QAC8B;AAC9B,QAAM,MAA8B;AAAA,IAClC,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAGA,QAAM,YAAY,KAAK,IAAI,GAAG,IAAI,kBAAkB;AAGpD,MAAI,CAAC,WAAW,QAAQ,KAAK,EAAE,WAAW,GAAG;AAC3C,WAAO;AAAA,MACL,SAAS;AAAA,MACT,QAAQ,CAAC;AAAA,MACT,YAAY,CAAC;AAAA,MACb,QAAQ;AAAA,IACV;AAAA,EACF;AAEA,QAAM,YAAY,eAAe,OAAO;AAExC,MAAI,UAAU,UAAU,GAAG;AACzB,UAAM,aAAa,eAAe,OAAO;AACzC,WAAO;AAAA,MA
CL,SAAS;AAAA,MACT,QAAQ;AAAA,QACN;AAAA,UACE,SAAS,QAAQ,KAAK;AAAA,UACtB,OAAO;AAAA,UACP;AAAA,UACA,eAAe;AAAA,QACjB;AAAA,MACF;AAAA,MACA,YAAY,CAAC;AAAA,MACb,QAAQ;AAAA,IACV;AAAA,EACF;AAGA,QAAM,cAAc,eAAe,OAAO;AAC1C,MAAI,eAAe,IAAI,WAAW;AAChC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,QAAQ;AAAA,QACN;AAAA,UACE,SAAS,QAAQ,KAAK;AAAA,UACtB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,eAAe;AAAA,QACjB;AAAA,MACF;AAAA,MACA,YAAY,CAAC;AAAA,MACb,QAAQ;AAAA,IACV;AAAA,EACF;AAGA,MAAI;AACJ,MAAI;AACF,iBAAa,MAAM,WAAW,WAAW,SAAS,SAAS;AAAA,EAC7D,QAAQ;AAEN,QAAI,IAAI,qBAAqB;AAC3B,aAAO,uBAAuB,SAAS,GAAG;AAAA,IAC5C;AACA,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,MAAI,WAAW,WAAW,UAAU,QAAQ;AAC1C,QAAI,IAAI,qBAAqB;AAC3B,aAAO,uBAAuB,SAAS,GAAG;AAAA,IAC5C;AACA,UAAM,IAAI;AAAA,MACR,sCAAsC,UAAU,MAAM,4BAA4B,WAAW,MAAM;AAAA,IACrG;AAAA,EACF;AAGA,QAAM,eAAyB,CAAC;AAChC,WAAS,IAAI,GAAG,IAAI,UAAU,SAAS,GAAG,KAAK;AAC7C,iBAAa,KAAK,iBAAiB,WAAW,CAAC,GAAG,WAAW,IAAI,CAAC,CAAC,CAAC;AAAA,EACtE;AAIA,MAAI,aAAa,UAAU,GAAG;AAC5B,QAAI,cAAc,IAAI,WAAW;AAC/B,aAAO,uBAAuB,SAAS,GAAG;AAAA,IAC5C;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,QAAQ;AAAA,QACN;AAAA,UACE,SAAS,QAAQ,KAAK;AAAA,UACtB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,eAAe,aAAa,WAAW,IAAI,aAAa,CAAC,IAAI;AAAA,QAC/D;AAAA,MACF;AAAA,MACA,YAAY,CAAC;AAAA,MACb,QAAQ;AAAA,IACV;AAAA,EACF;AAGA,QAAM,WAAW,cAAc,cAAc,IAAI,mBAAmB;AAGpE,QAAM,IAAI,KAAK,QAAQ;AACvB,QAAM,IAAI,OAAO,QAAQ;AACzB,QAAM,YAAY,IAAI,IAAI,2BAA2B;AACrD,QAAM,gBAAgB,gBAAgB,UAAU,SAAS;AAGzD,MAAI,WAAW,cAAc,WAAW,aAAa;AACrD,aAAW,mBAAmB,UAAU,IAAI,SAAS;AAGrD,QAAM,SAA0B,CAAC;AACjC,QAAM,kBAA4B,CAAC;AACnC,MAAI,iBAAiB;AAErB,WAAS,SAAS,GAAG,SAAS,SAAS,QAAQ,UAAU;AACvD,UAAM,UAAU,SAAS,MAAM;AAC/B,UAAM,UAAU,QAAQ,KAAK,GAAG;AAChC,UAAM,YAAY,eAAe,OAAO;AAExC,QAAI,YAAY,IAAI,WAAW;AAE7B,YAAM,YAAY,iBAAiB,SAAS,IAAI,WAAW,IAAI,YAAY;AAC3E,iBAAW,MAAM,WAAW;AAC1B,eAAO,KAAK;AAAA,UACV,GAAG;AAAA,UACH,OAAO,OAAO;AAAA,QAChB,CAAC;AAAA,MACH;AAAA,IACF,OAAO;AAEL,YAAM,sBAAsB,iBAAiB,QAAQ,SAAS;AAC9D,UAAI,SAAS;AACb,UACE,sBAAsB,aAAa,UACnC,SAAS,SAAS,SAAS,GAC3B;AACA,iBAAS,SAAS,mBAAmB,KAAK,aAAa,mBAAmB,KAAK;AAAA,MACjF;AAEA,aAAO,KAAK;A
AAA,QACV,SAAS;AAAA,QACT,OAAO,OAAO;AAAA,QACd,YAAY;AAAA,QACZ,eAAe;AAAA,MACjB,CAAC;AAAA,IACH;AAGA,QAAI,SAAS,SAAS,SAAS,GAAG;AAChC,sBAAgB,KAAK,iBAAiB,QAAQ,SAAS,CAAC;AAAA,IAC1D;AACA,sBAAkB,QAAQ;AAAA,EAC5B;AAEA,SAAO;AAAA,IACL,SAAS,OAAO,SAAS;AAAA,IACzB;AAAA,IACA,YAAY;AAAA,IACZ,QAAQ;AAAA,EACV;AACF;AAMA,SAAS,uBACP,SACA,KACqB;AAGrB,QAAM,eAAe,KAAK,IAAI,IAAI,cAAc,IAAI,SAAS;AAC7D,QAAM,SAAsB,aAAa,SAAS;AAAA,IAChD,cAAc;AAAA,IACd,WAAW,KAAK,IAAI,IAAI,WAAW,YAAY;AAAA,IAC/C,kBAAkB;AAAA,EACpB,CAAC;AAED,SAAO;AAAA,IACL,SAAS,OAAO;AAAA,IAChB,QAAQ,OAAO,OAAO,IAAI,CAAC,OAAO;AAAA,MAChC,GAAG;AAAA,MACH,eAAe;AAAA,IACjB,EAAE;AAAA,IACF,YAAY,CAAC;AAAA,IACb,QAAQ;AAAA,EACV;AACF;","names":[]}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
import {
|
|
2
|
+
extractJsonCandidates
|
|
3
|
+
} from "./chunk-UZB5KHKX.js";
|
|
4
|
+
import {
|
|
5
|
+
normalizeProcedureSteps
|
|
6
|
+
} from "./chunk-QDW3E4RD.js";
|
|
7
|
+
import {
|
|
8
|
+
log
|
|
9
|
+
} from "./chunk-2ODBA7MQ.js";
|
|
10
|
+
|
|
11
|
+
// src/extraction-judge.ts
|
|
12
|
+
import { createHash } from "crypto";
|
|
13
|
+
var JUDGE_SYSTEM_PROMPT = `You are a memory curator evaluating whether extracted facts are **durable** \u2014 worth storing for long-term recall across sessions.
|
|
14
|
+
|
|
15
|
+
A fact is **durable** if it will still be useful 30+ days from now and is relevant across multiple sessions, not just the current task.
|
|
16
|
+
|
|
17
|
+
DURABLE examples (approve):
|
|
18
|
+
- Personal preferences, identities, or relationships
|
|
19
|
+
- Decisions with rationale that affect future work
|
|
20
|
+
- Corrections to previously held beliefs
|
|
21
|
+
- Principles, rules, or constraints the user wants respected
|
|
22
|
+
- Stable facts about projects, tools, or workflows
|
|
23
|
+
- Commitments, deadlines, or obligations
|
|
24
|
+
|
|
25
|
+
NOT DURABLE examples (reject):
|
|
26
|
+
- Transient task details ("currently debugging line 42")
|
|
27
|
+
- Ephemeral state ("the build is running now")
|
|
28
|
+
- Routine operations ("ran npm install")
|
|
29
|
+
- Conversational filler or acknowledgements
|
|
30
|
+
- Information that will be stale within hours
|
|
31
|
+
- Step-by-step instructions for a one-time task
|
|
32
|
+
|
|
33
|
+
Return a JSON array of objects with these fields:
|
|
34
|
+
- index: number (the candidate index)
|
|
35
|
+
- durable: boolean (true if the fact is durable)
|
|
36
|
+
- reason: string (brief explanation)
|
|
37
|
+
|
|
38
|
+
Rules:
|
|
39
|
+
1. Return exactly one verdict per input candidate, matched by index.
|
|
40
|
+
2. The reason field must be a short phrase (under 80 characters).
|
|
41
|
+
3. When in doubt lean toward durable \u2014 false negatives are worse than false positives.
|
|
42
|
+
4. Output valid JSON only. No markdown fences, no commentary.
|
|
43
|
+
|
|
44
|
+
Example output:
|
|
45
|
+
[{"index": 0, "durable": true, "reason": "Stable personal preference"}, {"index": 1, "durable": false, "reason": "Ephemeral build status"}]`;
|
|
46
|
+
var VERDICT_CACHE_MAX_SIZE = 1e4;
|
|
47
|
+
var defaultVerdictCache = /* @__PURE__ */ new Map();
|
|
48
|
+
/**
 * Build the verdict-cache key for a candidate.
 *
 * The key is the hex SHA-256 digest of the text and category joined by a
 * NUL character.
 *
 * @param {string} text - Candidate text.
 * @param {string} category - Candidate category.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function cacheKey(text, category) {
  const hasher = createHash("sha256");
  hasher.update(`${text}\0${category}`);
  return hasher.digest("hex");
}
|
|
51
|
+
/**
 * Trim a verdict cache once it grows past VERDICT_CACHE_MAX_SIZE.
 *
 * When the limit is exceeded, the first half of the entries (in the Map's
 * iteration order, i.e. the earliest inserted keys) are deleted. Caches at or
 * below the limit are left untouched.
 *
 * @param {Map<string, object>} cache - Cache to trim in place.
 * @returns {void}
 */
function enforceMaxCacheSize(cache) {
  if (cache.size > VERDICT_CACHE_MAX_SIZE) {
    let remaining = Math.floor(cache.size / 2);
    for (const staleKey of cache.keys()) {
      if (remaining <= 0) {
        break;
      }
      cache.delete(staleKey);
      remaining -= 1;
    }
  }
}
|
|
61
|
+
var AUTO_APPROVE_CATEGORIES = /* @__PURE__ */ new Set(["correction", "principle"]);
|
|
62
|
+
var PROCEDURE_TRIGGER_RE = /(when you|whenever|before you|before running|always\s|first\b.*\bthen|to deploy|to ship|run these steps|follow these steps|how (i|we)\s|recipe for|workflow|each time you)/i;
|
|
63
|
+
/**
 * Structural durability check for an extracted procedure.
 *
 * The procedure is rejected when `normalizeProcedureSteps` yields fewer than
 * two steps, or when neither the content nor any step intent matches
 * PROCEDURE_TRIGGER_RE.
 *
 * @param {{content: string, procedureSteps: unknown}} input - Extracted procedure.
 * @returns {{durable: boolean, reason: string}} Verdict with a short explanation.
 */
function validateProcedureExtraction(input) {
  const normalizedSteps = normalizeProcedureSteps(input.procedureSteps);
  if (normalizedSteps.length < 2) {
    return { durable: false, reason: "Procedure requires at least two steps with intents" };
  }
  // Search the content and every step intent together for a trigger phrase.
  const intents = normalizedSteps.map((step) => step.intent);
  const haystack = [input.content, ...intents].join(" ").toLowerCase();
  const hasTrigger = PROCEDURE_TRIGGER_RE.test(haystack);
  return hasTrigger
    ? { durable: true, reason: "Procedure structure validated" }
    : { durable: false, reason: "Procedure missing explicit trigger phrasing" };
}
|
|
74
|
+
/**
 * Decide, per candidate, whether an extracted fact is durable.
 *
 * Candidates are resolved in this order:
 *   1. Categories in AUTO_APPROVE_CATEGORIES are approved without a judge call.
 *   2. Candidates with `importanceLevel === "critical"` are approved.
 *   3. A verdict already present in the cache (keyed by text + category) is reused.
 *   4. Everything else is sent to the judge LLM in batches.
 *
 * The judge is fail-open: any pending candidate that ends a batch without a
 * verdict (LLM unavailable, call threw, or the response could not be parsed)
 * is approved by default.
 *
 * @param {Array<{text: string, category: string, confidence?: number, importanceLevel?: string}>} candidates
 *   Facts to evaluate; verdicts are keyed by position in this array.
 * @param {object} config - Reads `extractionJudgeBatchSize` here; passed through to `callJudgeLlm`.
 * @param {object|null|undefined} localLlm - Passed through to `callJudgeLlm`.
 * @param {object|null|undefined} fallbackLlm - Passed through to `callJudgeLlm`.
 * @param {Map<string, {durable: boolean, reason: string}>} [cache]
 *   Verdict cache to use; the module-level default cache is used when omitted.
 * @returns {Promise<{verdicts: Map<number, {durable: boolean, reason: string}>, cached: number, judged: number, elapsed: number}>}
 *   `cached` counts cache hits, `judged` counts verdicts parsed from the LLM,
 *   `elapsed` is wall-clock milliseconds (0 for empty input).
 */
async function judgeFactDurability(candidates, config, localLlm, fallbackLlm, cache) {
  const startMs = Date.now();
  const verdicts = new Map();
  let cached = 0;
  let judged = 0;
  const verdictCache = cache ?? defaultVerdictCache;
  if (candidates.length === 0) {
    return { verdicts, cached, judged, elapsed: 0 };
  }

  // First pass: settle everything that does not need the LLM.
  const pendingIndices = [];
  for (let i = 0; i < candidates.length; i++) {
    const c = candidates[i];
    if (AUTO_APPROVE_CATEGORIES.has(c.category)) {
      verdicts.set(i, {
        durable: true,
        reason: `Auto-approved: ${c.category} category bypasses judge`
      });
      continue;
    }
    if (c.importanceLevel === "critical") {
      verdicts.set(i, {
        durable: true,
        reason: "Auto-approved: critical importance"
      });
      continue;
    }
    const cachedVerdict = verdictCache.get(cacheKey(c.text, c.category));
    if (cachedVerdict) {
      verdicts.set(i, cachedVerdict);
      cached++;
      continue;
    }
    pendingIndices.push(i);
  }
  if (pendingIndices.length === 0) {
    return { verdicts, cached, judged, elapsed: Date.now() - startMs };
  }

  // Guard the loop stride: a batch size of 0 or a negative value would never
  // advance past the end of the list (infinite loop), and a missing or
  // non-numeric value would produce one empty batch and leave every pending
  // candidate without a verdict. Clamp to at least one candidate per batch.
  const requestedBatchSize = Math.floor(Number(config.extractionJudgeBatchSize));
  const batchSize = requestedBatchSize >= 1 ? requestedBatchSize : 1;

  for (let batchStart = 0; batchStart < pendingIndices.length; batchStart += batchSize) {
    const batchIndices = pendingIndices.slice(batchStart, batchStart + batchSize);
    const batchPayload = batchIndices.map((idx) => ({
      index: idx,
      text: candidates[idx].text,
      category: candidates[idx].category,
      confidence: candidates[idx].confidence
    }));
    const userPrompt = JSON.stringify(batchPayload);
    try {
      const llmResponse = await callJudgeLlm(
        userPrompt,
        config,
        localLlm,
        fallbackLlm
      );
      if (llmResponse) {
        const parsed = parseJudgeResponse(llmResponse, batchIndices);
        for (const [idx, verdict] of parsed.entries()) {
          verdicts.set(idx, verdict);
          judged++;
          const c = candidates[idx];
          verdictCache.set(cacheKey(c.text, c.category), verdict);
        }
        enforceMaxCacheSize(verdictCache);
      }
    } catch (err) {
      log.warn(
        `extraction-judge: LLM call failed, approving batch (fail-open): ${err instanceof Error ? err.message : String(err)}`
      );
    }
    // Fail-open: anything in this batch still lacking a verdict is approved.
    // These default approvals are intentionally not written to the cache.
    for (const idx of batchIndices) {
      if (!verdicts.has(idx)) {
        verdicts.set(idx, {
          durable: true,
          reason: "Approved by default (judge unavailable or parse error)"
        });
      }
    }
  }
  return { verdicts, cached, judged, elapsed: Date.now() - startMs };
}
|
|
155
|
+
/**
 * Send one judge prompt to the local LLM, then to the fallback LLM.
 *
 * The local client is skipped when `config.modelSource` is "gateway" or no
 * local client was supplied. A client that throws, or that returns no
 * content, falls through to the next option. Failures are logged at debug
 * level and never propagate.
 *
 * @param {string} userPrompt - Serialized candidate batch for the user message.
 * @param {object} config - Reads `extractionJudgeModel`, `modelSource` and `gatewayAgentId`.
 * @param {{chatCompletion: Function}|null|undefined} localLlm - Preferred client.
 * @param {{chatCompletion: Function}|null|undefined} fallbackLlm - Secondary client.
 * @returns {Promise<string|null>} Response content, or null when no client produced any.
 */
async function callJudgeLlm(userPrompt, config, localLlm, fallbackLlm) {
  const errorText = (err) => (err instanceof Error ? err.message : String(err));
  const conversation = [
    { role: "system", content: JUDGE_SYSTEM_PROMPT },
    { role: "user", content: userPrompt }
  ];
  const viaGateway = config.modelSource === "gateway";
  // Optional settings are only added to the request when they are set.
  const modelOption = config.extractionJudgeModel ? { model: config.extractionJudgeModel } : {};
  const agentOption = viaGateway && config.gatewayAgentId ? { agentId: config.gatewayAgentId } : {};

  if (localLlm && !viaGateway) {
    try {
      const localReply = await localLlm.chatCompletion(conversation, {
        temperature: 0.1,
        maxTokens: 2048,
        responseFormat: { type: "json_object" },
        timeoutMs: 1500,
        operation: "extraction-judge",
        ...modelOption
      });
      if (localReply?.content) {
        return localReply.content;
      }
    } catch (err) {
      log.debug(`extraction-judge: local LLM failed, trying fallback: ${errorText(err)}`);
    }
  }

  if (!fallbackLlm) {
    return null;
  }
  try {
    const fallbackReply = await fallbackLlm.chatCompletion(conversation, {
      temperature: 0.1,
      maxTokens: 2048,
      timeoutMs: 1500,
      ...modelOption,
      ...agentOption
    });
    if (fallbackReply?.content) {
      return fallbackReply.content;
    }
  } catch (err) {
    log.debug(`extraction-judge: fallback LLM failed: ${errorText(err)}`);
  }
  return null;
}
|
|
205
|
+
/**
 * Turn a raw judge response into a map of verdicts keyed by candidate index.
 *
 * The response is parsed as JSON; if that fails, the first candidate returned
 * by `extractJsonCandidates` is parsed instead. A top-level array is used
 * directly; for a top-level object, its first array-valued property is used.
 * Entries are ignored when they are not objects, lack a numeric `index`, or
 * carry an index outside `expectedIndices`. A non-boolean `durable` defaults
 * to true, and `reason` is cut to 120 characters. Parse failures are logged
 * at debug level and yield whatever was collected so far.
 *
 * @param {string} raw - Raw LLM output.
 * @param {number[]} expectedIndices - Candidate indices that were sent in this batch.
 * @returns {Map<number, {durable: boolean, reason: string}>} Verdicts for recognised indices.
 */
function parseJudgeResponse(raw, expectedIndices) {
  const verdictsByIndex = new Map();
  const wantedIndices = new Set(expectedIndices);

  // Strict JSON first; otherwise fall back to the first extracted candidate.
  const decode = () => {
    try {
      return JSON.parse(raw);
    } catch {
      const extracted = extractJsonCandidates(raw);
      return extracted.length > 0 ? JSON.parse(extracted[0]) : undefined;
    }
  };

  try {
    const decoded = decode();
    let entries;
    if (Array.isArray(decoded)) {
      entries = decoded;
    } else if (decoded !== null && typeof decoded === "object") {
      // An object wrapper: take its first array-valued property.
      entries = Object.values(decoded).find((value) => Array.isArray(value));
    }
    if (!Array.isArray(entries)) {
      log.debug("extraction-judge: response is not an array, cannot parse");
      return verdictsByIndex;
    }

    for (const entry of entries) {
      const usable =
        entry !== null &&
        typeof entry === "object" &&
        typeof entry.index === "number" &&
        wantedIndices.has(entry.index);
      if (!usable) {
        continue;
      }
      verdictsByIndex.set(entry.index, {
        durable: typeof entry.durable === "boolean" ? entry.durable : true,
        reason: typeof entry.reason === "string" ? entry.reason.slice(0, 120) : "No reason provided"
      });
    }
  } catch (err) {
    log.debug(
      `extraction-judge: failed to parse response: ${err instanceof Error ? err.message : String(err)}`
    );
  }
  return verdictsByIndex;
}
|
|
250
|
+
/**
 * Remove every entry from the module-level default verdict cache.
 *
 * Only the default cache is touched; separately created cache Maps are not.
 *
 * @returns {void}
 */
function clearVerdictCache() {
  defaultVerdictCache.clear();
}
|
|
253
|
+
/**
 * Report how many verdicts the module-level default cache currently holds.
 *
 * @returns {number} Entry count of the default verdict cache.
 */
function verdictCacheSize() {
  const { size } = defaultVerdictCache;
  return size;
}
|
|
256
|
+
/**
 * Create a new, empty verdict cache that is independent of the default one.
 *
 * NOTE(review): presumably meant to be passed as the `cache` argument of
 * `judgeFactDurability` for isolated runs — confirm against callers.
 *
 * @returns {Map<string, {durable: boolean, reason: string}>} A fresh empty Map.
 */
function createVerdictCache() {
  const emptyCache = new Map();
  return emptyCache;
}
|
|
259
|
+
|
|
260
|
+
export {
|
|
261
|
+
validateProcedureExtraction,
|
|
262
|
+
judgeFactDurability,
|
|
263
|
+
clearVerdictCache,
|
|
264
|
+
verdictCacheSize,
|
|
265
|
+
createVerdictCache
|
|
266
|
+
};
|
|
267
|
+
//# sourceMappingURL=chunk-LAYN4LDC.js.map
|