@psiclawops/hypermem 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (926)
  1. package/ARCHITECTURE.md +2 -2
  2. package/CHANGELOG.md +10 -1
  3. package/docs/ROADMAP.md +12 -3
  4. package/memory-plugin/package.json +3 -3
  5. package/package.json +2 -1
  6. package/plugin/package.json +4 -4
  7. package/dist/adaptive-lifecycle.d.ts +0 -81
  8. package/dist/adaptive-lifecycle.d.ts.map +0 -1
  9. package/dist/adaptive-lifecycle.js +0 -190
  10. package/dist/background-indexer.d.ts +0 -161
  11. package/dist/background-indexer.d.ts.map +0 -1
  12. package/dist/background-indexer.js +0 -1263
  13. package/dist/budget-policy.d.ts +0 -22
  14. package/dist/budget-policy.d.ts.map +0 -1
  15. package/dist/budget-policy.js +0 -32
  16. package/dist/cache.d.ts +0 -148
  17. package/dist/cache.d.ts.map +0 -1
  18. package/dist/cache.js +0 -571
  19. package/dist/compaction-fence.d.ts +0 -97
  20. package/dist/compaction-fence.d.ts.map +0 -1
  21. package/dist/compaction-fence.js +0 -174
  22. package/dist/composition-snapshot-integrity.d.ts +0 -36
  23. package/dist/composition-snapshot-integrity.d.ts.map +0 -1
  24. package/dist/composition-snapshot-integrity.js +0 -131
  25. package/dist/composition-snapshot-runtime.d.ts +0 -59
  26. package/dist/composition-snapshot-runtime.d.ts.map +0 -1
  27. package/dist/composition-snapshot-runtime.js +0 -250
  28. package/dist/composition-snapshot-store.d.ts +0 -44
  29. package/dist/composition-snapshot-store.d.ts.map +0 -1
  30. package/dist/composition-snapshot-store.js +0 -117
  31. package/dist/compositor-utils.d.ts +0 -31
  32. package/dist/compositor-utils.d.ts.map +0 -1
  33. package/dist/compositor-utils.js +0 -47
  34. package/dist/compositor.d.ts +0 -518
  35. package/dist/compositor.d.ts.map +0 -1
  36. package/dist/compositor.js +0 -4280
  37. package/dist/content-hash.d.ts +0 -43
  38. package/dist/content-hash.d.ts.map +0 -1
  39. package/dist/content-hash.js +0 -75
  40. package/dist/content-type-classifier.d.ts +0 -41
  41. package/dist/content-type-classifier.d.ts.map +0 -1
  42. package/dist/content-type-classifier.js +0 -181
  43. package/dist/context-backfill.d.ts +0 -46
  44. package/dist/context-backfill.d.ts.map +0 -1
  45. package/dist/context-backfill.js +0 -113
  46. package/dist/context-store.d.ts +0 -131
  47. package/dist/context-store.d.ts.map +0 -1
  48. package/dist/context-store.js +0 -279
  49. package/dist/contradiction-audit-store.d.ts +0 -54
  50. package/dist/contradiction-audit-store.d.ts.map +0 -1
  51. package/dist/contradiction-audit-store.js +0 -88
  52. package/dist/contradiction-detector.d.ts +0 -78
  53. package/dist/contradiction-detector.d.ts.map +0 -1
  54. package/dist/contradiction-detector.js +0 -362
  55. package/dist/contradiction-resolution-policy.d.ts +0 -21
  56. package/dist/contradiction-resolution-policy.d.ts.map +0 -1
  57. package/dist/contradiction-resolution-policy.js +0 -17
  58. package/dist/cross-agent.d.ts +0 -74
  59. package/dist/cross-agent.d.ts.map +0 -1
  60. package/dist/cross-agent.js +0 -271
  61. package/dist/db.d.ts +0 -131
  62. package/dist/db.d.ts.map +0 -1
  63. package/dist/db.js +0 -410
  64. package/dist/degradation.d.ts +0 -102
  65. package/dist/degradation.d.ts.map +0 -1
  66. package/dist/degradation.js +0 -141
  67. package/dist/desired-state-store.d.ts +0 -100
  68. package/dist/desired-state-store.d.ts.map +0 -1
  69. package/dist/desired-state-store.js +0 -222
  70. package/dist/doc-chunk-store.d.ts +0 -159
  71. package/dist/doc-chunk-store.d.ts.map +0 -1
  72. package/dist/doc-chunk-store.js +0 -441
  73. package/dist/doc-chunker.d.ts +0 -99
  74. package/dist/doc-chunker.d.ts.map +0 -1
  75. package/dist/doc-chunker.js +0 -324
  76. package/dist/dreaming-promoter.d.ts +0 -124
  77. package/dist/dreaming-promoter.d.ts.map +0 -1
  78. package/dist/dreaming-promoter.js +0 -447
  79. package/dist/episode-store.d.ts +0 -49
  80. package/dist/episode-store.d.ts.map +0 -1
  81. package/dist/episode-store.js +0 -135
  82. package/dist/expertise-store.d.ts +0 -129
  83. package/dist/expertise-store.d.ts.map +0 -1
  84. package/dist/expertise-store.js +0 -342
  85. package/dist/fact-store.d.ts +0 -90
  86. package/dist/fact-store.d.ts.map +0 -1
  87. package/dist/fact-store.js +0 -283
  88. package/dist/fleet-store.d.ts +0 -144
  89. package/dist/fleet-store.d.ts.map +0 -1
  90. package/dist/fleet-store.js +0 -276
  91. package/dist/fos-mod.d.ts +0 -178
  92. package/dist/fos-mod.d.ts.map +0 -1
  93. package/dist/fos-mod.js +0 -416
  94. package/dist/hybrid-retrieval.d.ts +0 -102
  95. package/dist/hybrid-retrieval.d.ts.map +0 -1
  96. package/dist/hybrid-retrieval.js +0 -429
  97. package/dist/image-eviction.d.ts +0 -49
  98. package/dist/image-eviction.d.ts.map +0 -1
  99. package/dist/image-eviction.js +0 -251
  100. package/dist/index.d.ts +0 -727
  101. package/dist/index.d.ts.map +0 -1
  102. package/dist/index.js +0 -1495
  103. package/dist/keystone-scorer.d.ts +0 -51
  104. package/dist/keystone-scorer.d.ts.map +0 -1
  105. package/dist/keystone-scorer.js +0 -52
  106. package/dist/knowledge-graph.d.ts +0 -110
  107. package/dist/knowledge-graph.d.ts.map +0 -1
  108. package/dist/knowledge-graph.js +0 -305
  109. package/dist/knowledge-lint.d.ts +0 -31
  110. package/dist/knowledge-lint.d.ts.map +0 -1
  111. package/dist/knowledge-lint.js +0 -155
  112. package/dist/knowledge-store.d.ts +0 -75
  113. package/dist/knowledge-store.d.ts.map +0 -1
  114. package/dist/knowledge-store.js +0 -270
  115. package/dist/library-schema.d.ts +0 -31
  116. package/dist/library-schema.d.ts.map +0 -1
  117. package/dist/library-schema.js +0 -1357
  118. package/dist/message-store.d.ts +0 -181
  119. package/dist/message-store.d.ts.map +0 -1
  120. package/dist/message-store.js +0 -577
  121. package/dist/metrics-dashboard.d.ts +0 -131
  122. package/dist/metrics-dashboard.d.ts.map +0 -1
  123. package/dist/metrics-dashboard.js +0 -298
  124. package/dist/obsidian-exporter.d.ts +0 -57
  125. package/dist/obsidian-exporter.d.ts.map +0 -1
  126. package/dist/obsidian-exporter.js +0 -274
  127. package/dist/obsidian-watcher.d.ts +0 -147
  128. package/dist/obsidian-watcher.d.ts.map +0 -1
  129. package/dist/obsidian-watcher.js +0 -403
  130. package/dist/open-domain.d.ts +0 -46
  131. package/dist/open-domain.d.ts.map +0 -1
  132. package/dist/open-domain.js +0 -125
  133. package/dist/preference-store.d.ts +0 -54
  134. package/dist/preference-store.d.ts.map +0 -1
  135. package/dist/preference-store.js +0 -109
  136. package/dist/preservation-gate.d.ts +0 -82
  137. package/dist/preservation-gate.d.ts.map +0 -1
  138. package/dist/preservation-gate.js +0 -150
  139. package/dist/proactive-pass.d.ts +0 -63
  140. package/dist/proactive-pass.d.ts.map +0 -1
  141. package/dist/proactive-pass.js +0 -293
  142. package/dist/profiles.d.ts +0 -46
  143. package/dist/profiles.d.ts.map +0 -1
  144. package/dist/profiles.js +0 -262
  145. package/dist/provider-translator.d.ts +0 -50
  146. package/dist/provider-translator.d.ts.map +0 -1
  147. package/dist/provider-translator.js +0 -403
  148. package/dist/rate-limiter.d.ts +0 -76
  149. package/dist/rate-limiter.d.ts.map +0 -1
  150. package/dist/rate-limiter.js +0 -179
  151. package/dist/repair-tool-pairs.d.ts +0 -38
  152. package/dist/repair-tool-pairs.d.ts.map +0 -1
  153. package/dist/repair-tool-pairs.js +0 -209
  154. package/dist/replay-recovery.d.ts +0 -29
  155. package/dist/replay-recovery.d.ts.map +0 -1
  156. package/dist/replay-recovery.js +0 -82
  157. package/dist/reranker.d.ts +0 -95
  158. package/dist/reranker.d.ts.map +0 -1
  159. package/dist/reranker.js +0 -308
  160. package/dist/retrieval-policy.d.ts +0 -51
  161. package/dist/retrieval-policy.d.ts.map +0 -1
  162. package/dist/retrieval-policy.js +0 -77
  163. package/dist/schema.d.ts +0 -15
  164. package/dist/schema.d.ts.map +0 -1
  165. package/dist/schema.js +0 -327
  166. package/dist/secret-scanner.d.ts +0 -51
  167. package/dist/secret-scanner.d.ts.map +0 -1
  168. package/dist/secret-scanner.js +0 -248
  169. package/dist/seed.d.ts +0 -108
  170. package/dist/seed.d.ts.map +0 -1
  171. package/dist/seed.js +0 -179
  172. package/dist/session-flusher.d.ts +0 -53
  173. package/dist/session-flusher.d.ts.map +0 -1
  174. package/dist/session-flusher.js +0 -69
  175. package/dist/session-topic-map.d.ts +0 -41
  176. package/dist/session-topic-map.d.ts.map +0 -1
  177. package/dist/session-topic-map.js +0 -77
  178. package/dist/spawn-context.d.ts +0 -54
  179. package/dist/spawn-context.d.ts.map +0 -1
  180. package/dist/spawn-context.js +0 -159
  181. package/dist/system-store.d.ts +0 -73
  182. package/dist/system-store.d.ts.map +0 -1
  183. package/dist/system-store.js +0 -182
  184. package/dist/temporal-store.d.ts +0 -81
  185. package/dist/temporal-store.d.ts.map +0 -1
  186. package/dist/temporal-store.js +0 -149
  187. package/dist/tool-artifact-store.d.ts +0 -98
  188. package/dist/tool-artifact-store.d.ts.map +0 -1
  189. package/dist/tool-artifact-store.js +0 -244
  190. package/dist/topic-detector.d.ts +0 -35
  191. package/dist/topic-detector.d.ts.map +0 -1
  192. package/dist/topic-detector.js +0 -249
  193. package/dist/topic-store.d.ts +0 -51
  194. package/dist/topic-store.d.ts.map +0 -1
  195. package/dist/topic-store.js +0 -175
  196. package/dist/topic-synthesizer.d.ts +0 -71
  197. package/dist/topic-synthesizer.d.ts.map +0 -1
  198. package/dist/topic-synthesizer.js +0 -426
  199. package/dist/trigger-registry.d.ts +0 -63
  200. package/dist/trigger-registry.d.ts.map +0 -1
  201. package/dist/trigger-registry.js +0 -171
  202. package/dist/types.d.ts +0 -1085
  203. package/dist/types.d.ts.map +0 -1
  204. package/dist/types.js +0 -9
  205. package/dist/vector-store.d.ts +0 -180
  206. package/dist/vector-store.d.ts.map +0 -1
  207. package/dist/vector-store.js +0 -1041
  208. package/dist/version.d.ts +0 -34
  209. package/dist/version.d.ts.map +0 -1
  210. package/dist/version.js +0 -44
  211. package/dist/wiki-page-emitter.d.ts +0 -65
  212. package/dist/wiki-page-emitter.d.ts.map +0 -1
  213. package/dist/wiki-page-emitter.js +0 -258
  214. package/dist/work-store.d.ts +0 -112
  215. package/dist/work-store.d.ts.map +0 -1
  216. package/dist/work-store.js +0 -273
  217. package/memory-plugin/dist/index.d.ts +0 -24
  218. package/memory-plugin/dist/index.d.ts.map +0 -1
  219. package/memory-plugin/dist/index.js +0 -350
  220. package/memory-plugin/dist/index.js.map +0 -1
  221. package/node_modules/sqlite-vec/README.md +0 -1
  222. package/node_modules/sqlite-vec/index.cjs +0 -46
  223. package/node_modules/sqlite-vec/index.d.ts +0 -17
  224. package/node_modules/sqlite-vec/index.mjs +0 -47
  225. package/node_modules/sqlite-vec/package.json +0 -1
  226. package/node_modules/sqlite-vec-linux-x64/README.md +0 -1
  227. package/node_modules/sqlite-vec-linux-x64/package.json +0 -1
  228. package/node_modules/sqlite-vec-linux-x64/vec0.so +0 -0
  229. package/node_modules/zod/LICENSE +0 -21
  230. package/node_modules/zod/README.md +0 -208
  231. package/node_modules/zod/index.cjs +0 -33
  232. package/node_modules/zod/index.d.cts +0 -4
  233. package/node_modules/zod/index.d.ts +0 -4
  234. package/node_modules/zod/index.js +0 -4
  235. package/node_modules/zod/locales/index.cjs +0 -17
  236. package/node_modules/zod/locales/index.d.cts +0 -1
  237. package/node_modules/zod/locales/index.d.ts +0 -1
  238. package/node_modules/zod/locales/index.js +0 -1
  239. package/node_modules/zod/locales/package.json +0 -6
  240. package/node_modules/zod/mini/index.cjs +0 -32
  241. package/node_modules/zod/mini/index.d.cts +0 -3
  242. package/node_modules/zod/mini/index.d.ts +0 -3
  243. package/node_modules/zod/mini/index.js +0 -3
  244. package/node_modules/zod/mini/package.json +0 -6
  245. package/node_modules/zod/package.json +0 -135
  246. package/node_modules/zod/src/index.ts +0 -4
  247. package/node_modules/zod/src/locales/index.ts +0 -1
  248. package/node_modules/zod/src/mini/index.ts +0 -3
  249. package/node_modules/zod/src/v3/ZodError.ts +0 -330
  250. package/node_modules/zod/src/v3/benchmarks/datetime.ts +0 -58
  251. package/node_modules/zod/src/v3/benchmarks/discriminatedUnion.ts +0 -80
  252. package/node_modules/zod/src/v3/benchmarks/index.ts +0 -59
  253. package/node_modules/zod/src/v3/benchmarks/ipv4.ts +0 -57
  254. package/node_modules/zod/src/v3/benchmarks/object.ts +0 -69
  255. package/node_modules/zod/src/v3/benchmarks/primitives.ts +0 -162
  256. package/node_modules/zod/src/v3/benchmarks/realworld.ts +0 -63
  257. package/node_modules/zod/src/v3/benchmarks/string.ts +0 -55
  258. package/node_modules/zod/src/v3/benchmarks/union.ts +0 -80
  259. package/node_modules/zod/src/v3/errors.ts +0 -13
  260. package/node_modules/zod/src/v3/external.ts +0 -6
  261. package/node_modules/zod/src/v3/helpers/enumUtil.ts +0 -17
  262. package/node_modules/zod/src/v3/helpers/errorUtil.ts +0 -8
  263. package/node_modules/zod/src/v3/helpers/parseUtil.ts +0 -176
  264. package/node_modules/zod/src/v3/helpers/partialUtil.ts +0 -34
  265. package/node_modules/zod/src/v3/helpers/typeAliases.ts +0 -2
  266. package/node_modules/zod/src/v3/helpers/util.ts +0 -224
  267. package/node_modules/zod/src/v3/index.ts +0 -4
  268. package/node_modules/zod/src/v3/locales/en.ts +0 -124
  269. package/node_modules/zod/src/v3/standard-schema.ts +0 -113
  270. package/node_modules/zod/src/v3/tests/Mocker.ts +0 -54
  271. package/node_modules/zod/src/v3/tests/all-errors.test.ts +0 -157
  272. package/node_modules/zod/src/v3/tests/anyunknown.test.ts +0 -28
  273. package/node_modules/zod/src/v3/tests/array.test.ts +0 -71
  274. package/node_modules/zod/src/v3/tests/async-parsing.test.ts +0 -388
  275. package/node_modules/zod/src/v3/tests/async-refinements.test.ts +0 -46
  276. package/node_modules/zod/src/v3/tests/base.test.ts +0 -29
  277. package/node_modules/zod/src/v3/tests/bigint.test.ts +0 -55
  278. package/node_modules/zod/src/v3/tests/branded.test.ts +0 -53
  279. package/node_modules/zod/src/v3/tests/catch.test.ts +0 -220
  280. package/node_modules/zod/src/v3/tests/coerce.test.ts +0 -133
  281. package/node_modules/zod/src/v3/tests/complex.test.ts +0 -70
  282. package/node_modules/zod/src/v3/tests/custom.test.ts +0 -31
  283. package/node_modules/zod/src/v3/tests/date.test.ts +0 -32
  284. package/node_modules/zod/src/v3/tests/deepmasking.test.ts +0 -186
  285. package/node_modules/zod/src/v3/tests/default.test.ts +0 -112
  286. package/node_modules/zod/src/v3/tests/description.test.ts +0 -33
  287. package/node_modules/zod/src/v3/tests/discriminated-unions.test.ts +0 -315
  288. package/node_modules/zod/src/v3/tests/enum.test.ts +0 -80
  289. package/node_modules/zod/src/v3/tests/error.test.ts +0 -551
  290. package/node_modules/zod/src/v3/tests/firstparty.test.ts +0 -87
  291. package/node_modules/zod/src/v3/tests/firstpartyschematypes.test.ts +0 -21
  292. package/node_modules/zod/src/v3/tests/function.test.ts +0 -261
  293. package/node_modules/zod/src/v3/tests/generics.test.ts +0 -48
  294. package/node_modules/zod/src/v3/tests/instanceof.test.ts +0 -37
  295. package/node_modules/zod/src/v3/tests/intersection.test.ts +0 -110
  296. package/node_modules/zod/src/v3/tests/language-server.source.ts +0 -76
  297. package/node_modules/zod/src/v3/tests/language-server.test.ts +0 -207
  298. package/node_modules/zod/src/v3/tests/literal.test.ts +0 -36
  299. package/node_modules/zod/src/v3/tests/map.test.ts +0 -110
  300. package/node_modules/zod/src/v3/tests/masking.test.ts +0 -4
  301. package/node_modules/zod/src/v3/tests/mocker.test.ts +0 -19
  302. package/node_modules/zod/src/v3/tests/nan.test.ts +0 -24
  303. package/node_modules/zod/src/v3/tests/nativeEnum.test.ts +0 -87
  304. package/node_modules/zod/src/v3/tests/nullable.test.ts +0 -42
  305. package/node_modules/zod/src/v3/tests/number.test.ts +0 -176
  306. package/node_modules/zod/src/v3/tests/object-augmentation.test.ts +0 -29
  307. package/node_modules/zod/src/v3/tests/object-in-es5-env.test.ts +0 -29
  308. package/node_modules/zod/src/v3/tests/object.test.ts +0 -434
  309. package/node_modules/zod/src/v3/tests/optional.test.ts +0 -42
  310. package/node_modules/zod/src/v3/tests/parseUtil.test.ts +0 -23
  311. package/node_modules/zod/src/v3/tests/parser.test.ts +0 -41
  312. package/node_modules/zod/src/v3/tests/partials.test.ts +0 -243
  313. package/node_modules/zod/src/v3/tests/pickomit.test.ts +0 -111
  314. package/node_modules/zod/src/v3/tests/pipeline.test.ts +0 -29
  315. package/node_modules/zod/src/v3/tests/preprocess.test.ts +0 -186
  316. package/node_modules/zod/src/v3/tests/primitive.test.ts +0 -440
  317. package/node_modules/zod/src/v3/tests/promise.test.ts +0 -90
  318. package/node_modules/zod/src/v3/tests/readonly.test.ts +0 -194
  319. package/node_modules/zod/src/v3/tests/record.test.ts +0 -171
  320. package/node_modules/zod/src/v3/tests/recursive.test.ts +0 -197
  321. package/node_modules/zod/src/v3/tests/refine.test.ts +0 -313
  322. package/node_modules/zod/src/v3/tests/safeparse.test.ts +0 -27
  323. package/node_modules/zod/src/v3/tests/set.test.ts +0 -142
  324. package/node_modules/zod/src/v3/tests/standard-schema.test.ts +0 -83
  325. package/node_modules/zod/src/v3/tests/string.test.ts +0 -916
  326. package/node_modules/zod/src/v3/tests/transformer.test.ts +0 -233
  327. package/node_modules/zod/src/v3/tests/tuple.test.ts +0 -90
  328. package/node_modules/zod/src/v3/tests/unions.test.ts +0 -57
  329. package/node_modules/zod/src/v3/tests/validations.test.ts +0 -133
  330. package/node_modules/zod/src/v3/tests/void.test.ts +0 -15
  331. package/node_modules/zod/src/v3/types.ts +0 -5138
  332. package/node_modules/zod/src/v4/classic/checks.ts +0 -32
  333. package/node_modules/zod/src/v4/classic/coerce.ts +0 -27
  334. package/node_modules/zod/src/v4/classic/compat.ts +0 -70
  335. package/node_modules/zod/src/v4/classic/errors.ts +0 -82
  336. package/node_modules/zod/src/v4/classic/external.ts +0 -51
  337. package/node_modules/zod/src/v4/classic/from-json-schema.ts +0 -643
  338. package/node_modules/zod/src/v4/classic/index.ts +0 -5
  339. package/node_modules/zod/src/v4/classic/iso.ts +0 -90
  340. package/node_modules/zod/src/v4/classic/parse.ts +0 -82
  341. package/node_modules/zod/src/v4/classic/schemas.ts +0 -2409
  342. package/node_modules/zod/src/v4/classic/tests/anyunknown.test.ts +0 -26
  343. package/node_modules/zod/src/v4/classic/tests/apply.test.ts +0 -59
  344. package/node_modules/zod/src/v4/classic/tests/array.test.ts +0 -264
  345. package/node_modules/zod/src/v4/classic/tests/assignability.test.ts +0 -210
  346. package/node_modules/zod/src/v4/classic/tests/async-parsing.test.ts +0 -381
  347. package/node_modules/zod/src/v4/classic/tests/async-refinements.test.ts +0 -68
  348. package/node_modules/zod/src/v4/classic/tests/base.test.ts +0 -7
  349. package/node_modules/zod/src/v4/classic/tests/bigint.test.ts +0 -54
  350. package/node_modules/zod/src/v4/classic/tests/brand.test.ts +0 -106
  351. package/node_modules/zod/src/v4/classic/tests/catch.test.ts +0 -276
  352. package/node_modules/zod/src/v4/classic/tests/coalesce.test.ts +0 -20
  353. package/node_modules/zod/src/v4/classic/tests/codec-examples.test.ts +0 -573
  354. package/node_modules/zod/src/v4/classic/tests/codec.test.ts +0 -562
  355. package/node_modules/zod/src/v4/classic/tests/coerce.test.ts +0 -160
  356. package/node_modules/zod/src/v4/classic/tests/continuability.test.ts +0 -374
  357. package/node_modules/zod/src/v4/classic/tests/custom.test.ts +0 -40
  358. package/node_modules/zod/src/v4/classic/tests/date.test.ts +0 -62
  359. package/node_modules/zod/src/v4/classic/tests/datetime.test.ts +0 -302
  360. package/node_modules/zod/src/v4/classic/tests/default.test.ts +0 -365
  361. package/node_modules/zod/src/v4/classic/tests/describe-meta-checks.test.ts +0 -27
  362. package/node_modules/zod/src/v4/classic/tests/description.test.ts +0 -32
  363. package/node_modules/zod/src/v4/classic/tests/discriminated-unions.test.ts +0 -661
  364. package/node_modules/zod/src/v4/classic/tests/enum.test.ts +0 -285
  365. package/node_modules/zod/src/v4/classic/tests/error-utils.test.ts +0 -595
  366. package/node_modules/zod/src/v4/classic/tests/error.test.ts +0 -711
  367. package/node_modules/zod/src/v4/classic/tests/file.test.ts +0 -96
  368. package/node_modules/zod/src/v4/classic/tests/firstparty.test.ts +0 -179
  369. package/node_modules/zod/src/v4/classic/tests/fix-json-issue.test.ts +0 -26
  370. package/node_modules/zod/src/v4/classic/tests/from-json-schema.test.ts +0 -734
  371. package/node_modules/zod/src/v4/classic/tests/function.test.ts +0 -360
  372. package/node_modules/zod/src/v4/classic/tests/generics.test.ts +0 -72
  373. package/node_modules/zod/src/v4/classic/tests/hash.test.ts +0 -68
  374. package/node_modules/zod/src/v4/classic/tests/index.test.ts +0 -939
  375. package/node_modules/zod/src/v4/classic/tests/instanceof.test.ts +0 -60
  376. package/node_modules/zod/src/v4/classic/tests/intersection.test.ts +0 -198
  377. package/node_modules/zod/src/v4/classic/tests/json.test.ts +0 -109
  378. package/node_modules/zod/src/v4/classic/tests/lazy.test.ts +0 -227
  379. package/node_modules/zod/src/v4/classic/tests/literal.test.ts +0 -117
  380. package/node_modules/zod/src/v4/classic/tests/map.test.ts +0 -330
  381. package/node_modules/zod/src/v4/classic/tests/nan.test.ts +0 -21
  382. package/node_modules/zod/src/v4/classic/tests/nested-refine.test.ts +0 -168
  383. package/node_modules/zod/src/v4/classic/tests/nonoptional.test.ts +0 -101
  384. package/node_modules/zod/src/v4/classic/tests/nullable.test.ts +0 -22
  385. package/node_modules/zod/src/v4/classic/tests/number.test.ts +0 -270
  386. package/node_modules/zod/src/v4/classic/tests/object.test.ts +0 -640
  387. package/node_modules/zod/src/v4/classic/tests/optional.test.ts +0 -223
  388. package/node_modules/zod/src/v4/classic/tests/partial.test.ts +0 -427
  389. package/node_modules/zod/src/v4/classic/tests/pickomit.test.ts +0 -211
  390. package/node_modules/zod/src/v4/classic/tests/pipe.test.ts +0 -101
  391. package/node_modules/zod/src/v4/classic/tests/prefault.test.ts +0 -74
  392. package/node_modules/zod/src/v4/classic/tests/preprocess.test.ts +0 -282
  393. package/node_modules/zod/src/v4/classic/tests/primitive.test.ts +0 -175
  394. package/node_modules/zod/src/v4/classic/tests/promise.test.ts +0 -81
  395. package/node_modules/zod/src/v4/classic/tests/prototypes.test.ts +0 -23
  396. package/node_modules/zod/src/v4/classic/tests/readonly.test.ts +0 -252
  397. package/node_modules/zod/src/v4/classic/tests/record.test.ts +0 -632
  398. package/node_modules/zod/src/v4/classic/tests/recursive-types.test.ts +0 -582
  399. package/node_modules/zod/src/v4/classic/tests/refine.test.ts +0 -570
  400. package/node_modules/zod/src/v4/classic/tests/registries.test.ts +0 -243
  401. package/node_modules/zod/src/v4/classic/tests/set.test.ts +0 -181
  402. package/node_modules/zod/src/v4/classic/tests/standard-schema.test.ts +0 -134
  403. package/node_modules/zod/src/v4/classic/tests/string-formats.test.ts +0 -125
  404. package/node_modules/zod/src/v4/classic/tests/string.test.ts +0 -1175
  405. package/node_modules/zod/src/v4/classic/tests/stringbool.test.ts +0 -106
  406. package/node_modules/zod/src/v4/classic/tests/template-literal.test.ts +0 -771
  407. package/node_modules/zod/src/v4/classic/tests/to-json-schema-methods.test.ts +0 -438
  408. package/node_modules/zod/src/v4/classic/tests/to-json-schema.test.ts +0 -2990
  409. package/node_modules/zod/src/v4/classic/tests/transform.test.ts +0 -361
  410. package/node_modules/zod/src/v4/classic/tests/tuple.test.ts +0 -183
  411. package/node_modules/zod/src/v4/classic/tests/union.test.ts +0 -219
  412. package/node_modules/zod/src/v4/classic/tests/url.test.ts +0 -13
  413. package/node_modules/zod/src/v4/classic/tests/validations.test.ts +0 -283
  414. package/node_modules/zod/src/v4/classic/tests/void.test.ts +0 -12
  415. package/node_modules/zod/src/v4/core/api.ts +0 -1798
  416. package/node_modules/zod/src/v4/core/checks.ts +0 -1293
  417. package/node_modules/zod/src/v4/core/config.ts +0 -15
  418. package/node_modules/zod/src/v4/core/core.ts +0 -138
  419. package/node_modules/zod/src/v4/core/doc.ts +0 -44
  420. package/node_modules/zod/src/v4/core/errors.ts +0 -448
  421. package/node_modules/zod/src/v4/core/index.ts +0 -16
  422. package/node_modules/zod/src/v4/core/json-schema-generator.ts +0 -126
  423. package/node_modules/zod/src/v4/core/json-schema-processors.ts +0 -667
  424. package/node_modules/zod/src/v4/core/json-schema.ts +0 -147
  425. package/node_modules/zod/src/v4/core/parse.ts +0 -195
  426. package/node_modules/zod/src/v4/core/regexes.ts +0 -183
  427. package/node_modules/zod/src/v4/core/registries.ts +0 -105
  428. package/node_modules/zod/src/v4/core/schemas.ts +0 -4538
  429. package/node_modules/zod/src/v4/core/standard-schema.ts +0 -159
  430. package/node_modules/zod/src/v4/core/tests/extend.test.ts +0 -59
  431. package/node_modules/zod/src/v4/core/tests/index.test.ts +0 -46
  432. package/node_modules/zod/src/v4/core/tests/locales/be.test.ts +0 -124
  433. package/node_modules/zod/src/v4/core/tests/locales/en.test.ts +0 -22
  434. package/node_modules/zod/src/v4/core/tests/locales/es.test.ts +0 -181
  435. package/node_modules/zod/src/v4/core/tests/locales/he.test.ts +0 -379
  436. package/node_modules/zod/src/v4/core/tests/locales/nl.test.ts +0 -46
  437. package/node_modules/zod/src/v4/core/tests/locales/ru.test.ts +0 -128
  438. package/node_modules/zod/src/v4/core/tests/locales/tr.test.ts +0 -69
  439. package/node_modules/zod/src/v4/core/tests/locales/uz.test.ts +0 -83
  440. package/node_modules/zod/src/v4/core/tests/record-constructor.test.ts +0 -67
  441. package/node_modules/zod/src/v4/core/tests/recursive-tuples.test.ts +0 -45
  442. package/node_modules/zod/src/v4/core/to-json-schema.ts +0 -613
  443. package/node_modules/zod/src/v4/core/util.ts +0 -966
  444. package/node_modules/zod/src/v4/core/versions.ts +0 -5
  445. package/node_modules/zod/src/v4/core/zsf.ts +0 -323
  446. package/node_modules/zod/src/v4/index.ts +0 -4
  447. package/node_modules/zod/src/v4/locales/ar.ts +0 -115
  448. package/node_modules/zod/src/v4/locales/az.ts +0 -111
  449. package/node_modules/zod/src/v4/locales/be.ts +0 -176
  450. package/node_modules/zod/src/v4/locales/bg.ts +0 -128
  451. package/node_modules/zod/src/v4/locales/ca.ts +0 -116
  452. package/node_modules/zod/src/v4/locales/cs.ts +0 -118
  453. package/node_modules/zod/src/v4/locales/da.ts +0 -123
  454. package/node_modules/zod/src/v4/locales/de.ts +0 -116
  455. package/node_modules/zod/src/v4/locales/en.ts +0 -119
  456. package/node_modules/zod/src/v4/locales/eo.ts +0 -118
  457. package/node_modules/zod/src/v4/locales/es.ts +0 -141
  458. package/node_modules/zod/src/v4/locales/fa.ts +0 -126
  459. package/node_modules/zod/src/v4/locales/fi.ts +0 -121
  460. package/node_modules/zod/src/v4/locales/fr-CA.ts +0 -116
  461. package/node_modules/zod/src/v4/locales/fr.ts +0 -116
  462. package/node_modules/zod/src/v4/locales/he.ts +0 -246
  463. package/node_modules/zod/src/v4/locales/hu.ts +0 -117
  464. package/node_modules/zod/src/v4/locales/hy.ts +0 -164
  465. package/node_modules/zod/src/v4/locales/id.ts +0 -115
  466. package/node_modules/zod/src/v4/locales/index.ts +0 -49
  467. package/node_modules/zod/src/v4/locales/is.ts +0 -119
  468. package/node_modules/zod/src/v4/locales/it.ts +0 -116
  469. package/node_modules/zod/src/v4/locales/ja.ts +0 -114
  470. package/node_modules/zod/src/v4/locales/ka.ts +0 -123
  471. package/node_modules/zod/src/v4/locales/kh.ts +0 -7
  472. package/node_modules/zod/src/v4/locales/km.ts +0 -119
  473. package/node_modules/zod/src/v4/locales/ko.ts +0 -121
  474. package/node_modules/zod/src/v4/locales/lt.ts +0 -239
  475. package/node_modules/zod/src/v4/locales/mk.ts +0 -118
  476. package/node_modules/zod/src/v4/locales/ms.ts +0 -115
  477. package/node_modules/zod/src/v4/locales/nl.ts +0 -121
  478. package/node_modules/zod/src/v4/locales/no.ts +0 -116
  479. package/node_modules/zod/src/v4/locales/ota.ts +0 -117
  480. package/node_modules/zod/src/v4/locales/pl.ts +0 -118
  481. package/node_modules/zod/src/v4/locales/ps.ts +0 -126
  482. package/node_modules/zod/src/v4/locales/pt.ts +0 -116
  483. package/node_modules/zod/src/v4/locales/ru.ts +0 -176
  484. package/node_modules/zod/src/v4/locales/sl.ts +0 -118
  485. package/node_modules/zod/src/v4/locales/sv.ts +0 -119
  486. package/node_modules/zod/src/v4/locales/ta.ts +0 -118
  487. package/node_modules/zod/src/v4/locales/th.ts +0 -119
  488. package/node_modules/zod/src/v4/locales/tr.ts +0 -111
  489. package/node_modules/zod/src/v4/locales/ua.ts +0 -7
  490. package/node_modules/zod/src/v4/locales/uk.ts +0 -117
  491. package/node_modules/zod/src/v4/locales/ur.ts +0 -119
  492. package/node_modules/zod/src/v4/locales/uz.ts +0 -116
  493. package/node_modules/zod/src/v4/locales/vi.ts +0 -117
  494. package/node_modules/zod/src/v4/locales/yo.ts +0 -124
  495. package/node_modules/zod/src/v4/locales/zh-CN.ts +0 -116
  496. package/node_modules/zod/src/v4/locales/zh-TW.ts +0 -115
  497. package/node_modules/zod/src/v4/mini/checks.ts +0 -32
  498. package/node_modules/zod/src/v4/mini/coerce.ts +0 -27
  499. package/node_modules/zod/src/v4/mini/external.ts +0 -40
  500. package/node_modules/zod/src/v4/mini/index.ts +0 -3
  501. package/node_modules/zod/src/v4/mini/iso.ts +0 -66
  502. package/node_modules/zod/src/v4/mini/parse.ts +0 -14
  503. package/node_modules/zod/src/v4/mini/schemas.ts +0 -1916
  504. package/node_modules/zod/src/v4/mini/tests/apply.test.ts +0 -24
  505. package/node_modules/zod/src/v4/mini/tests/assignability.test.ts +0 -129
  506. package/node_modules/zod/src/v4/mini/tests/brand.test.ts +0 -94
  507. package/node_modules/zod/src/v4/mini/tests/checks.test.ts +0 -144
  508. package/node_modules/zod/src/v4/mini/tests/codec.test.ts +0 -529
  509. package/node_modules/zod/src/v4/mini/tests/computed.test.ts +0 -36
  510. package/node_modules/zod/src/v4/mini/tests/error.test.ts +0 -22
  511. package/node_modules/zod/src/v4/mini/tests/functions.test.ts +0 -5
  512. package/node_modules/zod/src/v4/mini/tests/index.test.ts +0 -963
  513. package/node_modules/zod/src/v4/mini/tests/number.test.ts +0 -95
  514. package/node_modules/zod/src/v4/mini/tests/object.test.ts +0 -227
  515. package/node_modules/zod/src/v4/mini/tests/prototypes.test.ts +0 -43
  516. package/node_modules/zod/src/v4/mini/tests/recursive-types.test.ts +0 -275
  517. package/node_modules/zod/src/v4/mini/tests/standard-schema.test.ts +0 -50
  518. package/node_modules/zod/src/v4/mini/tests/string.test.ts +0 -347
  519. package/node_modules/zod/src/v4-mini/index.ts +0 -3
  520. package/node_modules/zod/v3/ZodError.cjs +0 -138
  521. package/node_modules/zod/v3/ZodError.d.cts +0 -164
  522. package/node_modules/zod/v3/ZodError.d.ts +0 -164
  523. package/node_modules/zod/v3/ZodError.js +0 -133
  524. package/node_modules/zod/v3/errors.cjs +0 -17
  525. package/node_modules/zod/v3/errors.d.cts +0 -5
  526. package/node_modules/zod/v3/errors.d.ts +0 -5
  527. package/node_modules/zod/v3/errors.js +0 -9
  528. package/node_modules/zod/v3/external.cjs +0 -22
  529. package/node_modules/zod/v3/external.d.cts +0 -6
  530. package/node_modules/zod/v3/external.d.ts +0 -6
  531. package/node_modules/zod/v3/external.js +0 -6
  532. package/node_modules/zod/v3/helpers/enumUtil.cjs +0 -2
  533. package/node_modules/zod/v3/helpers/enumUtil.d.cts +0 -8
  534. package/node_modules/zod/v3/helpers/enumUtil.d.ts +0 -8
  535. package/node_modules/zod/v3/helpers/enumUtil.js +0 -1
  536. package/node_modules/zod/v3/helpers/errorUtil.cjs +0 -9
  537. package/node_modules/zod/v3/helpers/errorUtil.d.cts +0 -9
  538. package/node_modules/zod/v3/helpers/errorUtil.d.ts +0 -9
  539. package/node_modules/zod/v3/helpers/errorUtil.js +0 -6
  540. package/node_modules/zod/v3/helpers/parseUtil.cjs +0 -124
  541. package/node_modules/zod/v3/helpers/parseUtil.d.cts +0 -78
  542. package/node_modules/zod/v3/helpers/parseUtil.d.ts +0 -78
  543. package/node_modules/zod/v3/helpers/parseUtil.js +0 -109
  544. package/node_modules/zod/v3/helpers/partialUtil.cjs +0 -2
  545. package/node_modules/zod/v3/helpers/partialUtil.d.cts +0 -8
  546. package/node_modules/zod/v3/helpers/partialUtil.d.ts +0 -8
  547. package/node_modules/zod/v3/helpers/partialUtil.js +0 -1
  548. package/node_modules/zod/v3/helpers/typeAliases.cjs +0 -2
  549. package/node_modules/zod/v3/helpers/typeAliases.d.cts +0 -2
  550. package/node_modules/zod/v3/helpers/typeAliases.d.ts +0 -2
  551. package/node_modules/zod/v3/helpers/typeAliases.js +0 -1
  552. package/node_modules/zod/v3/helpers/util.cjs +0 -137
  553. package/node_modules/zod/v3/helpers/util.d.cts +0 -85
  554. package/node_modules/zod/v3/helpers/util.d.ts +0 -85
  555. package/node_modules/zod/v3/helpers/util.js +0 -133
  556. package/node_modules/zod/v3/index.cjs +0 -33
  557. package/node_modules/zod/v3/index.d.cts +0 -4
  558. package/node_modules/zod/v3/index.d.ts +0 -4
  559. package/node_modules/zod/v3/index.js +0 -4
  560. package/node_modules/zod/v3/locales/en.cjs +0 -112
  561. package/node_modules/zod/v3/locales/en.d.cts +0 -3
  562. package/node_modules/zod/v3/locales/en.d.ts +0 -3
  563. package/node_modules/zod/v3/locales/en.js +0 -109
  564. package/node_modules/zod/v3/package.json +0 -6
  565. package/node_modules/zod/v3/standard-schema.cjs +0 -2
  566. package/node_modules/zod/v3/standard-schema.d.cts +0 -102
  567. package/node_modules/zod/v3/standard-schema.d.ts +0 -102
  568. package/node_modules/zod/v3/standard-schema.js +0 -1
  569. package/node_modules/zod/v3/types.cjs +0 -3777
  570. package/node_modules/zod/v3/types.d.cts +0 -1034
  571. package/node_modules/zod/v3/types.d.ts +0 -1034
  572. package/node_modules/zod/v3/types.js +0 -3695
  573. package/node_modules/zod/v4/classic/checks.cjs +0 -33
  574. package/node_modules/zod/v4/classic/checks.d.cts +0 -1
  575. package/node_modules/zod/v4/classic/checks.d.ts +0 -1
  576. package/node_modules/zod/v4/classic/checks.js +0 -1
  577. package/node_modules/zod/v4/classic/coerce.cjs +0 -47
  578. package/node_modules/zod/v4/classic/coerce.d.cts +0 -17
  579. package/node_modules/zod/v4/classic/coerce.d.ts +0 -17
  580. package/node_modules/zod/v4/classic/coerce.js +0 -17
  581. package/node_modules/zod/v4/classic/compat.cjs +0 -61
  582. package/node_modules/zod/v4/classic/compat.d.cts +0 -50
  583. package/node_modules/zod/v4/classic/compat.d.ts +0 -50
  584. package/node_modules/zod/v4/classic/compat.js +0 -31
  585. package/node_modules/zod/v4/classic/errors.cjs +0 -74
  586. package/node_modules/zod/v4/classic/errors.d.cts +0 -30
  587. package/node_modules/zod/v4/classic/errors.d.ts +0 -30
  588. package/node_modules/zod/v4/classic/errors.js +0 -48
  589. package/node_modules/zod/v4/classic/external.cjs +0 -73
  590. package/node_modules/zod/v4/classic/external.d.cts +0 -15
  591. package/node_modules/zod/v4/classic/external.d.ts +0 -15
  592. package/node_modules/zod/v4/classic/external.js +0 -20
  593. package/node_modules/zod/v4/classic/from-json-schema.cjs +0 -610
  594. package/node_modules/zod/v4/classic/from-json-schema.d.cts +0 -12
  595. package/node_modules/zod/v4/classic/from-json-schema.d.ts +0 -12
  596. package/node_modules/zod/v4/classic/from-json-schema.js +0 -584
  597. package/node_modules/zod/v4/classic/index.cjs +0 -33
  598. package/node_modules/zod/v4/classic/index.d.cts +0 -4
  599. package/node_modules/zod/v4/classic/index.d.ts +0 -4
  600. package/node_modules/zod/v4/classic/index.js +0 -4
  601. package/node_modules/zod/v4/classic/iso.cjs +0 -60
  602. package/node_modules/zod/v4/classic/iso.d.cts +0 -22
  603. package/node_modules/zod/v4/classic/iso.d.ts +0 -22
  604. package/node_modules/zod/v4/classic/iso.js +0 -30
  605. package/node_modules/zod/v4/classic/package.json +0 -6
  606. package/node_modules/zod/v4/classic/parse.cjs +0 -41
  607. package/node_modules/zod/v4/classic/parse.d.cts +0 -31
  608. package/node_modules/zod/v4/classic/parse.d.ts +0 -31
  609. package/node_modules/zod/v4/classic/parse.js +0 -15
  610. package/node_modules/zod/v4/classic/schemas.cjs +0 -1272
  611. package/node_modules/zod/v4/classic/schemas.d.cts +0 -739
  612. package/node_modules/zod/v4/classic/schemas.d.ts +0 -739
  613. package/node_modules/zod/v4/classic/schemas.js +0 -1157
  614. package/node_modules/zod/v4/core/api.cjs +0 -1222
  615. package/node_modules/zod/v4/core/api.d.cts +0 -304
  616. package/node_modules/zod/v4/core/api.d.ts +0 -304
  617. package/node_modules/zod/v4/core/api.js +0 -1082
  618. package/node_modules/zod/v4/core/checks.cjs +0 -601
  619. package/node_modules/zod/v4/core/checks.d.cts +0 -278
  620. package/node_modules/zod/v4/core/checks.d.ts +0 -278
  621. package/node_modules/zod/v4/core/checks.js +0 -575
  622. package/node_modules/zod/v4/core/core.cjs +0 -83
  623. package/node_modules/zod/v4/core/core.d.cts +0 -70
  624. package/node_modules/zod/v4/core/core.d.ts +0 -70
  625. package/node_modules/zod/v4/core/core.js +0 -76
  626. package/node_modules/zod/v4/core/doc.cjs +0 -39
  627. package/node_modules/zod/v4/core/doc.d.cts +0 -14
  628. package/node_modules/zod/v4/core/doc.d.ts +0 -14
  629. package/node_modules/zod/v4/core/doc.js +0 -35
  630. package/node_modules/zod/v4/core/errors.cjs +0 -213
  631. package/node_modules/zod/v4/core/errors.d.cts +0 -220
  632. package/node_modules/zod/v4/core/errors.d.ts +0 -220
  633. package/node_modules/zod/v4/core/errors.js +0 -182
  634. package/node_modules/zod/v4/core/index.cjs +0 -47
  635. package/node_modules/zod/v4/core/index.d.cts +0 -16
  636. package/node_modules/zod/v4/core/index.d.ts +0 -16
  637. package/node_modules/zod/v4/core/index.js +0 -16
  638. package/node_modules/zod/v4/core/json-schema-generator.cjs +0 -99
  639. package/node_modules/zod/v4/core/json-schema-generator.d.cts +0 -65
  640. package/node_modules/zod/v4/core/json-schema-generator.d.ts +0 -65
  641. package/node_modules/zod/v4/core/json-schema-generator.js +0 -95
  642. package/node_modules/zod/v4/core/json-schema-processors.cjs +0 -648
  643. package/node_modules/zod/v4/core/json-schema-processors.d.cts +0 -49
  644. package/node_modules/zod/v4/core/json-schema-processors.d.ts +0 -49
  645. package/node_modules/zod/v4/core/json-schema-processors.js +0 -605
  646. package/node_modules/zod/v4/core/json-schema.cjs +0 -2
  647. package/node_modules/zod/v4/core/json-schema.d.cts +0 -88
  648. package/node_modules/zod/v4/core/json-schema.d.ts +0 -88
  649. package/node_modules/zod/v4/core/json-schema.js +0 -1
  650. package/node_modules/zod/v4/core/package.json +0 -6
  651. package/node_modules/zod/v4/core/parse.cjs +0 -131
  652. package/node_modules/zod/v4/core/parse.d.cts +0 -49
  653. package/node_modules/zod/v4/core/parse.d.ts +0 -49
  654. package/node_modules/zod/v4/core/parse.js +0 -93
  655. package/node_modules/zod/v4/core/regexes.cjs +0 -166
  656. package/node_modules/zod/v4/core/regexes.d.cts +0 -79
  657. package/node_modules/zod/v4/core/regexes.d.ts +0 -79
  658. package/node_modules/zod/v4/core/regexes.js +0 -133
  659. package/node_modules/zod/v4/core/registries.cjs +0 -56
  660. package/node_modules/zod/v4/core/registries.d.cts +0 -35
  661. package/node_modules/zod/v4/core/registries.d.ts +0 -35
  662. package/node_modules/zod/v4/core/registries.js +0 -51
  663. package/node_modules/zod/v4/core/schemas.cjs +0 -2124
  664. package/node_modules/zod/v4/core/schemas.d.cts +0 -1146
  665. package/node_modules/zod/v4/core/schemas.d.ts +0 -1146
  666. package/node_modules/zod/v4/core/schemas.js +0 -2093
  667. package/node_modules/zod/v4/core/standard-schema.cjs +0 -2
  668. package/node_modules/zod/v4/core/standard-schema.d.cts +0 -126
  669. package/node_modules/zod/v4/core/standard-schema.d.ts +0 -126
  670. package/node_modules/zod/v4/core/standard-schema.js +0 -1
  671. package/node_modules/zod/v4/core/to-json-schema.cjs +0 -446
  672. package/node_modules/zod/v4/core/to-json-schema.d.cts +0 -114
  673. package/node_modules/zod/v4/core/to-json-schema.d.ts +0 -114
  674. package/node_modules/zod/v4/core/to-json-schema.js +0 -437
  675. package/node_modules/zod/v4/core/util.cjs +0 -710
  676. package/node_modules/zod/v4/core/util.d.cts +0 -199
  677. package/node_modules/zod/v4/core/util.d.ts +0 -199
  678. package/node_modules/zod/v4/core/util.js +0 -651
  679. package/node_modules/zod/v4/core/versions.cjs +0 -8
  680. package/node_modules/zod/v4/core/versions.d.cts +0 -5
  681. package/node_modules/zod/v4/core/versions.d.ts +0 -5
  682. package/node_modules/zod/v4/core/versions.js +0 -5
  683. package/node_modules/zod/v4/index.cjs +0 -22
  684. package/node_modules/zod/v4/index.d.cts +0 -3
  685. package/node_modules/zod/v4/index.d.ts +0 -3
  686. package/node_modules/zod/v4/index.js +0 -3
  687. package/node_modules/zod/v4/locales/ar.cjs +0 -133
  688. package/node_modules/zod/v4/locales/ar.d.cts +0 -5
  689. package/node_modules/zod/v4/locales/ar.d.ts +0 -4
  690. package/node_modules/zod/v4/locales/ar.js +0 -106
  691. package/node_modules/zod/v4/locales/az.cjs +0 -132
  692. package/node_modules/zod/v4/locales/az.d.cts +0 -5
  693. package/node_modules/zod/v4/locales/az.d.ts +0 -4
  694. package/node_modules/zod/v4/locales/az.js +0 -105
  695. package/node_modules/zod/v4/locales/be.cjs +0 -183
  696. package/node_modules/zod/v4/locales/be.d.cts +0 -5
  697. package/node_modules/zod/v4/locales/be.d.ts +0 -4
  698. package/node_modules/zod/v4/locales/be.js +0 -156
  699. package/node_modules/zod/v4/locales/bg.cjs +0 -147
  700. package/node_modules/zod/v4/locales/bg.d.cts +0 -5
  701. package/node_modules/zod/v4/locales/bg.d.ts +0 -4
  702. package/node_modules/zod/v4/locales/bg.js +0 -120
  703. package/node_modules/zod/v4/locales/ca.cjs +0 -134
  704. package/node_modules/zod/v4/locales/ca.d.cts +0 -5
  705. package/node_modules/zod/v4/locales/ca.d.ts +0 -4
  706. package/node_modules/zod/v4/locales/ca.js +0 -107
  707. package/node_modules/zod/v4/locales/cs.cjs +0 -138
  708. package/node_modules/zod/v4/locales/cs.d.cts +0 -5
  709. package/node_modules/zod/v4/locales/cs.d.ts +0 -4
  710. package/node_modules/zod/v4/locales/cs.js +0 -111
  711. package/node_modules/zod/v4/locales/da.cjs +0 -142
  712. package/node_modules/zod/v4/locales/da.d.cts +0 -5
  713. package/node_modules/zod/v4/locales/da.d.ts +0 -4
  714. package/node_modules/zod/v4/locales/da.js +0 -115
  715. package/node_modules/zod/v4/locales/de.cjs +0 -135
  716. package/node_modules/zod/v4/locales/de.d.cts +0 -5
  717. package/node_modules/zod/v4/locales/de.d.ts +0 -4
  718. package/node_modules/zod/v4/locales/de.js +0 -108
  719. package/node_modules/zod/v4/locales/en.cjs +0 -136
  720. package/node_modules/zod/v4/locales/en.d.cts +0 -5
  721. package/node_modules/zod/v4/locales/en.d.ts +0 -4
  722. package/node_modules/zod/v4/locales/en.js +0 -109
  723. package/node_modules/zod/v4/locales/eo.cjs +0 -136
  724. package/node_modules/zod/v4/locales/eo.d.cts +0 -5
  725. package/node_modules/zod/v4/locales/eo.d.ts +0 -4
  726. package/node_modules/zod/v4/locales/eo.js +0 -109
  727. package/node_modules/zod/v4/locales/es.cjs +0 -159
  728. package/node_modules/zod/v4/locales/es.d.cts +0 -5
  729. package/node_modules/zod/v4/locales/es.d.ts +0 -4
  730. package/node_modules/zod/v4/locales/es.js +0 -132
  731. package/node_modules/zod/v4/locales/fa.cjs +0 -141
  732. package/node_modules/zod/v4/locales/fa.d.cts +0 -5
  733. package/node_modules/zod/v4/locales/fa.d.ts +0 -4
  734. package/node_modules/zod/v4/locales/fa.js +0 -114
  735. package/node_modules/zod/v4/locales/fi.cjs +0 -139
  736. package/node_modules/zod/v4/locales/fi.d.cts +0 -5
  737. package/node_modules/zod/v4/locales/fi.d.ts +0 -4
  738. package/node_modules/zod/v4/locales/fi.js +0 -112
  739. package/node_modules/zod/v4/locales/fr-CA.cjs +0 -134
  740. package/node_modules/zod/v4/locales/fr-CA.d.cts +0 -5
  741. package/node_modules/zod/v4/locales/fr-CA.d.ts +0 -4
  742. package/node_modules/zod/v4/locales/fr-CA.js +0 -107
  743. package/node_modules/zod/v4/locales/fr.cjs +0 -135
  744. package/node_modules/zod/v4/locales/fr.d.cts +0 -5
  745. package/node_modules/zod/v4/locales/fr.d.ts +0 -4
  746. package/node_modules/zod/v4/locales/fr.js +0 -108
  747. package/node_modules/zod/v4/locales/he.cjs +0 -241
  748. package/node_modules/zod/v4/locales/he.d.cts +0 -5
  749. package/node_modules/zod/v4/locales/he.d.ts +0 -4
  750. package/node_modules/zod/v4/locales/he.js +0 -214
  751. package/node_modules/zod/v4/locales/hu.cjs +0 -135
  752. package/node_modules/zod/v4/locales/hu.d.cts +0 -5
  753. package/node_modules/zod/v4/locales/hu.d.ts +0 -4
  754. package/node_modules/zod/v4/locales/hu.js +0 -108
  755. package/node_modules/zod/v4/locales/hy.cjs +0 -174
  756. package/node_modules/zod/v4/locales/hy.d.cts +0 -5
  757. package/node_modules/zod/v4/locales/hy.d.ts +0 -4
  758. package/node_modules/zod/v4/locales/hy.js +0 -147
  759. package/node_modules/zod/v4/locales/id.cjs +0 -133
  760. package/node_modules/zod/v4/locales/id.d.cts +0 -5
  761. package/node_modules/zod/v4/locales/id.d.ts +0 -4
  762. package/node_modules/zod/v4/locales/id.js +0 -106
  763. package/node_modules/zod/v4/locales/index.cjs +0 -104
  764. package/node_modules/zod/v4/locales/index.d.cts +0 -49
  765. package/node_modules/zod/v4/locales/index.d.ts +0 -49
  766. package/node_modules/zod/v4/locales/index.js +0 -49
  767. package/node_modules/zod/v4/locales/is.cjs +0 -136
  768. package/node_modules/zod/v4/locales/is.d.cts +0 -5
  769. package/node_modules/zod/v4/locales/is.d.ts +0 -4
  770. package/node_modules/zod/v4/locales/is.js +0 -109
  771. package/node_modules/zod/v4/locales/it.cjs +0 -135
  772. package/node_modules/zod/v4/locales/it.d.cts +0 -5
  773. package/node_modules/zod/v4/locales/it.d.ts +0 -4
  774. package/node_modules/zod/v4/locales/it.js +0 -108
  775. package/node_modules/zod/v4/locales/ja.cjs +0 -134
  776. package/node_modules/zod/v4/locales/ja.d.cts +0 -5
  777. package/node_modules/zod/v4/locales/ja.d.ts +0 -4
  778. package/node_modules/zod/v4/locales/ja.js +0 -107
  779. package/node_modules/zod/v4/locales/ka.cjs +0 -139
  780. package/node_modules/zod/v4/locales/ka.d.cts +0 -5
  781. package/node_modules/zod/v4/locales/ka.d.ts +0 -4
  782. package/node_modules/zod/v4/locales/ka.js +0 -112
  783. package/node_modules/zod/v4/locales/kh.cjs +0 -12
  784. package/node_modules/zod/v4/locales/kh.d.cts +0 -5
  785. package/node_modules/zod/v4/locales/kh.d.ts +0 -5
  786. package/node_modules/zod/v4/locales/kh.js +0 -5
  787. package/node_modules/zod/v4/locales/km.cjs +0 -137
  788. package/node_modules/zod/v4/locales/km.d.cts +0 -5
  789. package/node_modules/zod/v4/locales/km.d.ts +0 -4
  790. package/node_modules/zod/v4/locales/km.js +0 -110
  791. package/node_modules/zod/v4/locales/ko.cjs +0 -138
  792. package/node_modules/zod/v4/locales/ko.d.cts +0 -5
  793. package/node_modules/zod/v4/locales/ko.d.ts +0 -4
  794. package/node_modules/zod/v4/locales/ko.js +0 -111
  795. package/node_modules/zod/v4/locales/lt.cjs +0 -230
  796. package/node_modules/zod/v4/locales/lt.d.cts +0 -5
  797. package/node_modules/zod/v4/locales/lt.d.ts +0 -4
  798. package/node_modules/zod/v4/locales/lt.js +0 -203
  799. package/node_modules/zod/v4/locales/mk.cjs +0 -136
  800. package/node_modules/zod/v4/locales/mk.d.cts +0 -5
  801. package/node_modules/zod/v4/locales/mk.d.ts +0 -4
  802. package/node_modules/zod/v4/locales/mk.js +0 -109
  803. package/node_modules/zod/v4/locales/ms.cjs +0 -134
  804. package/node_modules/zod/v4/locales/ms.d.cts +0 -5
  805. package/node_modules/zod/v4/locales/ms.d.ts +0 -4
  806. package/node_modules/zod/v4/locales/ms.js +0 -107
  807. package/node_modules/zod/v4/locales/nl.cjs +0 -137
  808. package/node_modules/zod/v4/locales/nl.d.cts +0 -5
  809. package/node_modules/zod/v4/locales/nl.d.ts +0 -4
  810. package/node_modules/zod/v4/locales/nl.js +0 -110
  811. package/node_modules/zod/v4/locales/no.cjs +0 -135
  812. package/node_modules/zod/v4/locales/no.d.cts +0 -5
  813. package/node_modules/zod/v4/locales/no.d.ts +0 -4
  814. package/node_modules/zod/v4/locales/no.js +0 -108
  815. package/node_modules/zod/v4/locales/ota.cjs +0 -136
  816. package/node_modules/zod/v4/locales/ota.d.cts +0 -5
  817. package/node_modules/zod/v4/locales/ota.d.ts +0 -4
  818. package/node_modules/zod/v4/locales/ota.js +0 -109
  819. package/node_modules/zod/v4/locales/package.json +0 -6
  820. package/node_modules/zod/v4/locales/pl.cjs +0 -136
  821. package/node_modules/zod/v4/locales/pl.d.cts +0 -5
  822. package/node_modules/zod/v4/locales/pl.d.ts +0 -4
  823. package/node_modules/zod/v4/locales/pl.js +0 -109
  824. package/node_modules/zod/v4/locales/ps.cjs +0 -141
  825. package/node_modules/zod/v4/locales/ps.d.cts +0 -5
  826. package/node_modules/zod/v4/locales/ps.d.ts +0 -4
  827. package/node_modules/zod/v4/locales/ps.js +0 -114
  828. package/node_modules/zod/v4/locales/pt.cjs +0 -135
  829. package/node_modules/zod/v4/locales/pt.d.cts +0 -5
  830. package/node_modules/zod/v4/locales/pt.d.ts +0 -4
  831. package/node_modules/zod/v4/locales/pt.js +0 -108
  832. package/node_modules/zod/v4/locales/ru.cjs +0 -183
  833. package/node_modules/zod/v4/locales/ru.d.cts +0 -5
  834. package/node_modules/zod/v4/locales/ru.d.ts +0 -4
  835. package/node_modules/zod/v4/locales/ru.js +0 -156
  836. package/node_modules/zod/v4/locales/sl.cjs +0 -136
  837. package/node_modules/zod/v4/locales/sl.d.cts +0 -5
  838. package/node_modules/zod/v4/locales/sl.d.ts +0 -4
  839. package/node_modules/zod/v4/locales/sl.js +0 -109
  840. package/node_modules/zod/v4/locales/sv.cjs +0 -137
  841. package/node_modules/zod/v4/locales/sv.d.cts +0 -5
  842. package/node_modules/zod/v4/locales/sv.d.ts +0 -4
  843. package/node_modules/zod/v4/locales/sv.js +0 -110
  844. package/node_modules/zod/v4/locales/ta.cjs +0 -137
  845. package/node_modules/zod/v4/locales/ta.d.cts +0 -5
  846. package/node_modules/zod/v4/locales/ta.d.ts +0 -4
  847. package/node_modules/zod/v4/locales/ta.js +0 -110
  848. package/node_modules/zod/v4/locales/th.cjs +0 -137
  849. package/node_modules/zod/v4/locales/th.d.cts +0 -5
  850. package/node_modules/zod/v4/locales/th.d.ts +0 -4
  851. package/node_modules/zod/v4/locales/th.js +0 -110
  852. package/node_modules/zod/v4/locales/tr.cjs +0 -132
  853. package/node_modules/zod/v4/locales/tr.d.cts +0 -5
  854. package/node_modules/zod/v4/locales/tr.d.ts +0 -4
  855. package/node_modules/zod/v4/locales/tr.js +0 -105
  856. package/node_modules/zod/v4/locales/ua.cjs +0 -12
  857. package/node_modules/zod/v4/locales/ua.d.cts +0 -5
  858. package/node_modules/zod/v4/locales/ua.d.ts +0 -5
  859. package/node_modules/zod/v4/locales/ua.js +0 -5
  860. package/node_modules/zod/v4/locales/uk.cjs +0 -135
  861. package/node_modules/zod/v4/locales/uk.d.cts +0 -5
  862. package/node_modules/zod/v4/locales/uk.d.ts +0 -4
  863. package/node_modules/zod/v4/locales/uk.js +0 -108
  864. package/node_modules/zod/v4/locales/ur.cjs +0 -137
  865. package/node_modules/zod/v4/locales/ur.d.cts +0 -5
  866. package/node_modules/zod/v4/locales/ur.d.ts +0 -4
  867. package/node_modules/zod/v4/locales/ur.js +0 -110
  868. package/node_modules/zod/v4/locales/uz.cjs +0 -136
  869. package/node_modules/zod/v4/locales/uz.d.cts +0 -5
  870. package/node_modules/zod/v4/locales/uz.d.ts +0 -4
  871. package/node_modules/zod/v4/locales/uz.js +0 -109
  872. package/node_modules/zod/v4/locales/vi.cjs +0 -135
  873. package/node_modules/zod/v4/locales/vi.d.cts +0 -5
  874. package/node_modules/zod/v4/locales/vi.d.ts +0 -4
  875. package/node_modules/zod/v4/locales/vi.js +0 -108
  876. package/node_modules/zod/v4/locales/yo.cjs +0 -134
  877. package/node_modules/zod/v4/locales/yo.d.cts +0 -5
  878. package/node_modules/zod/v4/locales/yo.d.ts +0 -4
  879. package/node_modules/zod/v4/locales/yo.js +0 -107
  880. package/node_modules/zod/v4/locales/zh-CN.cjs +0 -136
  881. package/node_modules/zod/v4/locales/zh-CN.d.cts +0 -5
  882. package/node_modules/zod/v4/locales/zh-CN.d.ts +0 -4
  883. package/node_modules/zod/v4/locales/zh-CN.js +0 -109
  884. package/node_modules/zod/v4/locales/zh-TW.cjs +0 -134
  885. package/node_modules/zod/v4/locales/zh-TW.d.cts +0 -5
  886. package/node_modules/zod/v4/locales/zh-TW.d.ts +0 -4
  887. package/node_modules/zod/v4/locales/zh-TW.js +0 -107
  888. package/node_modules/zod/v4/mini/checks.cjs +0 -34
  889. package/node_modules/zod/v4/mini/checks.d.cts +0 -1
  890. package/node_modules/zod/v4/mini/checks.d.ts +0 -1
  891. package/node_modules/zod/v4/mini/checks.js +0 -1
  892. package/node_modules/zod/v4/mini/coerce.cjs +0 -52
  893. package/node_modules/zod/v4/mini/coerce.d.cts +0 -7
  894. package/node_modules/zod/v4/mini/coerce.d.ts +0 -7
  895. package/node_modules/zod/v4/mini/coerce.js +0 -22
  896. package/node_modules/zod/v4/mini/external.cjs +0 -63
  897. package/node_modules/zod/v4/mini/external.d.cts +0 -12
  898. package/node_modules/zod/v4/mini/external.d.ts +0 -12
  899. package/node_modules/zod/v4/mini/external.js +0 -14
  900. package/node_modules/zod/v4/mini/index.cjs +0 -32
  901. package/node_modules/zod/v4/mini/index.d.cts +0 -3
  902. package/node_modules/zod/v4/mini/index.d.ts +0 -3
  903. package/node_modules/zod/v4/mini/index.js +0 -3
  904. package/node_modules/zod/v4/mini/iso.cjs +0 -64
  905. package/node_modules/zod/v4/mini/iso.d.cts +0 -22
  906. package/node_modules/zod/v4/mini/iso.d.ts +0 -22
  907. package/node_modules/zod/v4/mini/iso.js +0 -34
  908. package/node_modules/zod/v4/mini/package.json +0 -6
  909. package/node_modules/zod/v4/mini/parse.cjs +0 -16
  910. package/node_modules/zod/v4/mini/parse.d.cts +0 -1
  911. package/node_modules/zod/v4/mini/parse.d.ts +0 -1
  912. package/node_modules/zod/v4/mini/parse.js +0 -1
  913. package/node_modules/zod/v4/mini/schemas.cjs +0 -1046
  914. package/node_modules/zod/v4/mini/schemas.d.cts +0 -427
  915. package/node_modules/zod/v4/mini/schemas.d.ts +0 -427
  916. package/node_modules/zod/v4/mini/schemas.js +0 -925
  917. package/node_modules/zod/v4/package.json +0 -6
  918. package/node_modules/zod/v4-mini/index.cjs +0 -32
  919. package/node_modules/zod/v4-mini/index.d.cts +0 -3
  920. package/node_modules/zod/v4-mini/index.d.ts +0 -3
  921. package/node_modules/zod/v4-mini/index.js +0 -3
  922. package/node_modules/zod/v4-mini/package.json +0 -6
  923. package/plugin/dist/index.d.ts +0 -208
  924. package/plugin/dist/index.d.ts.map +0 -1
  925. package/plugin/dist/index.js +0 -3474
  926. package/plugin/dist/index.js.map +0 -1
@@ -1,4280 +0,0 @@
1
- /**
2
- * hypermem Compositor
3
- *
4
- * Assembles context for LLM calls by orchestrating all four memory layers:
5
- * L1 Redis — hot session working memory (system, identity, recent msgs)
6
- * L2 Messages — conversation history from messages.db
7
- * L3 Vectors — semantic search across all indexed content
8
- * L4 Library — structured knowledge (facts, preferences, knowledge, episodes)
9
- *
10
- * Token-budgeted: never exceeds the budget, prioritizes by configured order.
11
- * Provider-neutral internally, translates at the output boundary.
12
- */
13
- import { createHash } from 'node:crypto';
14
- import { filterByScope } from './retrieval-policy.js';
15
- import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
16
- import { MessageStore } from './message-store.js';
17
- import { SessionTopicMap } from './session-topic-map.js';
18
- import { toProviderFormat, detectProvider as s4DetectProvider } from './provider-translator.js';
19
- import { DocChunkStore } from './doc-chunk-store.js';
20
- import { hybridSearch } from './hybrid-retrieval.js';
21
- import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence, getCompactionEligibility } from './compaction-fence.js';
22
- import { getActiveContext, getOrCreateActiveContext } from './context-store.js';
23
- import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
24
- import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
25
- import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
26
- import { KnowledgeStore } from './knowledge-store.js';
27
- import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
28
- import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
29
- import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
30
- import { resolveAdaptiveLifecyclePolicy } from './adaptive-lifecycle.js';
31
- import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
32
- import { ToolArtifactStore } from './tool-artifact-store.js';
33
- import { insertCompositionSnapshot, getLatestValidCompositionSnapshot, listCompositionSnapshots, MAX_WARM_RESTORE_REPAIR_DEPTH, } from './composition-snapshot-store.js';
34
- import { buildCompositionSnapshotSlots, restoreWarmSnapshotState, WARM_RESTORE_MEASUREMENT_GATES, } from './composition-snapshot-runtime.js';
35
- /**
36
- * Files that OpenClaw's contextInjection injects into the system prompt.
37
- * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
38
- * Exported so plugin and other consumers can share the same dedup set.
39
- */
40
- export const OPENCLAW_BOOTSTRAP_FILES = new Set([
41
- 'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
42
- 'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
43
- ]);
44
- const CACHE_PREFIX_BOUNDARY_SLOT = 'cache-prefix-boundary';
45
- /**
46
- * Model context window sizes by provider/model string (or partial match).
47
- * Used as fallback when tokenBudget is not passed by the runtime.
48
- * Order matters: first match wins. Partial substring match on the model string.
49
- */
50
- const MODEL_CONTEXT_WINDOWS = [
51
- // Anthropic
52
- { pattern: 'claude-opus-4', tokens: 200_000 },
53
- { pattern: 'claude-sonnet-4', tokens: 200_000 },
54
- { pattern: 'claude-3-5', tokens: 200_000 },
55
- { pattern: 'claude-3-7', tokens: 200_000 },
56
- { pattern: 'claude', tokens: 200_000 },
57
- // OpenAI
58
- { pattern: 'gpt-5', tokens: 128_000 },
59
- { pattern: 'gpt-4o', tokens: 128_000 },
60
- { pattern: 'gpt-4', tokens: 128_000 },
61
- { pattern: 'o3', tokens: 128_000 },
62
- { pattern: 'o4', tokens: 128_000 },
63
- // Google
64
- { pattern: 'gemini-3.1-pro', tokens: 1_000_000 },
65
- { pattern: 'gemini-3.1-flash', tokens: 1_000_000 },
66
- { pattern: 'gemini-2.5-pro', tokens: 1_000_000 },
67
- { pattern: 'gemini-2', tokens: 1_000_000 },
68
- { pattern: 'gemini', tokens: 1_000_000 },
69
- // Zhipu / GLM
70
- { pattern: 'glm-5', tokens: 131_072 },
71
- { pattern: 'glm-4', tokens: 131_072 },
72
- // Alibaba / Qwen
73
- { pattern: 'qwen3', tokens: 262_144 },
74
- { pattern: 'qwen', tokens: 131_072 },
75
- // DeepSeek
76
- { pattern: 'deepseek-v3', tokens: 131_072 },
77
- { pattern: 'deepseek', tokens: 131_072 },
78
- ];
79
- const MODEL_MECW = [
80
- // Claude 200k: effective recall degrades above ~140k; clamp composite budget
81
- { pattern: 'claude', mecwFloor: 80_000, mecwCeiling: 140_000, preferredHistoryFraction: 0.35, preferredMemoryFraction: 0.45 },
82
- // Gemini 1M: reliable up to ~180k for grounded retrieval; less for recall
83
- { pattern: 'gemini', mecwFloor: 100_000, mecwCeiling: 180_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.45 },
84
- // OpenAI 128k: full window is trustable; use standard fractions
85
- { pattern: 'gpt', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
86
- { pattern: 'o3', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
87
- { pattern: 'o4', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
88
- // Smaller windows: full window is trustable
89
- { pattern: 'qwen3', mecwFloor: 262_144, mecwCeiling: 262_144, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
90
- { pattern: 'qwen', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
91
- { pattern: 'glm', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
92
- { pattern: 'deepseek', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
93
- ];
94
- /**
95
- * B4: Compute model-aware lane budget fractions.
96
- *
97
- * Resolves the effective historyFraction and memoryFraction for a compose pass
98
- * given the model and its effective budget. Uses the MECW catalog to blend
99
- * away from fixed fractions when the budget approaches the MECW ceiling,
100
- * so the compositor allocates proportionally for what the model can actually use.
101
- *
102
- * Returns:
103
- * historyFraction — fraction of effective budget to give history
104
- * memoryFraction — fraction of effective budget to give memory pool
105
- * mecwProfile — which MECW entry matched (undefined = no match / full window)
106
- * mecwApplied — true when MECW adjustment changed the fractions
107
- * mecwBlend — 0..1 blend factor (0 = below floor, 1 = at/above ceiling)
108
- */
109
- export function resolveModelLaneBudgets(model, effectiveBudget, configHistoryFraction, configMemoryFraction) {
110
- if (!model) {
111
- return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
112
- }
113
- const normalized = model.toLowerCase();
114
- for (const entry of MODEL_MECW) {
115
- if (!normalized.includes(entry.pattern))
116
- continue;
117
- // Budget is at or below the floor — full window is safe, use config fractions
118
- if (effectiveBudget <= entry.mecwFloor) {
119
- return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: entry.pattern, mecwApplied: false, mecwBlend: 0 };
120
- }
121
- // Budget is at or above the ceiling — use preferred fractions fully
122
- if (effectiveBudget >= entry.mecwCeiling) {
123
- return { historyFraction: entry.preferredHistoryFraction, memoryFraction: entry.preferredMemoryFraction, mecwProfile: entry.pattern, mecwApplied: true, mecwBlend: 1 };
124
- }
125
- // Budget is between floor and ceiling — linear blend
126
- const blend = (effectiveBudget - entry.mecwFloor) / (entry.mecwCeiling - entry.mecwFloor);
127
- const historyFraction = configHistoryFraction + blend * (entry.preferredHistoryFraction - configHistoryFraction);
128
- const memoryFraction = configMemoryFraction + blend * (entry.preferredMemoryFraction - configMemoryFraction);
129
- return {
130
- historyFraction: Math.round(historyFraction * 1000) / 1000,
131
- memoryFraction: Math.round(memoryFraction * 1000) / 1000,
132
- mecwProfile: entry.pattern,
133
- mecwApplied: true,
134
- mecwBlend: Math.round(blend * 1000) / 1000,
135
- };
136
- }
137
- // No MECW entry matched — use config fractions unchanged
138
- return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
139
- }
140
/**
 * Resolve the effective input token budget for a model.
 *
 * The first MODEL_CONTEXT_WINDOWS entry whose pattern is a substring of the
 * lower-cased model name supplies the context window. Priority:
 *   1. Window matched and budgetFraction given:
 *        floor(window × budgetFraction × (1 - reserve))
 *   2. Window matched, no budgetFraction:
 *        floor(window × (1 - reserve))
 *   3. No model, or no matching entry: defaultBudget is returned unchanged
 *      (it is an absolute number; the reserve is not applied to it).
 *
 * @param model          Model identifier (may be empty/undefined).
 * @param defaultBudget  Absolute fallback budget in tokens.
 * @param reserve        Fraction of the window held back; defaults to 0.15.
 * @param budgetFraction Optional fraction of the window to budget against.
 * @returns Token budget as an integer (or defaultBudget as passed in).
 */
function resolveModelBudget(model, defaultBudget, reserve = 0.15, budgetFraction) {
    if (!model) {
        return defaultBudget;
    }
    const normalized = model.toLowerCase();
    for (const entry of MODEL_CONTEXT_WINDOWS) {
        if (!normalized.includes(entry.pattern)) {
            continue;
        }
        // `!= null` on purpose: both null and undefined mean "no fraction".
        const usableWindow = budgetFraction != null ? entry.tokens * budgetFraction : entry.tokens;
        return Math.floor(usableWindow * (1 - reserve));
    }
    return defaultBudget;
}
177
/**
 * Look up the raw context window size for a model, with no reserve applied.
 *
 * When the model is missing or matches no MODEL_CONTEXT_WINDOWS pattern, the
 * window is derived as floor(defaultBudget / 0.85), i.e. the default budget
 * is treated as what remains after a 15% reserve.
 *
 * @param model         Model identifier (may be empty/undefined).
 * @param defaultBudget Absolute fallback budget in tokens.
 * @returns Context window size in tokens.
 */
function resolveModelWindow(model, defaultBudget) {
    if (model) {
        const needle = model.toLowerCase();
        for (const { pattern, tokens } of MODEL_CONTEXT_WINDOWS) {
            if (needle.includes(pattern)) {
                return tokens;
            }
        }
    }
    return Math.floor(defaultBudget / 0.85);
}
193
/**
 * Compute the context window reserve fraction from recent turn cost.
 *
 *   dynamic = avgTurnCost × horizon / totalWindow
 *   reserve = clamp(dynamic, base, max)
 *
 * Outcomes, in evaluation order:
 *   - dynamic reserve disabled, totalWindow <= 0, or no messages at all:
 *     the cold-session floor (0.15) is returned so the full window stays
 *     available.
 *   - messages exist but none are user/assistant: the static base reserve.
 *   - dynamic <= base: base, not flagged as dynamic.
 *   - dynamic >= max: max, with pressureHigh = true so callers can warn or
 *     checkpoint.
 *   - otherwise: the dynamic fraction itself.
 *
 * @param recentMessages Recent session messages.
 * @param totalWindow    Raw context window size in tokens.
 * @param config         Reads reserveFraction / contextWindowReserve,
 *                       dynamicReserveTurnHorizon, dynamicReserveMax,
 *                       dynamicReserveEnabled.
 * @returns { reserve, avgTurnCost, dynamic, pressureHigh }
 */
function computeDynamicReserve(recentMessages, totalWindow, config) {
    const COLD_SESSION_FLOOR = 0.15;
    const SAMPLE_SIZE = 20;
    const staticReserve = config.reserveFraction ?? config.contextWindowReserve ?? 0.25;
    const turnHorizon = config.dynamicReserveTurnHorizon ?? 5;
    const reserveCeiling = config.dynamicReserveMax ?? 0.50;
    const isEnabled = config.dynamicReserveEnabled ?? true;
    const outcome = (reserve, avgTurnCost, dynamic, pressureHigh) => ({ reserve, avgTurnCost, dynamic, pressureHigh });
    if (!isEnabled || totalWindow <= 0 || recentMessages.length === 0) {
        return outcome(COLD_SESSION_FLOOR, 0, false, false);
    }
    // Only user and assistant messages feed the turn-cost estimate; other
    // roles (e.g. tool messages) are left out of the sample.
    const conversational = recentMessages.filter((m) => m.role === 'user' || m.role === 'assistant');
    const sample = conversational.slice(-SAMPLE_SIZE);
    if (sample.length === 0) {
        return outcome(staticReserve, 0, false, false);
    }
    let sampleCost = 0;
    for (const message of sample) {
        sampleCost += estimateMessageTokens(message);
    }
    const avgTurnCost = Math.floor(sampleCost / sample.length);
    const projectedFraction = (avgTurnCost * turnHorizon) / totalWindow;
    if (projectedFraction <= staticReserve) {
        return outcome(staticReserve, avgTurnCost, false, false);
    }
    if (projectedFraction >= reserveCeiling) {
        return outcome(reserveCeiling, avgTurnCost, true, true);
    }
    return outcome(projectedFraction, avgTurnCost, true, false);
}
238
/**
 * Label a session by the share of sampled messages that carry tool content.
 *
 *   'tool-heavy': at least 20% of the messages have tool calls or results.
 *   'plain-chat': anything below that, including an empty sample.
 *
 * @param messages Recent message sample.
 * @returns 'tool-heavy' | 'plain-chat'
 */
export function classifySessionType(messages) {
    const TOOL_HEAVY_RATIO = 0.20;
    const sampleSize = messages.length;
    if (sampleSize === 0) {
        return 'plain-chat';
    }
    let toolMessages = 0;
    for (const message of messages) {
        if (hasToolContent(message)) {
            toolMessages += 1;
        }
    }
    if (toolMessages / sampleSize >= TOOL_HEAVY_RATIO) {
        return 'tool-heavy';
    }
    return 'plain-chat';
}
253
/**
 * Average estimated token cost per message over a sample, rounded up and
 * never below 1.
 *
 * An empty sample yields a fixed 100 tokens so callers that divide a budget
 * by the density never divide by zero.
 *
 * @param messages Recent message sample.
 * @returns Tokens per message (integer >= 1).
 */
export function estimateObservedMsgDensity(messages) {
    const EMPTY_SAMPLE_DENSITY = 100;
    const count = messages.length;
    if (count === 0) {
        return EMPTY_SAMPLE_DENSITY;
    }
    let tokenSum = 0;
    for (const message of messages) {
        tokenSum += estimateMessageTokens(message);
    }
    const average = Math.ceil(tokenSum / count);
    return Math.max(1, average);
}
267
/**
 * Compute how many history messages to fetch so they pre-fit the budget.
 *
 *   depth = floor(historyBudgetTokens × 0.85 / density)
 *
 * The 0.85 safety margin leaves headroom for estimation error.
 *
 * - 'tool-heavy': density is the observed density scaled by 0.30 (the
 *   expected post-gradient ratio) with a floor of 50 tokens; minimum depth 15.
 * - anything else (plain chat): density is used as observed; minimum depth 20.
 *
 * The result never exceeds maxHistoryMessages.
 *
 * @param sessionType         'tool-heavy' or 'plain-chat'.
 * @param observedDensity     Observed tokens per message.
 * @param historyBudgetTokens Token budget for the history lane.
 * @param maxHistoryMessages  Upper bound on the returned depth.
 * @returns Message count to fetch.
 */
export function computeAdaptiveHistoryDepth(sessionType, observedDensity, historyBudgetTokens, maxHistoryMessages) {
    const SAFETY_MARGIN = 0.85;
    const TOOL_GRADIENT_DENSITY_FACTOR = 0.30;
    const toolHeavy = sessionType === 'tool-heavy';
    // Tool payloads shrink once the gradient runs, so a tool-heavy session is
    // budgeted against the smaller post-gradient density.
    const effectiveDensity = toolHeavy
        ? Math.max(50, Math.floor(observedDensity * TOOL_GRADIENT_DENSITY_FACTOR))
        : observedDensity;
    const minimumDepth = toolHeavy ? 15 : 20;
    const fittedDepth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / effectiveDensity);
    return Math.min(maxHistoryMessages, Math.max(minimumDepth, fittedDepth));
}
306
- // ─── Sprint 3: Unified Pressure Signal ───────────────────────────────────────────────────────
307
/**
 * Canonical pressure labels shared across compose and compaction paths.
 * Use these constants when setting the `pressureSource` field so all consumers
 * can filter logs with a stable string without guessing spellings.
 *
 * The table is frozen so a consumer cannot accidentally rewrite a label that
 * every other consumer relies on.
 */
export const PRESSURE_SOURCE = Object.freeze({
    /** Compose path: pressure derived from (budget - remaining) after full slot assembly. */
    COMPOSE_POST_ASSEMBLY: 'compose:post-assembly',
    /** Compose path: pressure measured immediately before semantic recall runs. */
    COMPOSE_PRE_RECALL: 'compose:pre-recall',
    /** Compaction path: pressure from Redis token estimate / effectiveBudget. */
    COMPACT_REDIS_ESTIMATE: 'compact:redis-estimate',
    /** Compaction path: pressure from runtime-reported currentTokenCount / effectiveBudget. */
    COMPACT_RUNTIME_TOTAL: 'compact:runtime-total',
    /** Tool-loop assemble path: pressure from in-memory working message array / effectiveBudget. */
    TOOLLOOP_RUNTIME_ARRAY: 'toolloop:runtime-array',
});
324
/**
 * Compute a unified pressure fraction so compose and compaction paths report
 * the same numeric concept without drift.
 *
 * The fraction is clamped at the low end only: a negative or NaN ratio, or a
 * non-positive budget, reports 0. There is no upper clamp — values above 1
 * are returned as-is so callers can apply their own thresholds.
 *
 * @param usedTokens   Tokens consumed (numerator).
 * @param budgetTokens Effective budget (denominator). Expected to be > 0;
 *                     anything else yields a fraction of 0.
 * @param source       Label from PRESSURE_SOURCE for telemetry (metadata only).
 * @returns { fraction, pct, source } where fraction = usedTokens / budgetTokens
 *          (floored at 0), pct = Math.round(fraction * 100), source = the
 *          label as passed in.
 */
export function computeUnifiedPressure(usedTokens, budgetTokens, source) {
    const ratio = budgetTokens > 0 ? usedTokens / budgetTokens : 0;
    // Enforce the documented lower bound; NaN is treated as "no pressure".
    const fraction = Number.isNaN(ratio) || ratio < 0 ? 0 : ratio;
    const pct = Math.round(fraction * 100);
    return { fraction, pct, source };
}
342
/**
 * 0.9.0: adaptive lifecycle scales semantic-recall breadth in compose.
 *
 * Baseline recall envelope. A multiplier of 1.0 applied to these values
 * reproduces the unscaled fractions and candidate limit. The min/max pair
 * bounds the candidate limit for any multiplier.
 */
export const RECALL_BREADTH_BASE = Object.freeze({
    mainBudgetFraction: 0.12,
    fallbackBudgetFraction: 0.10,
    candidateLimit: 10,
    candidateLimitMin: 6,
    candidateLimitMax: 16,
});
/**
 * Scale the recall envelope by the adaptive lifecycle multiplier.
 *
 * Pure helper: reads only its arguments and RECALL_BREADTH_BASE. A multiplier
 * that is not a finite number greater than zero is replaced by 1. A falsy
 * remainingTokens counts as 0; the value is floored and never negative.
 *
 * @param remainingTokens Tokens still available for recall.
 * @param multiplier      Recall breadth multiplier.
 * @returns { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier }
 *          where multiplier is the value actually applied.
 */
export function scaleRecallBreadth(remainingTokens, multiplier) {
    const {
        mainBudgetFraction,
        fallbackBudgetFraction,
        candidateLimit: baseLimit,
        candidateLimitMin,
        candidateLimitMax,
    } = RECALL_BREADTH_BASE;
    const appliedMultiplier = Number.isFinite(multiplier) && multiplier > 0 ? multiplier : 1;
    const remaining = Math.max(0, Math.floor(remainingTokens || 0));
    const scaledTokens = (fraction) => Math.max(0, Math.floor(remaining * fraction * appliedMultiplier));
    const unclampedLimit = Math.ceil(baseLimit * appliedMultiplier);
    const candidateLimit = Math.min(candidateLimitMax, Math.max(candidateLimitMin, unclampedLimit));
    return {
        mainBudgetTokens: scaledTokens(mainBudgetFraction),
        fallbackBudgetTokens: scaledTokens(fallbackBudgetFraction),
        candidateLimit,
        multiplier: appliedMultiplier,
    };
}
371
/**
 * Built-in configuration defaults, grouped by concern.
 * Fractions are unitless ratios; budgets and context sizes are in tokens.
 */
const DEFAULT_CONFIG = {
    // ── Primary budget controls ──
    budgetFraction: 0.703,
    reserveFraction: 0.25,
    historyFraction: 0.4,
    memoryFraction: 0.4,
    // ── Absolute fallback ──
    defaultTokenBudget: 90_000,
    // ── History internals ──
    maxHistoryMessages: 250,
    warmHistoryBudgetFraction: 0.4,
    keystoneHistoryFraction: 0.2,
    keystoneMaxMessages: 15,
    keystoneMinSignificance: 0.5,
    // ── Memory internals ──
    maxFacts: 28,
    maxCrossSessionContext: 6_000,
    // ── Tool gradient (internal) ──
    maxRecentToolPairs: 3,
    maxProseToolPairs: 10,
    // ── Dynamic reserve ──
    dynamicReserveTurnHorizon: 5,
    dynamicReserveMax: 0.5,
    dynamicReserveEnabled: true,
};
396
// Tool gradient thresholds: how aggressively tool results are truncated as
// they age out of the recent window.
//
// Recent-turn policy (2026-04-07): the newest turns keep full fidelity,
// planning is done against a conservative 120k window, and large (>40k char)
// recent results are head+tail trimmed only once projected occupancy crosses
// the orange zone.
//
// NOTE(review): the original per-tier comments disagreed with each other on
// the exact turn ranges (e.g. T0 described as both "turn 0 + turn 1" and
// "current + 2 prior turns"). Confirm the boundaries against getTurnAge /
// applyToolGradient before relying on the ranges below.

// ── Tier boundaries (turn age) ──
const TOOL_GRADIENT_T0_TURNS = 2; // newest tier: full fidelity
const TOOL_GRADIENT_T1_TURNS = 4; // moderate truncation (was 3)
const TOOL_GRADIENT_T2_TURNS = 7; // aggressive truncation (was 12)
// T3 = everything older: one-liner stub

// ── Per-tier character caps ──
const TOOL_GRADIENT_T1_CHAR_CAP = 6000; // per-message cap (was 8k)
const TOOL_GRADIENT_T1_TURN_CAP = 12000; // per-turn-pair cap (was 16k)
const TOOL_GRADIENT_T2_CHAR_CAP = 800; // per-message cap (was 1k)
const TOOL_GRADIENT_T2_TURN_CAP = 3000; // per-turn-pair cap (was 4k)
const TOOL_GRADIENT_T3_CHAR_CAP = 150; // oldest tier: stub only (was 200)
const TOOL_GRADIENT_T3_TURN_CAP = 800; // per-turn-pair cap (was 1k)

// ── Head/tail truncation ──
const TOOL_GRADIENT_MAX_TAIL_CHARS = 3000; // tail preserve budget for T1+
const TOOL_GRADIENT_MIDDLE_MARKER = '\n[... tool output truncated ...]\n';

// ── Planning window and pressure zones ──
const TOOL_PLANNING_BASELINE_WINDOW = 120000;
const TOOL_PLANNING_MIN_RESERVE_TOKENS = 24000;
const TOOL_PRESSURE_YELLOW = 0.75;
const TOOL_PRESSURE_ORANGE = 0.8;
const TOOL_PRESSURE_RED = 0.85;

// ── Oversized recent results ──
const TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD = 40000;
const TOOL_RECENT_OVERSIZE_TARGET_CHARS = 40000;
const TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS = 12000;
const TOOL_TRIM_NOTE_PREFIX = '[hypermem_tool_result_trim';
422
- // ─── Trigger Registry ────────────────────────────────────────────
423
- // Moved to src/trigger-registry.ts (W5).
424
- // CollectionTrigger, DEFAULT_TRIGGERS, matchTriggers imported above.
425
- // Re-exported below for backward compatibility with existing consumers.
426
- export { DEFAULT_TRIGGERS, matchTriggers } from './trigger-registry.js';
427
- // ─── Test-only exports (not part of public API) ───────────────────────────
428
- // These are exported solely for unit testing. Do not use in production code.
429
- export { getTurnAge, applyToolGradient, appendToolSummary, truncateWithHeadTail, applyTierPayloadCap, evictLargeToolResults };
430
/**
 * Group messages into clusters that must be kept or dropped together.
 *
 * - A message with tool calls absorbs the messages that follow it while they
 *   carry tool results. If both sides expose ids and a follower answers none
 *   of the head's call ids, the cluster ends before that follower.
 * - A message with tool results (and no tool calls) absorbs following
 *   messages that carry tool results and no tool calls of their own.
 * - Any other message forms a cluster of one.
 *
 * @param messages Messages in chronological order.
 * @returns Array of { messages, tokenCost }, where tokenCost is the summed
 *          estimateMessageTokens of the cluster's members.
 */
function clusterNeutralMessages(messages) {
    const clusters = [];
    let index = 0;
    while (index < messages.length) {
        const head = messages[index];
        const members = [head];
        let next = index + 1;
        if (head.toolCalls && head.toolCalls.length > 0) {
            const pendingIds = new Set(head.toolCalls.map((call) => call.id).filter(Boolean));
            for (; next < messages.length; next++) {
                const follower = messages[next];
                if (!follower.toolResults || follower.toolResults.length === 0) {
                    break;
                }
                const answeredIds = follower.toolResults.map((result) => result.callId).filter(Boolean);
                // Ids can only veto membership when both sides actually have some.
                const idsComparable = pendingIds.size > 0 && answeredIds.length > 0;
                if (idsComparable && !answeredIds.some((id) => pendingIds.has(id))) {
                    break;
                }
                members.push(follower);
            }
        }
        else if (head.toolResults && head.toolResults.length > 0) {
            for (; next < messages.length; next++) {
                const follower = messages[next];
                if (!follower.toolResults || follower.toolResults.length === 0) {
                    break;
                }
                if (follower.toolCalls && follower.toolCalls.length > 0) {
                    break;
                }
                members.push(follower);
            }
        }
        let tokenCost = 0;
        for (const member of members) {
            tokenCost += estimateMessageTokens(member);
        }
        clusters.push({ messages: members, tokenCost });
        // Resume right after the last absorbed message.
        index = next;
    }
    return clusters;
}
468
/**
 * Plan topic-aware eviction over a list of message clusters.
 *
 * Protected clusters (never offered as drop candidates):
 *   - the most recent cluster containing a user-role message,
 *   - any cluster with a message whose metadata.dynamicBoundary === true,
 *   - any non-empty cluster made up solely of system messages.
 *
 * Drop candidates are produced only when policy.evictionPlan.preferTopicAwareDrop
 * is set and opts.activeTopicId is present. A cluster qualifies when it is not
 * protected, carries no tool calls/results, is non-empty, and every message
 * has a string topicId different from the active topic. Messages without a
 * topicId therefore keep their cluster out of the drop order.
 *
 * @param clusters Array of { messages, ... } clusters, oldest first.
 * @param policy   Object exposing evictionPlan.preferTopicAwareDrop.
 * @param opts     Optional { activeTopicId }.
 * @returns { preferTopicAwareDrop, topicAwareDropOrder, protectedIndices, telemetry }
 */
export function orderClustersForAdaptiveEviction(clusters, policy, opts = {}) {
    const plan = policy.evictionPlan;
    const carriesToolContent = (m) => (m.toolCalls && m.toolCalls.length > 0)
        || (m.toolResults && m.toolResults.length > 0);
    const protectedIndices = new Set();
    // Newest cluster with a user message stands in for the current user turn.
    let cursor = clusters.length;
    while (cursor-- > 0) {
        if (clusters[cursor].messages.some((m) => m.role === 'user')) {
            protectedIndices.add(cursor);
            break;
        }
    }
    let totalMessages = 0;
    let stampedMessages = 0;
    for (const [position, cluster] of clusters.entries()) {
        const members = cluster.messages;
        const hasDynamicBoundary = members.some((m) => m.metadata?.dynamicBoundary === true);
        const isPureSystem = members.length > 0 && members.every((m) => m.role === 'system');
        if (hasDynamicBoundary || isPureSystem) {
            protectedIndices.add(position);
        }
        totalMessages += members.length;
        for (const m of members) {
            if (typeof m.topicId === 'string') {
                stampedMessages += 1;
            }
        }
    }
    // Percentage with two decimals.
    const topicIdCoveragePct = totalMessages > 0
        ? Math.round((stampedMessages / totalMessages) * 10000) / 100
        : 0;
    const activeId = opts.activeTopicId;
    const topicAwareDropOrder = [];
    if (plan.preferTopicAwareDrop && activeId) {
        for (const [position, cluster] of clusters.entries()) {
            if (protectedIndices.has(position)) {
                continue;
            }
            const members = cluster.messages;
            // Tool clusters stay out of the topic-aware order so tool chains
            // remain atomic.
            if (members.some((m) => carriesToolContent(m))) {
                continue;
            }
            if (members.length === 0) {
                continue;
            }
            const everyMessageInactive = members.every((m) => typeof m.topicId === 'string' && m.topicId !== activeId);
            if (everyMessageInactive) {
                topicAwareDropOrder.push(position);
            }
        }
    }
    // First matching reason wins; undefined means topic-aware drop is usable.
    const bypassReason = !activeId
        ? 'no-active-topic'
        : stampedMessages === 0
            ? 'no-stamped-clusters'
            : !plan.preferTopicAwareDrop
                ? 'band-not-topic-aware'
                : topicAwareDropOrder.length === 0
                    ? 'no-eligible-inactive-topic-clusters'
                    : undefined;
    return {
        preferTopicAwareDrop: plan.preferTopicAwareDrop,
        topicAwareDropOrder,
        protectedIndices,
        telemetry: {
            topicAwareEligibleClusters: topicAwareDropOrder.length,
            topicAwareDroppedClusters: 0,
            protectedClusters: protectedIndices.size,
            topicIdCoveragePct,
            bypassReason,
        },
    };
}
544
/**
 * Public reshape helper: apply the tool gradient, then trim the oldest
 * clusters until the window fits a soft token budget.
 *
 * The soft budget comes from resolveTrimBudgets(tokenBudget).softBudget.
 * Trimming works on whole clusters (see clusterNeutralMessages) so tool
 * call/result groups are never split, and the newest cluster is always kept
 * even when it alone exceeds the target.
 *
 * @param messages          NeutralMessage array for the hot window.
 * @param tokenBudget       Effective token budget for this session.
 * @param totalWindowTokens Total context window size, forwarded to
 *                          applyToolGradient.
 * @returns Flattened, trimmed message array.
 */
export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
    const gradientView = applyToolGradient(messages, { totalWindowTokens });
    const targetTokens = resolveTrimBudgets(tokenBudget).softBudget;
    const clusters = clusterNeutralMessages(gradientView);
    let keptCost = 0;
    for (const cluster of clusters) {
        keptCost += cluster.tokenCost;
    }
    // Drop from the oldest end; stop one short of the end so something remains.
    const lastIndex = clusters.length - 1;
    let firstKept = 0;
    for (; keptCost > targetTokens && firstKept < lastIndex; firstKept++) {
        keptCost -= clusters[firstKept].tokenCost;
    }
    return clusters.slice(firstKept).flatMap((cluster) => cluster.messages);
}
569
/**
 * Canonical history must remain lossless for tool turns.
 *
 * Returns true only when no message in the window carries tool calls or tool
 * results. When it returns false, callers should treat the output of
 * applyToolGradientToWindow() as a view for the current compose pass and not
 * write it back to the canonical cache/history store.
 *
 * @param messages Messages to inspect.
 * @returns Whether the reshaped window is safe to persist.
 */
export function canPersistReshapedHistory(messages) {
    return messages.every((message) => !hasToolContent(message));
}
580
/**
 * Rough token estimate for prose: one token per four characters, rounded up.
 * A heuristic only — real tokenization varies by model — but sufficient for
 * budget management. Empty or missing text costs nothing.
 *
 * @param text Text to estimate (may be empty/null/undefined).
 * @returns Estimated token count.
 */
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    return text ? Math.ceil(text.length / CHARS_PER_TOKEN) : 0;
}
590
/**
 * Dense token estimate for tool content (JSON, code, base64): one token per
 * two characters, rounded up — twice the density assumed for prose.
 *
 * Empty or missing text costs nothing, matching estimateTokens. This matters
 * because JSON.stringify can return undefined for some values, which would
 * otherwise throw on `.length`.
 *
 * @param text Serialized tool payload (may be empty/null/undefined).
 * @returns Estimated token count.
 */
function estimateToolTokens(text) {
    if (!text)
        return 0;
    return Math.ceil(text.length / 2);
}
597
/**
 * Estimate the token cost of one message.
 *
 * Sums the prose estimate of textContent, the dense estimate of the
 * JSON-serialized toolCalls and toolResults (each only when present), and a
 * flat 4-token overhead for role and formatting.
 *
 * @param msg Message with optional textContent, toolCalls, toolResults.
 * @returns Estimated token count.
 */
function estimateMessageTokens(msg) {
    const PER_MESSAGE_OVERHEAD = 4;
    const { textContent, toolCalls, toolResults } = msg;
    let total = estimateTokens(textContent);
    for (const payload of [toolCalls, toolResults]) {
        if (payload) {
            // Tool payloads are serialized and costed at the dense rate.
            total += estimateToolTokens(JSON.stringify(payload));
        }
    }
    return total + PER_MESSAGE_OVERHEAD;
}
609
/**
 * Whether a message is flagged as a dynamic boundary, i.e. it has a truthy
 * metadata.dynamicBoundary. Messages without metadata are not boundaries.
 *
 * @param msg Message to inspect.
 * @returns true when the flag is truthy, false otherwise.
 */
function isDynamicBoundaryMessage(msg) {
    const flag = msg.metadata?.dynamicBoundary;
    return flag ? true : false;
}
612
/**
 * Return the leading run of system messages that precede the first dynamic
 * boundary. The run ends at the first message that is either non-system or
 * flagged as a dynamic boundary; that message is not included.
 *
 * @param messages Messages in order.
 * @returns New array holding the stable prefix (possibly empty).
 */
function getStablePrefixMessages(messages) {
    const cutoff = messages.findIndex((msg) => msg.role !== 'system' || isDynamicBoundaryMessage(msg));
    const prefixLength = cutoff === -1 ? messages.length : cutoff;
    return messages.slice(0, prefixLength);
}
623
/**
 * SHA-256 fingerprint (hex) of a message list's text content.
 *
 * Each message contributes its textContent (empty string when missing)
 * followed by a fixed separator, so message boundaries affect the digest.
 *
 * @param messages Messages to fingerprint.
 * @returns Hex digest, or undefined for an empty list.
 */
function computeStablePrefixHash(messages) {
    if (messages.length === 0) {
        return undefined;
    }
    const SEPARATOR = '\n␞\n';
    const digest = messages.reduce((running, msg) => running.update(msg.textContent ?? '').update(SEPARATOR), createHash('sha256'));
    return digest.digest('hex');
}
633
/**
 * Parse a tool call's JSON argument string into a plain key/value record.
 *
 * Always returns an object that is safe to read properties from: invalid
 * JSON, and valid JSON that is not an object (null, numbers, strings,
 * booleans, arrays), all yield {}. Without the shape check, input such as
 * "null" would hand callers a null that throws on `args.path`.
 *
 * @param argumentsJson Raw JSON string of the tool call arguments.
 * @returns Parsed argument record, or {} when unusable.
 */
function parseToolArgs(argumentsJson) {
    let parsed;
    try {
        parsed = JSON.parse(argumentsJson);
    }
    catch {
        // Best-effort: malformed arguments simply produce no details.
        return {};
    }
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isRecord ? parsed : {};
}
641
/**
 * Build a short human-readable label for a tool call.
 *
 * Known tools are labelled "<tool> <main argument>". Free-form arguments
 * (command, query, url, session target) are cut to 80 characters and replaced
 * by a generic placeholder when empty. File tools show the path argument
 * untruncated. Unknown tools are labelled with their bare name.
 *
 * @param name Tool name.
 * @param args Parsed tool arguments.
 * @returns Label string.
 */
function toolLabelFromCall(name, args) {
    // Stringify, cap at 80 chars, fall back to a placeholder when empty.
    const clip = (value, placeholder) => String(value ?? '').slice(0, 80) || placeholder;
    if (name === 'read') {
        return `read ${args.path ?? args.file_path ?? args.filePath ?? 'file'}`;
    }
    if (name === 'write' || name === 'edit') {
        return `${name} ${args.path ?? args.file ?? args.filePath ?? 'file'}`;
    }
    if (name === 'exec') {
        return `exec ${clip(args.command, 'command')}`;
    }
    if (name === 'web_search' || name === 'memory_search') {
        return `${name} ${clip(args.query, 'query')}`;
    }
    if (name === 'web_fetch') {
        return `web_fetch ${clip(args.url, 'url')}`;
    }
    if (name === 'sessions_send') {
        return `sessions_send ${clip(args.sessionKey ?? args.label, 'target')}`;
    }
    return name;
}
663
/**
 * Remove the external-content security wrapper from a tool result before it
 * is truncated.
 *
 * Wrapped results look like
 *   <preamble> <<<BEGIN_EXTERNAL_UNTRUSTED_CONTENT …>>> body <<<END_EXTERNAL_UNTRUSTED_CONTENT …>>> <trailer>
 * Everything up to and including the opening tag line, and everything from
 * the closing tag onward, is removed so head/tail truncation spends its
 * budget on the real body instead of the preamble.
 *
 * If 20 characters or fewer survive (after trimming), the original content
 * is returned unchanged.
 *
 * @param content Raw tool result text.
 * @returns Unwrapped, trimmed body, or the original content.
 */
function stripSecurityPreamble(content) {
    const OPENING_TAG = /^[\s\S]*?<<<BEGIN_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?\n?/;
    const CLOSING_TAG = /\n?<<<END_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?[\s\S]*$/;
    const body = content.replace(OPENING_TAG, '').replace(CLOSING_TAG, '').trim();
    if (body.length > 20) {
        return body;
    }
    return content;
}
677
// Usefulness floor for pressure-driven trimming: when a trim would keep less
// than 30% of the original content, a stripped sentinel is preferred over a
// fragment, because a partial result that looks complete is worse than a
// clear signal that the result was dropped.
// Intended for applyTierPayloadCap only; structural truncation paths keep
// using head+tail, which stays semantically useful at any size.
const TOOL_GRADIENT_MIN_USEFUL_FRACTION = 0.3;
683
/**
 * Shorten content to about maxChars by keeping its head and tail and
 * replacing the middle with TOOL_GRADIENT_MIDDLE_MARKER.
 *
 * The tail gets up to 30% of maxChars, capped at maxTailChars; the head gets
 * what is left after the tail and the marker. Content already within
 * maxChars is returned unchanged.
 *
 * When the tail budget is zero (tiny maxChars, or maxTailChars <= 0) no tail
 * is emitted. This needs an explicit branch: `slice(-0)` is `slice(0)` and
 * would append the entire original content.
 *
 * Note: if maxChars is smaller than the marker itself, the result can still
 * exceed maxChars because the marker is always emitted.
 *
 * @param content      Text to shorten.
 * @param maxChars     Target maximum length.
 * @param maxTailChars Cap on the preserved tail; defaults to
 *                     TOOL_GRADIENT_MAX_TAIL_CHARS.
 * @returns Original or shortened text.
 */
function truncateWithHeadTail(content, maxChars, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    if (content.length <= maxChars)
        return content;
    const tailBudget = Math.max(0, Math.min(Math.floor(maxChars * 0.30), maxTailChars));
    const headBudget = Math.max(0, maxChars - tailBudget - TOOL_GRADIENT_MIDDLE_MARKER.length);
    const head = content.slice(0, headBudget);
    const tail = tailBudget > 0 ? content.slice(-tailBudget) : '';
    return head + TOOL_GRADIENT_MIDDLE_MARKER + tail;
}
690
/**
 * Keep the start of a string and end it with an ellipsis when it is longer
 * than maxChars. The ellipsis counts toward the limit, so the result is
 * maxChars long whenever maxChars >= 1.
 *
 * @param content  Text to shorten.
 * @param maxChars Maximum length.
 * @returns Original or shortened text.
 */
function truncateHead(content, maxChars) {
    const ELLIPSIS = '…';
    if (content.length > maxChars) {
        const keptChars = Math.max(0, maxChars - ELLIPSIS.length);
        return `${content.slice(0, keptChars)}${ELLIPSIS}`;
    }
    return content;
}
697
/**
 * Return the first line that contains something other than whitespace,
 * trimmed. Returns '' when every line is blank.
 *
 * @param content Multi-line text.
 * @returns Trimmed first non-blank line, or ''.
 */
function firstNonEmptyLine(content) {
    for (const rawLine of content.split('\n')) {
        const trimmed = rawLine.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return '';
}
701
/**
 * Collapse every run of whitespace (including newlines and tabs) to a single
 * space and strip leading/trailing whitespace, yielding a one-line string.
 *
 * @param text Text to flatten.
 * @returns Single-line text.
 */
function normalizeInline(text) {
    const words = text.trim().split(/\s+/);
    return words.join(' ');
}
704
/**
 * Extract the hostname from a URL string. Input that does not parse as an
 * absolute URL is returned unchanged.
 *
 * @param raw URL-like string.
 * @returns Hostname, or the raw input when parsing fails.
 */
function hostFromUrl(raw) {
    let parsed;
    try {
        parsed = new URL(raw);
    }
    catch {
        return raw;
    }
    return parsed.hostname;
}
712
/**
 * Return the text of the first Markdown heading (levels 1-3) in the content,
 * without the leading '#' marker. Returns '' when there is none.
 *
 * Detection and stripping both operate on the trimmed line, so an indented
 * heading such as "  ## Title" yields "Title" rather than "## Title".
 *
 * @param content Multi-line text.
 * @returns Heading text, or ''.
 */
function extractTopHeading(content) {
    const HEADING_MARKER = /^#{1,3}\s+/;
    for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();
        if (HEADING_MARKER.test(line)) {
            return line.replace(HEADING_MARKER, '').trim();
        }
    }
    return '';
}
716
/**
 * Find the first number that follows "exit code", "exit" or "code"
 * (case-insensitive, optionally separated by ':' or '=') and return its
 * digits as a string. Returns null when nothing matches.
 *
 * NOTE(review): the pattern has no word boundary, so text like "zipcode 12345"
 * or "code 404" also matches — confirm whether that is acceptable for callers.
 *
 * @param content Tool output text.
 * @returns Digit string, or null.
 */
function extractExitCode(content) {
    const EXIT_CODE_PATTERN = /(?:exit code|exit|code)\s*[:=]?\s*(\d+)/i;
    const found = EXIT_CODE_PATTERN.exec(content);
    if (found === null) {
        return null;
    }
    return found[1];
}
720
/**
 * Guess how many results a search payload contains.
 *
 * If the text looks like JSON with a "results" array, the number of "title"
 * keys is the count. Otherwise (or when no titles are found) occurrences of
 * result-like markers (Source:, siteName, title) are counted, capped at 20.
 *
 * @param content Search tool output.
 * @returns Estimated result count, or null when nothing recognizable is found.
 */
function estimateSearchResultCount(content) {
    if (/"results"\s*:\s*\[/.test(content)) {
        const titleKeys = content.match(/"title"\s*:/g);
        if (titleKeys?.length) {
            return titleKeys.length;
        }
    }
    const MARKER_CAP = 20;
    const markers = content.match(/\bSource:\b|\bsiteName\b|\btitle\b/gi);
    if (!markers?.length) {
        return null;
    }
    return Math.min(markers.length, MARKER_CAP);
}
730
/**
 * One-line outcome summary: "<label> — <first non-blank line>", or
 * "<label> — <N> chars" when the content has no non-blank line. The result is
 * head-truncated to maxChars.
 *
 * @param label    Tool label.
 * @param content  Tool result text.
 * @param maxChars Maximum summary length.
 * @returns Summary string.
 */
function summarizeOutcome(label, content, maxChars) {
    const headline = firstNonEmptyLine(content);
    const detail = headline ? headline : `${content.length} chars`;
    return truncateHead(`${label} — ${detail}`, maxChars);
}
735
/**
 * Produce a prose summary of one tool call and its result.
 *
 * Known tools (read, exec, web_search, web_fetch, memory_search) get a
 * purpose-built sentence, head-truncated to maxChars. Any other tool falls
 * back to its label plus either the first line (compact) or a head+tail
 * excerpt of the content (non-compact).
 *
 * @param name     Tool name.
 * @param args     Parsed tool arguments.
 * @param content  Tool result text.
 * @param maxChars Maximum summary length.
 * @param compact  When true, commands/queries are cut to 40 chars instead of
 *                 80 and the web_search / fallback forms are shorter.
 * @returns Summary string.
 */
function summarizeToolInteraction(name, args, content, maxChars, compact = false) {
    const leadLine = normalizeInline(firstNonEmptyLine(content));
    const argClip = compact ? 40 : 80;
    if (name === 'read') {
        const target = String(args.path ?? args.file_path ?? args.filePath ?? 'file');
        const detail = extractTopHeading(content) || leadLine || `${content.length} chars`;
        return truncateHead(`Read ${target} — ${detail}`, maxChars);
    }
    if (name === 'exec') {
        const command = String(args.command ?? 'command').slice(0, argClip);
        const exitCode = extractExitCode(content);
        let status;
        if (exitCode) {
            status = `exit ${exitCode}`;
        }
        else if (/(error|failed|timeout|timed out)/i.test(content)) {
            status = 'failed';
        }
        else {
            status = 'completed';
        }
        // Skip the lead line when it would only repeat the exit status.
        const repeatsStatus = /^exit\s+\d+$/i.test(leadLine);
        const detail = leadLine && !repeatsStatus ? `, ${leadLine}` : '';
        return truncateHead(`Ran ${command} — ${status}${detail}`, maxChars);
    }
    if (name === 'web_search') {
        const query = String(args.query ?? 'query').slice(0, argClip);
        const count = estimateSearchResultCount(content);
        const countText = count ? ` — ${count} results` : '';
        if (compact) {
            return truncateHead(`Searched '${query}'${countText}`, maxChars);
        }
        const topDetail = extractTopHeading(content) || leadLine;
        const topText = topDetail ? `, top: ${topDetail}` : '';
        return truncateHead(`Searched '${query}'${countText}${topText}`, maxChars);
    }
    if (name === 'web_fetch') {
        const host = hostFromUrl(String(args.url ?? 'url'));
        const detail = extractTopHeading(content) || leadLine || `${content.length} chars`;
        return truncateHead(`Fetched ${host} — ${detail}`, maxChars);
    }
    if (name === 'memory_search') {
        const query = String(args.query ?? 'query').slice(0, argClip);
        const count = estimateSearchResultCount(content);
        const hitsText = count ? ` — ${count} hits` : '';
        const topText = leadLine ? `, top: ${leadLine}` : '';
        return truncateHead(`Searched memory for '${query}'${hitsText}${topText}`, maxChars);
    }
    // Unknown tool: generic label-based summary.
    const label = toolLabelFromCall(name, args);
    if (compact) {
        return truncateHead(`${label} — ${leadLine || `${content.length} chars`}`, maxChars);
    }
    const prefix = `[${label}] `;
    const excerptBudget = Math.max(40, maxChars - prefix.length);
    return prefix + truncateWithHeadTail(content, excerptBudget);
}
786
/**
 * Build the tier-2 (non-compact) representation of a tool result.
 *
 * With both a tool name and arguments, defers to summarizeToolInteraction.
 * Otherwise emits "[label] " followed by a head+tail excerpt of the content;
 * the excerpt is given at least 40 characters.
 *
 * @param label    Display label for the tool.
 * @param content  Tool result text.
 * @param maxChars Target maximum length.
 * @param name     Optional tool name.
 * @param args     Optional parsed arguments.
 * @returns Envelope string.
 */
function buildTier2Envelope(label, content, maxChars, name, args) {
    if (!(name && args)) {
        const prefix = `[${label}] `;
        const excerptBudget = Math.max(40, maxChars - prefix.length);
        return prefix + truncateWithHeadTail(content, excerptBudget);
    }
    return summarizeToolInteraction(name, args, content, maxChars, false);
}
793
/**
 * Build the tier-3 (compact stub) representation of a tool result: a
 * bracketed one-line summary. Two characters of maxChars are set aside for
 * the brackets.
 *
 * With both a tool name and arguments, the compact form of
 * summarizeToolInteraction is used; otherwise summarizeOutcome.
 *
 * @param label    Display label for the tool.
 * @param content  Tool result text.
 * @param maxChars Target maximum length, brackets included.
 * @param name     Optional tool name.
 * @param args     Optional parsed arguments.
 * @returns Bracketed stub string.
 */
function buildTier3Envelope(label, content, maxChars, name, args) {
    const innerBudget = maxChars - 2;
    const inner = name && args
        ? summarizeToolInteraction(name, args, content, innerBudget, true)
        : summarizeOutcome(label, content, innerBudget);
    return `[${inner}]`;
}
798
/**
 * Build a heuristic prose summary for a message's tool activity, used when
 * tool payloads are removed but continuity should remain.
 *
 * - With tool calls: one part per call. If the same message holds a result
 *   whose callId matches and whose content is non-empty, the part is a
 *   tier-2 or tier-3 envelope of that result; otherwise it is just the call
 *   label (bracketed and truncated when compact).
 * - With only tool results: one envelope per result, labelled by the result's
 *   name or 'tool_result'.
 *
 * Parts are joined with '; ' and the whole string is head-truncated to
 * max(perResultCap, 120).
 *
 * @param msg          Message carrying toolCalls and/or toolResults.
 * @param perResultCap Character cap applied to each part.
 * @param compact      Use tier-3 stubs instead of tier-2 envelopes.
 * @returns Summary string ('' when the message has no tool content).
 */
function extractToolProseSummary(msg, perResultCap, compact = false) {
    const buildEnvelope = compact ? buildTier3Envelope : buildTier2Envelope;
    const hasCalls = msg.toolCalls && msg.toolCalls.length > 0;
    const hasResults = msg.toolResults && msg.toolResults.length > 0;
    const parts = [];
    if (hasCalls) {
        for (const call of msg.toolCalls) {
            const args = parseToolArgs(call.arguments);
            const label = toolLabelFromCall(call.name, args);
            const matching = msg.toolResults?.find((result) => result.callId === call.id);
            const resultContent = matching?.content ?? '';
            if (resultContent) {
                parts.push(buildEnvelope(label, resultContent, perResultCap, call.name, args));
                continue;
            }
            // No usable result: fall back to the bare call label.
            parts.push(compact ? `[${truncateHead(label, perResultCap - 2)}]` : label);
        }
    }
    else if (hasResults) {
        for (const result of msg.toolResults) {
            const label = result.name || 'tool_result';
            const args = {};
            parts.push(buildEnvelope(label, result.content ?? '', perResultCap, result.name || 'tool_result', args));
        }
    }
    const overallCap = Math.max(perResultCap, 120);
    return truncateHead(parts.join('; '), overallCap);
}
830
/**
 * Merge a tool summary into a message's text.
 *
 * - No summary: the existing text (or '' when nullish) is returned as-is.
 * - Summary and existing text: the summary is appended on a new line as
 *   `[Tools: <summary>]`.
 * - Summary but no existing text: the bare summary becomes the text.
 */
function appendToolSummary(textContent, summary) {
    const base = textContent ?? '';
    if (summary && base) {
        return `${base}\n[Tools: ${summary}]`;
    }
    return summary ? summary : base;
}
836
/**
 * Count how many turns are newer than the message at `index`.
 *
 * A "turn" is opened by a message with role 'user' that carries no tool
 * results (missing or empty `toolResults`); user messages that only deliver
 * tool results do not count. Only messages strictly after `index` are
 * inspected.
 */
function getTurnAge(messages, index) {
    let newerTurns = 0;
    for (let pos = index + 1; pos < messages.length; pos++) {
        const later = messages[pos];
        if (later?.role !== 'user') {
            continue;
        }
        const carriesResults = Boolean(later.toolResults) && later.toolResults.length !== 0;
        if (!carriesResults) {
            newerTurns += 1;
        }
    }
    return newerTurns;
}
846
/**
 * True when the message carries at least one tool call or one tool result.
 * Missing, null, and empty arrays all count as "no tool content".
 */
function hasToolContent(msg) {
    const nonEmpty = (list) => Boolean(list && list.length > 0);
    return nonEmpty(msg.toolCalls) || nonEmpty(msg.toolResults);
}
849
/**
 * Pick the token window used for tool-pressure planning.
 *
 * A missing, zero, or negative `totalWindowTokens` falls back to
 * TOOL_PLANNING_BASELINE_WINDOW. Any other value is capped at that baseline,
 * so the planning window never exceeds it.
 */
function resolveToolPlanningWindow(totalWindowTokens) {
    const usable = Boolean(totalWindowTokens) && totalWindowTokens > 0;
    const candidate = usable ? totalWindowTokens : TOOL_PLANNING_BASELINE_WINDOW;
    return Math.min(candidate, TOOL_PLANNING_BASELINE_WINDOW);
}
855
/**
 * Estimate how full the planning window is once a safety reserve is added.
 *
 * The reserve is 10% of the planning window (floored), but never less than
 * TOOL_PLANNING_MIN_RESERVE_TOKENS. Occupancy is (used + reserve) / window,
 * or 1 when the window is not positive. The zone is the highest threshold
 * strictly exceeded: red > orange > yellow, otherwise green.
 *
 * @returns {{ planningWindowTokens, reserveTokens, projectedTokens, occupancy, zone }}
 */
function computeToolPressureState(messages, totalWindowTokens) {
    const planningWindowTokens = resolveToolPlanningWindow(totalWindowTokens);
    const tenPercent = Math.floor(planningWindowTokens * 0.10);
    const reserveTokens = Math.max(TOOL_PLANNING_MIN_RESERVE_TOKENS, tenPercent);
    let usedTokens = 0;
    messages.forEach((entry) => {
        usedTokens = usedTokens + estimateMessageTokens(entry);
    });
    const projectedTokens = usedTokens + reserveTokens;
    const occupancy = planningWindowTokens > 0 ? projectedTokens / planningWindowTokens : 1;
    const zone = occupancy > TOOL_PRESSURE_RED
        ? 'red'
        : occupancy > TOOL_PRESSURE_ORANGE
            ? 'orange'
            : occupancy > TOOL_PRESSURE_YELLOW
                ? 'yellow'
                : 'green';
    return { planningWindowTokens, reserveTokens, projectedTokens, occupancy, zone };
}
876
/**
 * True when `content` begins with TOOL_TRIM_NOTE_PREFIX, the marker that
 * buildRecentTrimNote() places at the start of every trim note.
 */
function isStructuredTrimNote(content) {
    const marker = TOOL_TRIM_NOTE_PREFIX;
    return content.startsWith(marker);
}
879
/**
 * Render the single-line, space-separated note that precedes a trimmed
 * recent tool result.
 *
 * The note starts with TOOL_TRIM_NOTE_PREFIX, lists key=value fields
 * describing what was kept and the pressure that triggered the trim, adds
 * `result_id=<id>` only when `resultId` is truthy, and ends with a closing
 * ']' token.
 */
function buildRecentTrimNote(originalChars, keptHeadChars, keptTailChars, pressure, resultId) {
    const occupancyPct = Math.round(pressure.occupancy * 100);
    const fields = [
        TOOL_TRIM_NOTE_PREFIX,
        'partial_result=true',
        'reason=oversize_turn0_trim',
        `original_chars=${originalChars}`,
        `kept_head_chars=${keptHeadChars}`,
        `kept_tail_chars=${keptTailChars}`,
        `projected_occupancy_pct=${occupancyPct}`,
        `planning_window_tokens=${pressure.planningWindowTokens}`,
        `reserve_tokens=${pressure.reserveTokens}`,
        'retry_recommended=true',
        ...(resultId ? [`result_id=${resultId}`] : []),
        ']',
    ];
    return fields.join(' ');
}
897
/**
 * Measure the head and tail kept around TOOL_GRADIENT_MIDDLE_MARKER.
 *
 * Without a marker the whole string is head and the tail is empty. With one,
 * the head is everything before its first occurrence and the tail is
 * everything after it.
 */
function countHeadTailChars(content) {
    const at = content.indexOf(TOOL_GRADIENT_MIDDLE_MARKER);
    return at === -1
        ? { headChars: content.length, tailChars: 0 }
        : { headChars: at, tailChars: content.length - at - TOOL_GRADIENT_MIDDLE_MARKER.length };
}
907
/**
 * Trim an oversized recent tool result and prefix it with a structured note.
 *
 * Content that already starts with a trim note is returned untouched, so the
 * operation is idempotent. Otherwise the security preamble is stripped (the
 * raw content is used if stripping leaves nothing), the body is head/tail
 * truncated, and the note plus the truncated body are joined with a newline.
 */
function trimRecentToolResult(content, pressure, resultId) {
    if (isStructuredTrimNote(content)) {
        return content;
    }
    const withoutPreamble = stripSecurityPreamble(content);
    const source = withoutPreamble.length > 0 ? withoutPreamble : content;
    // A zero-count note is rendered first purely to estimate how much room the
    // real note will need; the body never gets fewer than 2,000 characters.
    const sizeProbe = buildRecentTrimNote(source.length, 0, 0, pressure, resultId);
    const bodyBudget = Math.max(2_000, TOOL_RECENT_OVERSIZE_TARGET_CHARS - sizeProbe.length - 1);
    const body = truncateWithHeadTail(source, bodyBudget, TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS);
    const kept = countHeadTailChars(body);
    const header = buildRecentTrimNote(source.length, kept.headChars, kept.tailChars, pressure, resultId);
    return [header, body].join('\n');
}
920
/**
 * Keep recent tool results intact unless context pressure forces a trim.
 *
 * Messages without tool results are returned as the same object. Otherwise a
 * shallow copy is returned whose results are unchanged, except that in the
 * 'orange' or 'red' zone any result longer than
 * TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD is replaced by its trimmed form. The
 * trim note is tagged with the result's callId, falling back to its name.
 */
function protectRecentToolContent(msg, pressure) {
    const incoming = msg.toolResults;
    if (!incoming || incoming.length === 0) {
        return msg;
    }
    const underPressure = pressure.zone === 'orange' || pressure.zone === 'red';
    const toolResults = incoming.map((entry) => {
        const body = entry.content ?? '';
        const needsTrim = Boolean(body)
            && underPressure
            && body.length > TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD;
        if (!needsTrim) {
            return entry;
        }
        const noteId = entry.callId || entry.name || undefined;
        return { ...entry, content: trimRecentToolResult(body, pressure, noteId) };
    });
    return { ...msg, toolResults };
}
939
/**
 * Enforce a per-result and a per-turn character cap on a message's tool results.
 *
 * Per result: content longer than `perResultCap` is stripped of its security
 * preamble and then either replaced by a "too large" sentinel or head/tail
 * truncated with a trailing `\n[trimmed]` marker.
 *
 * Per turn: if `usedSoFar` plus the capped results exceeds `perTurnCap`
 * (when one is given), the tool payloads are dropped entirely and replaced by
 * a prose summary appended to the message text.
 *
 * @returns {{ msg, usedChars }} the rewritten message and the running
 *   character total for the turn.
 */
function applyTierPayloadCap(msg, perResultCap, perTurnCap, usedSoFar = 0, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    const TRIMMED_MARKER = '\n[trimmed]';
    const capContent = (raw) => {
        if (raw.length <= perResultCap) {
            return raw;
        }
        // Strip the security preamble first so it does not eat the head budget;
        // otherwise a wrapped web_fetch result could truncate down to
        // [security notice] + [middle marker] + [last line].
        const stripped = stripSecurityPreamble(raw);
        // Floor check (TUNE-015): when the cap keeps under the minimum useful
        // fraction of the content AND is under 2000 chars in absolute terms,
        // emit a sentinel. A clear "dropped" signal beats a fragment that looks
        // complete; the absolute floor keeps large, still-meaningful slices
        // (e.g. 110k -> 16k) out of the sentinel path.
        const tooSmallToBeUseful = perResultCap < stripped.length * TOOL_GRADIENT_MIN_USEFUL_FRACTION
            && perResultCap < 2_000;
        if (tooSmallToBeUseful) {
            return `[result too large for current context budget \u2014 ${stripped.length} chars stripped]`;
        }
        // The marker's length is taken out of the cap so the final content
        // stays within perResultCap and cannot push the per-turn total over.
        const bodyBudget = perResultCap - TRIMMED_MARKER.length;
        return truncateWithHeadTail(stripped, bodyBudget, maxTailChars) + TRIMMED_MARKER;
    };
    const toolResults = msg.toolResults?.map((entry) => ({
        ...entry,
        content: capContent(entry.content ?? ''),
    })) ?? null;
    let cappedChars = 0;
    if (toolResults) {
        for (const entry of toolResults) {
            cappedChars += entry.content?.length ?? 0;
        }
    }
    const usedChars = usedSoFar + cappedChars;
    const overTurnCap = perTurnCap != null && usedChars > perTurnCap;
    if (!overTurnCap) {
        return { msg: { ...msg, toolResults }, usedChars };
    }
    // Turn budget blown: downgrade this message to a tier-2 style prose summary.
    const downgradeSummary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
    return {
        msg: {
            ...msg,
            textContent: appendToolSummary(msg.textContent, downgradeSummary),
            toolCalls: null,
            toolResults: null,
        },
        usedChars: usedSoFar + downgradeSummary.length,
    };
}
983
/**
 * Evict tool results exceeding 800 tokens (~3200 chars) before the history
 * budget-fit loop. Large stale results waste budget; each one is replaced
 * with a stub so consumers know the result existed and can re-run if needed.
 *
 * Applied to the already-gradient-processed history before window selection.
 * Messages whose turn age is at most TOOL_GRADIENT_T0_TURNS (the protected
 * recent-turn window) are never touched. The input array and its messages
 * are not mutated; changed messages are returned as shallow copies.
 *
 * @param messages History messages, oldest first.
 * @returns A new array of the same length with oversized stale results stubbed.
 */
const TOOL_RESULT_EVICTION_CHAR_THRESHOLD = 3_200; // ~800 tokens at 4 chars/token
function evictLargeToolResults(messages) {
    // Compute every message's turn age in one backward pass instead of
    // rescanning the tail of the array per message (which was O(n^2)).
    // Turn age = number of newer plain user messages, i.e. role 'user' with
    // no tool results — the same rule getTurnAge() applies.
    const turnAges = new Array(messages.length);
    let newerUserTurns = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        turnAges[i] = newerUserTurns;
        const candidate = messages[i];
        if (candidate?.role === 'user' && (!candidate.toolResults || candidate.toolResults.length === 0)) {
            newerUserTurns++;
        }
    }
    return messages.map((msg, idx) => {
        // Never evict from the protected recent-turn window.
        if (turnAges[idx] <= TOOL_GRADIENT_T0_TURNS)
            return msg;
        if (!msg.toolResults || msg.toolResults.length === 0)
            return msg;
        const evicted = msg.toolResults.map(result => {
            const content = result.content ?? '';
            if (content.length <= TOOL_RESULT_EVICTION_CHAR_THRESHOLD)
                return result;
            // Rough size hint for the stub, at 4 chars per token.
            const approxKTokens = Math.round(content.length / 4 / 1000);
            return {
                ...result,
                content: formatToolChainStub({
                    name: result.name || 'tool_result',
                    id: result.callId || 'unknown',
                    status: 'ejected',
                    reason: 'eviction_oversize',
                    summary: `~${approxKTokens}k tokens, use memory_search or re-run if needed`,
                }),
            };
        });
        return { ...msg, toolResults: evicted };
    });
}
1019
- // ─── C2: Oversized artifact handling ────────────────────────────────────────
1020
/**
 * C2: Resolve the artifact oversize threshold (in tokens) for the current compose pass.
 *
 * The threshold is a fixed fraction of the soft trim budget derived from the
 * effective model budget:
 *
 *   threshold = clamp(floor(softBudget × ARTIFACT_BUDGET_FRACTION),
 *                     ARTIFACT_THRESHOLD_FLOOR, ARTIFACT_THRESHOLD_CEILING)
 *
 * So small-window models get a proportionally tighter threshold (never below
 * the floor), while large-window models are bounded by the ceiling and a
 * single artifact can never fill the lane unconditionally.
 *
 * Headroom preservation comes from replacing the oversized artifact with a
 * cheap reference, not from shrinking the threshold itself.
 */
const ARTIFACT_BUDGET_FRACTION = 0.10; // share of the soft budget one artifact may use
const ARTIFACT_THRESHOLD_FLOOR = 500; // lower clamp, in tokens (~2k chars)
const ARTIFACT_THRESHOLD_CEILING = 8_000; // upper clamp, in tokens (~32k chars)
export function resolveArtifactOversizeThreshold(effectiveBudget) {
    const budgets = resolveTrimBudgets(effectiveBudget);
    const scaled = Math.floor(budgets.softBudget * ARTIFACT_BUDGET_FRACTION);
    if (scaled < ARTIFACT_THRESHOLD_FLOOR) {
        return ARTIFACT_THRESHOLD_FLOOR;
    }
    if (scaled > ARTIFACT_THRESHOLD_CEILING) {
        return ARTIFACT_THRESHOLD_CEILING;
    }
    return scaled;
}
1043
/**
 * True when the prompt, after trimming surrounding whitespace, is the `/new`
 * command: `/new` (any letter case) followed by whitespace or end of input.
 * A nullish prompt is treated as empty.
 */
function isExplicitNewSessionPrompt(prompt) {
    const normalized = (prompt ?? '').trim();
    return /^\/new(?:\s|$)/i.test(normalized);
}
1046
/**
 * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
 *
 * A retrieved chunk whose estimated token count exceeds `thresholdTokens` is
 * swapped for a fetchable reference instead of being injected raw, which
 * keeps headroom in the lane.
 *
 * @param chunkId          Identifier placed in the reference's `id`.
 * @param sourcePath       Path placed in the reference's `path`.
 * @param content          Raw chunk text to measure.
 * @param thresholdTokens  Largest token count still injected as-is.
 * @returns `null` when the content fits (the caller injects it unchanged);
 *   otherwise the formatted artifact reference to inject in its place. The
 *   reference reports the content's actual estimated size in `sizeTokens` so
 *   downstream tooling can decide whether a fetch is worthwhile.
 */
export function degradeOversizedDocChunk(chunkId, sourcePath, content, thresholdTokens) {
    const sizeTokens = estimateTokens(content);
    const fits = sizeTokens <= thresholdTokens;
    return fits
        ? null
        : formatArtifactRef({
            id: chunkId,
            path: sourcePath,
            sizeTokens,
            status: 'degraded',
            reason: 'artifact_oversize',
            fetchHint: 'memory_search or re-read source file',
        });
}
1074
/**
 * C2: Resolve oversized artifacts in a history message array.
 *
 * Every message whose text exceeds the model-aware artifact threshold has its
 * `textContent` replaced by a canonical artifact reference. These messages
 * always pass through unchanged:
 *   - system messages (they live in the stable prefix),
 *   - messages carrying tool calls or tool results (C1 owns those),
 *   - messages whose text is already an artifact reference (idempotence).
 *
 * The reference id comes from a string `_artifactId` on the message when
 * present, otherwise from the first 12 hex chars of a SHA-1 over
 * `<role>:<text>`. The path comes from a string `_artifactPath`, defaulting
 * to '/unknown/artifact'.
 *
 * @param messages — neutral message array (already-assembled history window)
 * @param effectiveBudget — effective model budget from B4 (drives the threshold)
 * @returns { messages, refCount, tokensSaved }
 */
export function resolveOversizedArtifacts(messages, effectiveBudget) {
    const thresholdTokens = resolveArtifactOversizeThreshold(effectiveBudget);
    const tally = { refCount: 0, tokensSaved: 0 };
    const stringOrNull = (value) => (typeof value === 'string' ? value : null);
    const degradeIfOversized = (msg) => {
        const exempt = msg.role === 'system' || Boolean(msg.toolResults || msg.toolCalls);
        if (exempt) {
            return msg;
        }
        const text = msg.textContent ?? '';
        if (isArtifactRef(text)) {
            return msg;
        }
        const contentTokens = estimateTokens(text);
        if (contentTokens <= thresholdTokens) {
            return msg;
        }
        // The hash is only computed when the message carries no explicit id.
        const id = stringOrNull(msg['_artifactId'])
            ?? `msg-${createHash('sha1').update(`${msg.role}:${text}`).digest('hex').slice(0, 12)}`;
        const path = stringOrNull(msg['_artifactPath']) ?? '/unknown/artifact';
        const refText = formatArtifactRef({
            id,
            path,
            sizeTokens: contentTokens,
            status: 'degraded',
            reason: 'artifact_oversize',
            fetchHint: 'memory_search',
        });
        tally.tokensSaved += contentTokens - estimateTokens(refText);
        tally.refCount += 1;
        return { ...msg, textContent: refText };
    };
    const out = messages.map((msg) => degradeIfOversized(msg));
    return { messages: out, refCount: tally.refCount, tokensSaved: tally.tokensSaved };
}
1126
/**
 * C1: Centralized tool-chain dependency ejection.
 *
 * Removes the messages at `ejectIndices` and makes sure no tool result that
 * answers one of their tool calls is left orphaned:
 *
 *   1. Collect the ids of every tool call on the ejected messages.
 *   2. For each surviving message with results that reference those ids:
 *      a) if it has no non-blank text and ALL of its results are dependent,
 *         drop the whole message (co-ejection, zero budget cost);
 *      b) otherwise keep it and replace only the dependent results' content
 *         with a canonical tool-chain stub.
 *
 * The input array is not mutated; rewritten messages are shallow copies.
 *
 * @param messages      Full message array (order preserved)
 * @param ejectIndices  Set of indices into `messages` being ejected
 * @param reason        DegradationReason embedded in any canonical stubs
 * @returns { messages, coEjections, stubReplacements }
 */
export function resolveToolChainEjections(messages, ejectIndices, reason = 'eviction_oversize') {
    // Step 1: ids of the tool calls that disappear with the ejected messages.
    const ejectedCallIds = new Set();
    for (const idx of ejectIndices) {
        const calls = messages[idx]?.toolCalls || [];
        for (const call of calls) {
            if (call.id) {
                ejectedCallIds.add(call.id);
            }
        }
    }
    let coEjections = 0;
    let stubReplacements = 0;
    // No call ids ejected: nothing can be orphaned, just drop the ejected messages.
    if (ejectedCallIds.size === 0) {
        const survivors = messages.filter((_, idx) => !ejectIndices.has(idx));
        return { messages: survivors, coEjections, stubReplacements };
    }
    // Step 2: rebuild the array, repairing or dropping dependent tool results.
    const kept = [];
    messages.forEach((msg, idx) => {
        if (ejectIndices.has(idx)) {
            return;
        }
        if (!msg.toolResults || msg.toolResults.length === 0) {
            kept.push(msg);
            return;
        }
        const dependentSet = new Set(msg.toolResults
            .map((r) => r.callId)
            .filter((id) => Boolean(id) && ejectedCallIds.has(id)));
        if (dependentSet.size === 0) {
            kept.push(msg);
            return;
        }
        // Case (a): results only, every one of them dependent -> co-eject.
        const textIsBlank = !(msg.textContent && msg.textContent.trim().length > 0);
        const allResultsDependent = msg.toolResults.every((r) => dependentSet.has(r.callId));
        if (textIsBlank && allResultsDependent) {
            coEjections++;
            return;
        }
        // Case (b): text or unrelated results present -> stub dependent entries only.
        const stubbedResults = msg.toolResults.map((result) => {
            const isDependent = Boolean(result.callId) && dependentSet.has(result.callId);
            if (!isDependent) {
                return result;
            }
            const stubContent = formatToolChainStub({
                name: result.name || 'tool_result',
                id: result.callId || 'unknown',
                status: 'ejected',
                reason,
                summary: 'parent tool-use ejected from context window',
            });
            stubReplacements++;
            return { ...result, content: stubContent };
        });
        kept.push({ ...msg, toolResults: stubbedResults });
    });
    return { messages: kept, coEjections, stubReplacements };
}
1215
/**
 * Apply gradient tool treatment to a message array.
 *
 * Tiers are based on turn age, where turn age is the number of newer plain
 * user messages (role 'user' with no tool results) after the current message
 * in the ORIGINAL input:
 *
 *   - age <= TOOL_GRADIENT_T0_TURNS: recent results are preserved, trimmed
 *     only under orange/red pressure (see protectRecentToolContent).
 *   - age <= TOOL_GRADIENT_T1_TURNS: payloads kept but capped per result and
 *     per turn.
 *   - age <= TOOL_GRADIENT_T2_TURNS: payloads replaced by a prose summary.
 *   - older: payloads replaced by a compact prose summary.
 *
 * The input array and its messages are not mutated.
 *
 * @param messages Messages, oldest first.
 * @param opts     Optional; `opts.totalWindowTokens` feeds the pressure estimate.
 * @returns A new array of the same length with tool content degraded by tier.
 */
function applyToolGradient(messages, opts) {
    const result = [...messages];
    const pressure = computeToolPressureState(messages, opts?.totalWindowTokens);
    // Turn ages are derived once, from the untouched input, in one backward
    // pass. Computing them on `result` while this loop rewrites it is wrong:
    // once a newer tool-result-carrying user message has its toolResults
    // nulled, it starts counting as a plain user turn, which inflates the age
    // of every older message and pushes it into a harsher tier than its real
    // position warrants. The single pass also avoids rescanning the tail of
    // the array for every message.
    const turnAges = new Array(messages.length);
    let newerUserTurns = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        turnAges[i] = newerUserTurns;
        const candidate = messages[i];
        if (candidate?.role === 'user' && (!candidate.toolResults || candidate.toolResults.length === 0)) {
            newerUserTurns++;
        }
    }
    // Running character usage per turn age, so per-turn caps span all the
    // tool-bearing messages that share a turn.
    const perTurnUsage = new Map();
    for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        if (!hasToolContent(msg))
            continue;
        const turnAge = turnAges[i];
        const usage = perTurnUsage.get(turnAge) ?? { t0: 0, t1: 0, t2: 0, t3: 0 };
        if (turnAge <= TOOL_GRADIENT_T0_TURNS) {
            // T0: preserve full recent tool results unless we hit the conservative
            // orange/red pressure zones and the payload itself is oversized.
            result[i] = protectRecentToolContent(msg, pressure);
        }
        else if (turnAge <= TOOL_GRADIENT_T1_TURNS) {
            const capped = applyTierPayloadCap(msg, TOOL_GRADIENT_T1_CHAR_CAP, TOOL_GRADIENT_T1_TURN_CAP, usage.t1);
            usage.t1 = capped.usedChars;
            result[i] = capped.msg;
        }
        else if (turnAge <= TOOL_GRADIENT_T2_TURNS) {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
            const allowed = Math.max(0, TOOL_GRADIENT_T2_TURN_CAP - usage.t2);
            // When the turn budget is exhausted (allowed === 0) the summary is
            // still emitted, but squeezed to the smaller T3 cap.
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T2_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t2 += boundedSummary.length;
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        else {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T3_CHAR_CAP, true);
            const allowed = Math.max(0, TOOL_GRADIENT_T3_TURN_CAP - usage.t3);
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T3_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t3 += boundedSummary.length;
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        perTurnUsage.set(turnAge, usage);
    }
    return result;
}
1269
- /** Guard: logRegistryStartup() fires only once per process, not per instance. */
1270
- let _registryLogged = false;
1271
- export class Compositor {
1272
- config;
1273
- cache;
1274
- vectorStore;
1275
- libraryDb;
1276
- triggerRegistry;
1277
- reranker;
1278
- rerankerMinCandidates;
1279
- rerankerMaxDocuments;
1280
- rerankerTopK;
1281
- /** Cached org registry loaded from fleet_agents at construction time. */
1282
- _orgRegistry;
1283
- constructor(deps, config) {
1284
- this.cache = deps.cache;
1285
- this.vectorStore = deps.vectorStore || null;
1286
- this.libraryDb = deps.libraryDb || null;
1287
- this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
1288
- this.reranker = deps.reranker ?? null;
1289
- this.rerankerMinCandidates = deps.rerankerMinCandidates ?? 2;
1290
- this.rerankerMaxDocuments = deps.rerankerMaxDocuments;
1291
- this.rerankerTopK = deps.rerankerTopK;
1292
- // Load org registry from DB on init; fall back to hardcoded if DB empty.
1293
- this._orgRegistry = this.libraryDb
1294
- ? buildOrgRegistryFromDb(this.libraryDb)
1295
- : defaultOrgRegistry();
1296
- this.config = { ...DEFAULT_CONFIG, ...config };
1297
- if (!_registryLogged) {
1298
- logRegistryStartup();
1299
- _registryLogged = true;
1300
- }
1301
- }
1302
- /**
1303
- * Set or replace the vector store after construction.
1304
- * Called by hypermem.create() once sqlite-vec is confirmed available.
1305
- */
1306
- setVectorStore(vs) {
1307
- this.vectorStore = vs;
1308
- }
1309
- /**
1310
- * Set or replace the reranker after construction.
1311
- * Called by hypermem.create() once the reranker config has been resolved.
1312
- */
1313
- setReranker(rr) {
1314
- this.reranker = rr;
1315
- }
1316
- /**
1317
- * Hot-reload the org registry from the fleet_agents table.
1318
- * Call after fleet membership changes (new agent, org restructure)
1319
- * to pick up the latest without a full restart.
1320
- * Falls back to the current cached registry if the DB is unavailable.
1321
- */
1322
- refreshOrgRegistry() {
1323
- if (this.libraryDb) {
1324
- this._orgRegistry = buildOrgRegistryFromDb(this.libraryDb);
1325
- }
1326
- return this._orgRegistry;
1327
- }
1328
- /**
1329
- * Return the currently cached org registry.
1330
- */
1331
- get orgRegistry() {
1332
- return this._orgRegistry;
1333
- }
1334
- /**
1335
- * Sprint 2.1: Hydrate tool-artifact stubs in the active turn.
1336
- *
1337
- * The active turn is the contiguous trailing block of tool-bearing messages
1338
- * at the tail of the assembled window (positional, NOT turn_id-based):
1339
- * - Walk backward from the last message
1340
- * - Collect tool-bearing messages (toolCalls != null OR toolResults != null)
1341
- * - Plus the bounding user message that opened the turn
1342
- * - Stop at the first plain message once at least one tool message was found
1343
- *
1344
- * For every toolResult stub with an `artifact=<id>` pointer, look up the
1345
- * full payload in ToolArtifactStore and replace the stub content in-place.
1346
- * Uses a single batched `WHERE id IN (...)` lookup (no N+1 queries).
1347
- * Touches `last_used_at` on every hydrated artifact in a single batch.
1348
- *
1349
- * Failure mode: if a lookup returns null (artifact missing), leave the stub
1350
- * unchanged and increment hydrationMisses.
1351
- *
1352
- * Returns diagnostics counters.
1353
- */
1354
- hydrateActiveTurnArtifacts(messages, db) {
1355
- if (messages.length === 0) {
1356
- return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1357
- }
1358
- const store = new ToolArtifactStore(db);
1359
- // ── 1. Detect active turn (positional, backward walk) ─────────────────────
1360
- // Collect indices belonging to the active turn.
1361
- const activeTurnIndices = [];
1362
- let foundToolBearing = false;
1363
- for (let i = messages.length - 1; i >= 0; i--) {
1364
- const msg = messages[i];
1365
- const isToolBearing = msg.toolCalls != null || msg.toolResults != null;
1366
- if (isToolBearing) {
1367
- foundToolBearing = true;
1368
- activeTurnIndices.push(i);
1369
- }
1370
- else if (foundToolBearing) {
1371
- // First plain message after at least one tool-bearing message — this
1372
- // is the bounding user message that opened the turn. Include it and stop.
1373
- activeTurnIndices.push(i);
1374
- break;
1375
- }
1376
- else {
1377
- // Haven't found any tool-bearing messages yet — still in non-tool tail
1378
- // (e.g., the last message is a plain user message). No active turn.
1379
- break;
1380
- }
1381
- }
1382
- if (activeTurnIndices.length === 0 || !foundToolBearing) {
1383
- return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1384
- }
1385
- // ── 2. Collect all artifactIds from stub toolResults in the active turn ───
1386
- // Map: artifactId -> array of [msgIndex, resultIndex] for in-place replacement
1387
- const artifactTargets = new Map();
1388
- for (const msgIdx of activeTurnIndices) {
1389
- const msg = messages[msgIdx];
1390
- if (!msg.toolResults)
1391
- continue;
1392
- for (let resultIdx = 0; resultIdx < msg.toolResults.length; resultIdx++) {
1393
- const result = msg.toolResults[resultIdx];
1394
- const stub = parseToolChainStub(result.content);
1395
- if (stub && stub.artifactId) {
1396
- const existing = artifactTargets.get(stub.artifactId) ?? [];
1397
- existing.push({ msgIdx, resultIdx });
1398
- artifactTargets.set(stub.artifactId, existing);
1399
- }
1400
- }
1401
- }
1402
- if (artifactTargets.size === 0) {
1403
- return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1404
- }
1405
- // ── 3. Batch lookup ────────────────────────────────────────────────────────
1406
- const ids = Array.from(artifactTargets.keys());
1407
- const placeholders = ids.map(() => '?').join(', ');
1408
- const rows = db
1409
- .prepare(`SELECT * FROM tool_artifacts WHERE id IN (${placeholders})`)
1410
- .all(...ids);
1411
- // Build id -> payload map
1412
- const payloadMap = new Map();
1413
- for (const row of rows) {
1414
- payloadMap.set(row.id, row.payload);
1415
- }
1416
- // ── 4. Hydrate in-place ────────────────────────────────────────────────────
1417
- let artifactsHydrated = 0;
1418
- let hydrationBytes = 0;
1419
- let hydrationMisses = 0;
1420
- const touchIds = [];
1421
- for (const [artifactId, targets] of artifactTargets) {
1422
- const payload = payloadMap.get(artifactId);
1423
- if (payload == null) {
1424
- // Graceful miss — stub stays as-is
1425
- hydrationMisses += targets.length;
1426
- continue;
1427
- }
1428
- for (const { msgIdx, resultIdx } of targets) {
1429
- const msg = messages[msgIdx];
1430
- // Safety: if content doesn't look like a stub anymore (defensive idempotency check)
1431
- const existingContent = msg.toolResults[resultIdx].content;
1432
- if (!parseToolChainStub(existingContent)) {
1433
- // Already full content — pass through unchanged
1434
- continue;
1435
- }
1436
- // Replace stub with full payload
1437
- msg.toolResults[resultIdx] = {
1438
- ...msg.toolResults[resultIdx],
1439
- content: payload,
1440
- };
1441
- artifactsHydrated++;
1442
- hydrationBytes += Buffer.byteLength(payload, 'utf8');
1443
- }
1444
- touchIds.push(artifactId);
1445
- }
1446
- // ── 5. Batch touch last_used_at ───────────────────────────────────────────
1447
- if (touchIds.length > 0) {
1448
- const ts = new Date().toISOString();
1449
- const touchPlaceholders = touchIds.map(() => '?').join(', ');
1450
- try {
1451
- db.prepare(`UPDATE tool_artifacts SET last_used_at = ? WHERE id IN (${touchPlaceholders})`).run(ts, ...touchIds);
1452
- }
1453
- catch {
1454
- // Touch is best-effort — hydration still succeeded
1455
- }
1456
- }
1457
- return { artifactsHydrated, hydrationBytes, hydrationMisses };
1458
- }
1459
- /**
1460
- * Compose a complete message array for sending to an LLM.
1461
- *
1462
- * Orchestrates all four memory layers:
1463
- * 1. System prompt + identity (never truncated)
1464
- * 2. Conversation history (L1 Redis → L2 messages.db)
1465
- * 3. Active facts from library (L4)
1466
- * 4. Knowledge entries relevant to conversation (L4)
1467
- * 5. User preferences (L4)
1468
- * 6. Semantic recall via vector search (L3)
1469
- * 7. Cross-session context (L2)
1470
- *
1471
- * Each slot respects the remaining token budget.
1472
- */
1473
- async compose(request, db, libraryDb) {
1474
- const store = new MessageStore(db);
1475
- const libDb = libraryDb || this.libraryDb;
1476
- const toComposeOutputMessages = (inputMessages) => {
1477
- // When skipProviderTranslation is set, compose returns the neutral window
1478
- // typed as ProviderMessage[] by contract. The runtime translates later.
1479
- return request.skipProviderTranslation
1480
- ? inputMessages
1481
- : toProviderFormat(inputMessages, request.provider ?? request.model ?? null);
1482
- };
1483
- // ── C4: Window cache fast-exit ────────────────────────────
1484
- // If nothing has changed since the last compose (cursor.lastSentId >= newest
1485
- // message id in the DB), skip the full pipeline and return the cached window.
1486
- // Particularly effective for low-frequency sessions (heartbeat agents, council
1487
- // seats between rounds). TTL on the cache write remains 120s — this is a
1488
- // conservative early-exit before the TTL expires, not a TTL extension.
1489
- //
1490
- // B2: prevPrefixHash is set when a cached bundle is found but bypassed due to
1491
- // prefix-input mutation. It is surfaced in the full-compose diagnostics so
1492
- // callers can confirm the bypass fired correctly.
1493
- let _prevPrefixHashFromBypass;
1494
- if (request.includeHistory !== false && request.skipWindowCache !== true) {
1495
- try {
1496
- const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
1497
- const newestMsgId = newestRow?.maxId;
1498
- if (newestMsgId != null) {
1499
- const cachedBundle = await this.cache.getFreshWindowBundle(request.agentId, request.sessionKey, newestMsgId);
1500
- if (cachedBundle) {
1501
- // Validate the cached bundle is compatible with this request.
1502
- // A mismatch on any of these means we must do a full compose:
1503
- // - tokenBudget: cached total exceeds the requested cap
1504
- // - slot flags: caller disabled slots that the cache populated
1505
- // - historyDepth: caller wants fewer messages than the cache holds
1506
- const cachedTotal = cachedBundle.meta.totalTokens;
1507
- const budgetOk = !request.tokenBudget ||
1508
- cachedTotal <= request.tokenBudget * 1.05;
1509
- const factsOk = request.includeFacts !== false ||
1510
- (cachedBundle.meta.slots['facts'] ?? 0) === 0;
1511
- const libraryOk = request.includeLibrary !== false ||
1512
- (cachedBundle.meta.slots['library'] ?? 0) === 0;
1513
- const contextOk = request.includeContext !== false ||
1514
- (cachedBundle.meta.slots['context'] ?? 0) === 0;
1515
- // historyDepth constrains how many messages the caller wants;
1516
- // we can't slice a cached bundle safely, so skip cache.
1517
- const depthOk = !request.historyDepth;
1518
- // B2: Stable-prefix hash check.
1519
- // If the system/identity slots changed since this cache entry was
1520
- // written, the stable prefix is stale even if cursor freshness
1521
- // passes. Compute a cheap input hash from slot contents and compare
1522
- // against the one stored in the cache meta. If no stored hash exists
1523
- // (pre-B2 cache entries), fall through to prefix check on the
1524
- // cached message content itself.
1525
- let prefixInputOk = true;
1526
- const _cachedPrefixInputHash = cachedBundle.meta.prefixInputHash;
1527
- if (_cachedPrefixInputHash) {
1528
- const _sysSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'system');
1529
- const _idSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'identity');
1530
- const _incomingInputHash = createHash('sha256')
1531
- .update(_sysSlot ?? '')
1532
- .update('\n␞\n')
1533
- .update(_idSlot ?? '')
1534
- .digest('hex');
1535
- if (_incomingInputHash !== _cachedPrefixInputHash) {
1536
- prefixInputOk = false;
1537
- }
1538
- }
1539
- if (budgetOk && factsOk && libraryOk && contextOk && depthOk && prefixInputOk) {
1540
- const cachedSlots = {
1541
- system: cachedBundle.meta.slots['system'] ?? 0,
1542
- identity: cachedBundle.meta.slots['identity'] ?? 0,
1543
- history: cachedBundle.meta.slots['history'] ?? 0,
1544
- facts: cachedBundle.meta.slots['facts'] ?? 0,
1545
- context: cachedBundle.meta.slots['context'] ?? 0,
1546
- library: cachedBundle.meta.slots['library'] ?? 0,
1547
- };
1548
- // Sprint 2.1: hydrate active-turn artifact stubs before converting.
1549
- const cachedHydration = this.hydrateActiveTurnArtifacts(cachedBundle.messages, db);
1550
- return {
1551
- messages: toComposeOutputMessages(cachedBundle.messages),
1552
- tokenCount: cachedBundle.meta.totalTokens,
1553
- slots: cachedSlots,
1554
- truncated: false,
1555
- hasWarnings: cachedBundle.meta.warnings.length > 0,
1556
- warnings: cachedBundle.meta.warnings,
1557
- diagnostics: {
1558
- ...cachedBundle.meta.diagnostics,
1559
- windowCacheHit: true,
1560
- // Carry forward the stored prefixHash so callers can observe it.
1561
- prefixHash: cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash,
1562
- artifactsHydrated: cachedHydration.artifactsHydrated > 0 ? cachedHydration.artifactsHydrated : undefined,
1563
- hydrationBytes: cachedHydration.hydrationBytes > 0 ? cachedHydration.hydrationBytes : undefined,
1564
- hydrationMisses: cachedHydration.hydrationMisses > 0 ? cachedHydration.hydrationMisses : undefined,
1565
- },
1566
- };
1567
- }
1568
- // Incompatible request — fall through to full compose.
1569
- // Surface prevPrefixHash so the full compose diagnostics can report it.
1570
- _prevPrefixHashFromBypass = cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash;
1571
- }
1572
- }
1573
- }
1574
- catch {
1575
- // Cache fast-exit is best-effort, fall through to full compose
1576
- }
1577
- }
1578
- // Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
1579
- // BEFORE assembling the full context. This gives us the reserve fraction we
1580
- // need to compute the effective token budget at the start of compose.
1581
- // Full history assembly happens later in the pipeline.
1582
- const totalWindow = resolveModelWindow(request.model, this.config.defaultTokenBudget);
1583
- const sampleConv = store.getConversation(request.sessionKey);
1584
- const sampleMessages = sampleConv
1585
- ? store.getRecentMessages(sampleConv.id, 40)
1586
- : [];
1587
- const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
1588
- const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
1589
- // B4: Model-aware lane budgets.
1590
- // Resolve historyFraction and memoryFraction by blending config values toward
1591
- // model-preferred fractions when the effective budget approaches the MECW ceiling.
1592
- // This ensures the compositor doesn't allocate more history than the model can
1593
- // reliably reason over, and adjusts the memory pool proportionally.
1594
- const _b4ConfigHistoryFraction = this.config.historyFraction ?? 0.40;
1595
- const _b4ConfigMemoryFraction = this.config.memoryFraction ?? 0.40;
1596
- const { historyFraction: b4HistoryFraction, memoryFraction: b4MemoryFraction, mecwProfile: b4MecwProfile, mecwApplied: b4MecwApplied, mecwBlend: b4MecwBlend, } = resolveModelLaneBudgets(request.model, budget, _b4ConfigHistoryFraction, _b4ConfigMemoryFraction);
1597
- // C2: Compute the artifact oversize threshold once per compose pass from the
1598
- // effective model budget (from B4). Chunk injection paths consult this threshold
1599
- // to degrade retrieved payloads that would fill the lane instead of injecting them.
1600
- const c2ArtifactThresholdTokens = resolveArtifactOversizeThreshold(budget);
1601
- let c2ArtifactDegradations = 0;
1602
- // Sprint 4: Pre-compose history depth tightening.
1603
- // Classify the session and compute an adaptive depth from observed message
1604
- // density. This replaces the old fixed maxHistoryMessages ceiling that over-
1605
- // fed the compositor for tool-heavy sessions.
1606
- //
1607
- // If the caller already passed historyDepth (plugin assemble path), honour it
1608
- // as an explicit cap — the adaptive depth still applies as an upper bound so
1609
- // we never request more than the budget can absorb.
1610
- const s4SessionType = classifySessionType(sampleMessages);
1611
- const s4ObservedDensity = estimateObservedMsgDensity(sampleMessages);
1612
- const s4HistoryBudget = Math.floor(budget * b4HistoryFraction);
1613
- const s4AdaptiveDepth = computeAdaptiveHistoryDepth(s4SessionType, s4ObservedDensity, s4HistoryBudget, this.config.maxHistoryMessages);
1614
- // Effective depth: caller-provided historyDepth overrides adaptive when it is
1615
- // the tighter constraint; otherwise use the adaptive depth.
1616
- const s4EffectiveDepth = request.historyDepth
1617
- ? Math.min(request.historyDepth, s4AdaptiveDepth)
1618
- : s4AdaptiveDepth;
1619
- let remaining = budget;
1620
- // 0.9.0: resolve an early adaptive lifecycle posture for the
1621
- // compose-window cluster-drop pass. Pressure is estimated from the
1622
- // SQLite sample over the effective budget so the eviction-order
1623
- // decision routes through the same band classifier the rest of the
1624
- // 0.9.0 paths already use — no parallel pressure constants here.
1625
- const s09SampleTokens = sampleMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
1626
- const s09EvictionPressure = computeUnifiedPressure(s09SampleTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
1627
- let s09ObservedUserTurnCount = sampleMessages.filter(m => m.role === 'user').length;
1628
- const s09ForkedContextSeed = request.forkedContext?.enabled ? request.forkedContext : undefined;
1629
- const s09ForkedParentPressure = typeof s09ForkedContextSeed?.parentPressureFraction === 'number'
1630
- && Number.isFinite(s09ForkedContextSeed.parentPressureFraction)
1631
- ? s09ForkedContextSeed.parentPressureFraction
1632
- : undefined;
1633
- const s09EvictionPolicyPressure = s09ForkedContextSeed
1634
- && s09ObservedUserTurnCount === 0
1635
- && s09ForkedParentPressure != null
1636
- ? s09ForkedParentPressure
1637
- : s09EvictionPressure.fraction;
1638
- const evictionLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
1639
- pressureFraction: s09EvictionPolicyPressure,
1640
- userTurnCount: s09ObservedUserTurnCount,
1641
- explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? null),
1642
- forkedContext: Boolean(s09ForkedContextSeed),
1643
- forkedParentPressureFraction: s09ForkedParentPressure,
1644
- forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
1645
- });
1646
- let adaptiveEvictionTopicAwareEligibleClusters = 0;
1647
- let adaptiveEvictionTopicAwareDroppedClusters = 0;
1648
- let adaptiveEvictionProtectedClusters = 0;
1649
- let adaptiveEvictionTopicIdCoveragePct = 0;
1650
- let adaptiveEvictionBypassReason;
1651
- let composeTopicSource = 'none';
1652
- let composeTopicState = 'history-disabled';
1653
- let composeTopicMessageCount = 0;
1654
- let composeTopicStampedMessageCount = 0;
1655
- // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
1656
- // All downstream message queries use this as a lower bound to exclude zombie
1657
- // messages below the fence that should have been compacted.
1658
- let fenceMessageId;
1659
- if (sampleConv) {
1660
- try {
1661
- ensureCompactionFenceSchema(db);
1662
- const fence = getCompactionFence(db, sampleConv.id);
1663
- if (fence)
1664
- fenceMessageId = fence.fenceMessageId;
1665
- }
1666
- catch {
1667
- // Fence lookup is best-effort — never fail composition
1668
- }
1669
- }
1670
- const warnings = [];
1671
- const slots = {
1672
- system: 0,
1673
- identity: 0,
1674
- history: 0,
1675
- facts: 0,
1676
- context: 0,
1677
- library: 0,
1678
- };
1679
- const messages = [];
1680
- // ─── System Prompt (never truncated) ───────────────────────
1681
- const systemContent = await this.getSlotContent(request.agentId, request.sessionKey, 'system', db);
1682
- if (systemContent) {
1683
- const tokens = estimateTokens(systemContent);
1684
- messages.push({
1685
- role: 'system',
1686
- textContent: systemContent,
1687
- toolCalls: null,
1688
- toolResults: null,
1689
- });
1690
- slots.system = tokens;
1691
- remaining -= tokens;
1692
- }
1693
- // ─── Identity (never truncated) ────────────────────────────
1694
- const identityContent = await this.getSlotContent(request.agentId, request.sessionKey, 'identity', db);
1695
- if (identityContent) {
1696
- const tokens = estimateTokens(identityContent);
1697
- messages.push({
1698
- role: 'system',
1699
- textContent: identityContent,
1700
- toolCalls: null,
1701
- toolResults: null,
1702
- });
1703
- slots.identity = tokens;
1704
- remaining -= tokens;
1705
- }
1706
- const repairNoticeContent = await this.getSlotContent(request.agentId, request.sessionKey, 'repair_notice', db);
1707
- // ─── Warm-Restore Repair Notice (never suppressed) ─────────
1708
- // If a session was reconstructed from a snapshot, the repair notice must
1709
- // stay above restored conversation content even under budget pressure.
1710
- // This mirrors the system/identity invariant: history and memory slots may
1711
- // be trimmed, but the provenance notice is not optional operational state.
1712
- if (repairNoticeContent) {
1713
- const tokens = estimateTokens(repairNoticeContent);
1714
- messages.push({
1715
- role: 'system',
1716
- textContent: repairNoticeContent,
1717
- toolCalls: null,
1718
- toolResults: null,
1719
- metadata: { warmRestoreRepairNotice: true },
1720
- });
1721
- slots.system += tokens;
1722
- remaining -= tokens;
1723
- if (remaining < 0) {
1724
- warnings.push('Warm-restore repair notice exceeded budget but was retained as non-suppressible system context');
1725
- }
1726
- }
1727
- // ─── Stable Output Profile Prefix ──────────────────────────
1728
- // Keep deterministic output instructions on the static side of the cache
1729
- // boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
1730
- if (remaining > 100 && request.includeLibrary !== false) {
1731
- const fosEnabled = this.config?.enableFOS !== false;
1732
- const modEnabled = this.config?.enableMOD !== false;
1733
- const outputTier = resolveOutputTier((this.config?.hyperformProfile ?? this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
1734
- const stableOutputParts = [];
1735
- if (outputTier.tier === 'light') {
1736
- stableOutputParts.push(renderLightFOS().join('\n'));
1737
- }
1738
- else if (libDb) {
1739
- if (outputTier.fos) {
1740
- const fos = getActiveFOS(libDb);
1741
- if (fos) {
1742
- const fosContent = renderFOS(fos).join('\n');
1743
- if (fosContent.trim())
1744
- stableOutputParts.push(fosContent);
1745
- }
1746
- }
1747
- if (outputTier.mod) {
1748
- const mod = matchMOD(request.model, libDb);
1749
- if (mod) {
1750
- const modContent = renderMOD(mod, null, request.model || '').join('\n');
1751
- if (modContent.trim())
1752
- stableOutputParts.push(modContent);
1753
- }
1754
- }
1755
- }
1756
- if (stableOutputParts.length > 0) {
1757
- const stableOutputContent = stableOutputParts.join('\n\n');
1758
- const stableOutputTokens = estimateTokens(stableOutputContent);
1759
- if (stableOutputTokens <= remaining) {
1760
- messages.push({
1761
- role: 'system',
1762
- textContent: stableOutputContent,
1763
- toolCalls: null,
1764
- toolResults: null,
1765
- });
1766
- slots.system += stableOutputTokens;
1767
- remaining -= stableOutputTokens;
1768
- }
1769
- }
1770
- }
1771
- // ─── Conversation History ──────────────────────────────────
1772
- let diagCrossTopicKeystones = 0;
1773
- // Sprint 4: hoisted so diagnostics block can read it regardless of includeHistory branch.
1774
- let s4RescueTrimFired = false;
1775
- // C1: total tool-chain degradation counters across history budget-fit and safety-valve passes.
1776
- let c1CoEjections = 0;
1777
- let c1StubReplacements = 0;
1778
- // Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
1779
- let composedActiveTopicId;
1780
- let composedActiveTopicName;
1781
- if (request.includeHistory !== false) {
1782
- // Phase 3 (Turn DAG): resolve active context for DAG-native reads.
1783
- // This is the primary branch-scoping mechanism; fence remains as transitional safety.
1784
- let activeContext = null;
1785
- try {
1786
- activeContext = getActiveContext(db, request.agentId, request.sessionKey);
1787
- }
1788
- catch {
1789
- // Context resolution is best-effort — fall back to fence-based reads
1790
- }
1791
- // P3.4: Look up the active topic for this session (non-fatal)
1792
- let activeTopicId;
1793
- let activeTopic;
1794
- if (!request.topicId) {
1795
- try {
1796
- const topicMap = new SessionTopicMap(db);
1797
- activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
1798
- if (activeTopic) {
1799
- activeTopicId = activeTopic.id;
1800
- composeTopicSource = 'session-topic-map';
1801
- }
1802
- }
1803
- catch {
1804
- // Topic lookup is best-effort — fall back to full history
1805
- }
1806
- }
1807
- else {
1808
- activeTopicId = request.topicId;
1809
- composeTopicSource = 'request-topic-id';
1810
- try {
1811
- activeTopic = db.prepare(`
1812
- SELECT id, name
1813
- FROM topics
1814
- WHERE session_key = ? AND id = ?
1815
- LIMIT 1
1816
- `).get(request.sessionKey, request.topicId);
1817
- }
1818
- catch {
1819
- // Topic lookup is best-effort — fall back to ID-only history fetch
1820
- }
1821
- }
1822
- // Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
1823
- composedActiveTopicId = activeTopicId;
1824
- composedActiveTopicName = activeTopic?.name;
1825
- const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, s4EffectiveDepth, // Sprint 4: adaptive depth (replaces fixed maxHistoryMessages)
1826
- store, activeTopicId, fenceMessageId, activeContext);
1827
- // Deduplicate history by StoredMessage.id (second line of defense after
1828
- // pushHistory() tail-check dedup). Guards against any duplicates that
1829
- // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
1830
- const seenIds = new Set();
1831
- const historyMessages = rawHistoryMessages.filter(m => {
1832
- const sm = m;
1833
- if (sm.id != null) {
1834
- if (seenIds.has(sm.id))
1835
- return false;
1836
- seenIds.add(sm.id);
1837
- }
1838
- return true;
1839
- });
1840
- s09ObservedUserTurnCount = Math.max(s09ObservedUserTurnCount, historyMessages.filter(m => m.role === 'user').length);
1841
- composeTopicMessageCount = historyMessages.length;
1842
- composeTopicStampedMessageCount = historyMessages.filter(m => typeof m.topicId === 'string').length;
1843
- // ── Transform-first: apply gradient tool treatment BEFORE budget math ──
1844
- // All tool payloads are in their final form before any token estimation.
1845
- // This ensures estimateMessageTokens() measures actual submission cost,
1846
- // not pre-transform cost (which caused overflow: dense tool JSON was
1847
- // undercounted at length/4 when it should be measured post-stub).
1848
- const transformedHistory = applyToolGradient(historyMessages, { totalWindowTokens: totalWindow });
1849
- // ── Evict large tool results (>800 tokens) before window selection ─────
1850
- // Replace oversized stale results with stubs so they don't burn budget.
1851
- // Current-turn results (turn age 0) are never evicted.
1852
- const evictedHistory = evictLargeToolResults(transformedHistory);
1853
- const c2ResolvedHistory = resolveOversizedArtifacts(evictedHistory, budget);
1854
- c2ArtifactDegradations += c2ResolvedHistory.refCount;
1855
- // ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
1856
- // Group tool_use + tool_result messages into clusters so they are kept
1857
- // or dropped as a unit. Breaking mid-cluster creates orphaned tool
1858
- // pairs that repairToolPairs has to strip downstream — wasting budget
1859
- // and leaving gaps in conversation continuity.
1860
- const budgetClusters = clusterNeutralMessages(c2ResolvedHistory.messages);
1861
- let historyTokens = 0;
1862
- const includedClusters = [];
1863
- // Pre-allocate history budget. historyFraction is a fraction of the
1864
- // effective token budget (post-reserve). Falls back to unbounded fill
1865
- // (remaining) when historyFraction is not set.
1866
- // B4: uses b4HistoryFraction (model-aware, blended from MECW catalog) instead
1867
- // of raw config.historyFraction so history doesn't overflow MECW ceiling.
1868
- const historyBudget = Math.floor(budget * b4HistoryFraction);
1869
- const historyFillCap = Math.min(historyBudget, remaining);
1870
- // 0.9.0: adaptive eviction ordering. For elevated/high/critical bands,
1871
- // drop inactive-topic non-tool clusters first when an active topic is
1872
- // known. Bootstrap/warmup/steady reproduce the historical newest-first
1873
- // sweep exactly (preferTopicAwareDrop=false → evictedByPlan stays empty).
1874
- const adaptiveOrdering = orderClustersForAdaptiveEviction(budgetClusters, evictionLifecyclePolicy, { activeTopicId });
1875
- adaptiveEvictionTopicAwareEligibleClusters = adaptiveOrdering.telemetry.topicAwareEligibleClusters;
1876
- adaptiveEvictionProtectedClusters = adaptiveOrdering.telemetry.protectedClusters;
1877
- adaptiveEvictionTopicIdCoveragePct = adaptiveOrdering.telemetry.topicIdCoveragePct;
1878
- adaptiveEvictionBypassReason = adaptiveOrdering.telemetry.bypassReason;
1879
- if (!activeTopicId)
1880
- composeTopicState = 'no-active-topic';
1881
- else if (composeTopicStampedMessageCount === 0)
1882
- composeTopicState = 'active-topic-missing-stamped-history';
1883
- else
1884
- composeTopicState = 'active-topic-ready';
1885
- const evictedByPlan = new Set();
1886
- let projectedTokens = budgetClusters.reduce((s, c) => s + c.tokenCost, 0);
1887
- if (adaptiveOrdering.preferTopicAwareDrop
1888
- && adaptiveOrdering.topicAwareDropOrder.length > 0
1889
- && projectedTokens <= historyFillCap) {
1890
- adaptiveEvictionBypassReason = 'within-budget';
1891
- }
1892
- if (adaptiveOrdering.preferTopicAwareDrop
1893
- && adaptiveOrdering.topicAwareDropOrder.length > 0
1894
- && projectedTokens > historyFillCap) {
1895
- for (const idx of adaptiveOrdering.topicAwareDropOrder) {
1896
- if (projectedTokens <= historyFillCap)
1897
- break;
1898
- if (adaptiveOrdering.protectedIndices.has(idx))
1899
- continue;
1900
- evictedByPlan.add(idx);
1901
- projectedTokens -= budgetClusters[idx].tokenCost;
1902
- }
1903
- adaptiveEvictionTopicAwareDroppedClusters = evictedByPlan.size;
1904
- }
1905
- let truncationCutIndex = -1;
1906
- for (let i = budgetClusters.length - 1; i >= 0; i--) {
1907
- if (evictedByPlan.has(i))
1908
- continue;
1909
- const cluster = budgetClusters[i];
1910
- if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
1911
- truncationCutIndex = i;
1912
- break;
1913
- }
1914
- includedClusters.unshift(cluster);
1915
- historyTokens += cluster.tokenCost;
1916
- }
1917
- if (truncationCutIndex >= 0 || evictedByPlan.size > 0) {
1918
- const droppedIndices = [];
1919
- if (truncationCutIndex >= 0) {
1920
- for (let i = 0; i <= truncationCutIndex; i++) {
1921
- if (!evictedByPlan.has(i))
1922
- droppedIndices.push(i);
1923
- }
1924
- }
1925
- for (const idx of evictedByPlan)
1926
- droppedIndices.push(idx);
1927
- const droppedClusters = droppedIndices.map(i => budgetClusters[i]);
1928
- const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
1929
- const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
1930
- if (droppedToolResultCount > 0) {
1931
- c1CoEjections += droppedToolResultCount;
1932
- console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
1933
- }
1934
- if (droppedMsgCount > 0) {
1935
- const c1Note = droppedToolResultCount > 0
1936
- ? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
1937
- : '';
1938
- const planNote = evictedByPlan.size > 0
1939
- ? ` [adaptive: band=${evictionLifecyclePolicy.band} topic-aware-dropped=${evictedByPlan.size}]`
1940
- : '';
1941
- const cutLabel = truncationCutIndex >= 0
1942
- ? `${truncationCutIndex + 1}/${budgetClusters.length}`
1943
- : `0/${budgetClusters.length}`;
1944
- warnings.push(`History truncated at cluster ${cutLabel} (${droppedMsgCount} messages dropped)${c1Note}${planNote}`);
1945
- if (truncationCutIndex >= 0)
1946
- s4RescueTrimFired = true;
1947
- }
1948
- }
1949
- const includedHistory = includedClusters.flatMap(c => c.messages);
1950
- // ── Keystone History Slot (P2.1) ──────────────────────────────────
1951
- // For long conversations (≥30 messages), inject high-signal older messages
1952
- // from before the recent window as recalled context. This lets the model
1953
- // see key decisions and specs that happened earlier in the conversation
1954
- // without them consuming the full recent history budget.
1955
- const keystoneFraction = this.config.keystoneHistoryFraction ?? 0.2;
1956
- const keystoneMaxMsgs = this.config.keystoneMaxMessages ?? 15;
1957
- let keystoneMessages = [];
1958
- let keystoneTokens = 0;
1959
- if (request.includeKeystones !== false && includedHistory.length >= 30 && keystoneFraction > 0) {
1960
- const keystoneResult = await this.buildKeystones(db, request.agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, request.prompt, libDb || undefined, fenceMessageId, activeContext);
1961
- if (keystoneResult) {
1962
- keystoneMessages = keystoneResult.keystoneMessages;
1963
- keystoneTokens = keystoneResult.keystoneTokens;
1964
- // Replace includedHistory and historyTokens with the trimmed versions
1965
- // (keystoneResult reflects the trimming done inside buildKeystones)
1966
- includedHistory.splice(0, includedHistory.length, ...keystoneResult.trimmedHistory);
1967
- historyTokens = keystoneResult.trimmedHistoryTokens;
1968
- warnings.push(`Keystone: injected ${keystoneMessages.length} recalled messages`);
1969
- }
1970
- }
1971
- // ── Cross-Topic Keystones (P3.5) ──────────────────────────────────
1972
- // Pull high-signal messages from OTHER topics in this session when their
1973
- // content is semantically relevant to the current topic. Non-fatal.
1974
- let crossTopicMessages = [];
1975
- let crossTopicTokens = 0;
1976
- if (request.includeKeystones !== false && activeTopic && this.vectorStore) {
1977
- try {
1978
- const rawCrossTopicKeystones = await this.getKeystonesByTopic(request.agentId, request.sessionKey, activeTopic, includedHistory, db, 3, fenceMessageId, activeContext);
1979
- if (rawCrossTopicKeystones.length > 0) {
1980
- // Token budget: cap the full cross-topic block at 15% of remaining,
1981
- // including the header line.
1982
- const crossTopicHeaderTokens = estimateTokens('## Cross-Topic Context');
1983
- const crossTopicBudget = Math.max(0, Math.floor(remaining * 0.15) - crossTopicHeaderTokens);
1984
- let used = 0;
1985
- for (const candidate of rawCrossTopicKeystones) {
1986
- const msg = {
1987
- role: candidate.role,
1988
- textContent: candidate.content,
1989
- toolCalls: null,
1990
- toolResults: null,
1991
- };
1992
- const msgTokens = estimateMessageTokens(msg);
1993
- if (used + msgTokens > crossTopicBudget)
1994
- continue;
1995
- crossTopicMessages.push(msg);
1996
- used += msgTokens;
1997
- }
1998
- crossTopicTokens = used;
1999
- diagCrossTopicKeystones = crossTopicMessages.length;
2000
- }
2001
- }
2002
- catch {
2003
- // Cross-topic retrieval is non-fatal — never block compose
2004
- }
2005
- }
2006
- // Push history with keystone separators if we have keystones.
2007
- if (keystoneMessages.length > 0 || crossTopicMessages.length > 0) {
2008
- // Cross-topic context (from other topics) — prepended before within-session keystones
2009
- if (crossTopicMessages.length > 0) {
2010
- messages.push({
2011
- role: 'system',
2012
- textContent: '## Cross-Topic Context',
2013
- toolCalls: null,
2014
- toolResults: null,
2015
- });
2016
- messages.push(...crossTopicMessages);
2017
- }
2018
- // Separator before recalled context (within-session keystones)
2019
- if (keystoneMessages.length > 0) {
2020
- messages.push({
2021
- role: 'system',
2022
- textContent: '## Recalled Context (high-signal older messages)',
2023
- toolCalls: null,
2024
- toolResults: null,
2025
- });
2026
- messages.push(...keystoneMessages);
2027
- }
2028
- // Separator before recent conversation
2029
- messages.push({
2030
- role: 'system',
2031
- textContent: '## Recent Conversation',
2032
- toolCalls: null,
2033
- toolResults: null,
2034
- });
2035
- messages.push(...includedHistory);
2036
- // Account for separator tokens in history slot
2037
- const crossTopicSepTokens = crossTopicMessages.length > 0
2038
- ? estimateTokens('## Cross-Topic Context')
2039
- : 0;
2040
- const keystoneSepTokens = keystoneMessages.length > 0
2041
- ? estimateTokens('## Recalled Context (high-signal older messages)')
2042
- : 0;
2043
- const recentSepTokens = estimateTokens('## Recent Conversation');
2044
- const sepTokens = crossTopicSepTokens + keystoneSepTokens + recentSepTokens;
2045
- slots.history = historyTokens + keystoneTokens + crossTopicTokens + sepTokens;
2046
- remaining -= (historyTokens + keystoneTokens + crossTopicTokens + sepTokens);
2047
- }
2048
- else {
2049
- messages.push(...includedHistory);
2050
- slots.history = historyTokens;
2051
- remaining -= historyTokens;
2052
- }
2053
- // Memory budget pool: facts, wiki, semantic recall, cross-session, and
2054
- // trigger-fired doc chunks all draw from this shared pool via `remaining`.
2055
- // B4: uses b4MemoryFraction (model-aware, blended from MECW catalog) instead
2056
- // of raw config.memoryFraction so the memory pool scales with what the model
2057
- // can effectively attend to within its MECW ceiling.
2058
- let memoryBudget;
2059
- {
2060
- memoryBudget = Math.floor(budget * b4MemoryFraction);
2061
- if (remaining > memoryBudget) {
2062
- remaining = memoryBudget;
2063
- }
2064
- }
2065
- // T1.3: Ghost message suppression.
2066
- // If the last message in the included history is a warm-seeded user message
2067
- // AND there's a subsequent message in SQLite that wasn't included (meaning
2068
- // the assistant already responded), drop it. This prevents the model from
2069
- // re-answering a question that was already handled in a prior session.
2070
- // Only triggers when: (1) message has _warmed flag, (2) it's role=user,
2071
- // (3) SQLite has messages after it (the response exists but wasn't included).
2072
- const lastIncluded = messages[messages.length - 1];
2073
- if (lastIncluded?.role === 'user') {
2074
- const sm = lastIncluded;
2075
- const meta = sm.metadata;
2076
- if (meta?._warmed && sm.id != null) {
2077
- // Check if there are any messages after this one in SQLite
2078
- try {
2079
- const hasMore = db.prepare('SELECT 1 FROM messages WHERE conversation_id = (SELECT conversation_id FROM messages WHERE id = ?) AND id > ? LIMIT 1').get(sm.id, sm.id);
2080
- if (hasMore) {
2081
- messages.pop();
2082
- warnings.push('Dropped trailing warm-seeded user message with existing response (ghost suppression)');
2083
- }
2084
- }
2085
- catch {
2086
- // Ghost check is best-effort — don't block compose
2087
- }
2088
- }
2089
- }
2090
- }
2091
- // ─── Cache-ordered context assembly ─────────────────────────
2092
- // Stable, reusable material is lifted above the cache boundary as its
2093
- // own system messages. Session-volatile material stays in the dynamic
2094
- // context block below that boundary.
2095
- const stablePrefixMessages = [];
2096
- const volatileContextParts = [];
2097
- let contextTokens = 0;
2098
- // ── C1: Content fingerprint dedup set ────────────────────
2099
- // Replaces fragile substring-match dedup across temporal, open-domain,
2100
- // semantic recall, and cross-session paths. O(1) lookup on a normalized
2101
- // 120-char prefix catches rephrased duplicates the old 60-char includes()
2102
- // match missed without needing a hash.
2103
- const contextFingerprints = new Set();
2104
- const fingerprintEntries = new Map();
2105
- // ── Compose-level diagnostics tracking vars ──────────────
2106
- let diagTriggerHits = 0;
2107
- let diagTriggerFallbackUsed = false;
2108
- let diagFactsIncluded = 0;
2109
- let diagSemanticResults = 0;
2110
- let diagDocChunkCollections = 0;
2111
- let diagScopeFiltered = 0;
2112
- let diagFingerprintDedups = 0;
2113
- let diagFingerprintCollisions = 0;
2114
- let diagRetrievalMode = 'none';
2115
- // Sprint 1: reranker telemetry captured from hybridSearch via onRerankerTelemetry
2116
- let diagRerankerStatus;
2117
- let diagRerankerCandidates;
2118
- let diagRerankerProvider;
2119
/**
 * Canonicalise text for fingerprint comparison: lower-case it, collapse every
 * run of whitespace into a single space, and trim both ends.
 *
 * Nullish input yields an empty string and any other non-string value is
 * stringified first, so a missing content field can never throw from inside
 * the dedup helpers.
 *
 * @param {unknown} text - Candidate text; normally a string.
 * @returns {string} The normalized form.
 */
function normalizeFingerprintText(text) {
    // Guard before calling string methods: `undefined.toLowerCase()` would throw.
    const raw = typeof text === 'string' ? text : String(text ?? '');
    return raw.toLowerCase().replace(/\s+/g, ' ').trim();
}
2122
/**
 * Build the dedup key for a piece of text: the first 120 characters of its
 * normalized form.
 *
 * @param {string} text - Text to fingerprint.
 * @returns {string} Normalized prefix of at most 120 characters.
 */
function contentFingerprint(text) {
    const canonical = normalizeFingerprintText(text);
    return canonical.slice(0, 120);
}
2125
/**
 * Record a piece of text in the dedup structures.
 *
 * The 120-character normalized prefix goes into `contextFingerprints`, and the
 * full normalized text is added to the set stored under that prefix in
 * `fingerprintEntries`, creating the set on first use.
 *
 * @param {string} text - Text that has been placed into the composed context.
 */
function addFingerprint(text) {
    const canonical = normalizeFingerprintText(text);
    const key = canonical.slice(0, 120);
    contextFingerprints.add(key);
    let bucket = fingerprintEntries.get(key);
    if (bucket == null) {
        bucket = new Set();
    }
    bucket.add(canonical);
    // Always write back, so a freshly created bucket is registered.
    fingerprintEntries.set(key, bucket);
}
2133
/**
 * Report whether text shares a 120-character normalized prefix with something
 * already recorded in `contextFingerprints`.
 *
 * When the prefix matches but the full normalized text is not among the
 * entries stored for it, `diagFingerprintCollisions` is incremented; the text
 * is still reported as a duplicate.
 *
 * @param {string} text - Candidate text.
 * @returns {boolean} True when the prefix fingerprint has been seen before.
 */
function isDuplicate(text) {
    const canonical = normalizeFingerprintText(text);
    const key = canonical.slice(0, 120);
    const seen = contextFingerprints.has(key);
    if (seen) {
        const bucket = fingerprintEntries.get(key);
        const exactMatch = !bucket || bucket.has(canonical);
        if (!exactMatch) {
            diagFingerprintCollisions += 1;
        }
    }
    return seen;
}
2143
- // ── Wiki Page (L4: Library — active topic synthesis) ──────
2144
- // Inject synthesized wiki page for the active topic before general knowledge.
2145
- // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
2146
- if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
2147
- const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
2148
- if (wikiContent) {
2149
- const tokens = estimateTokens(wikiContent);
2150
- if (tokens <= remaining) {
2151
- volatileContextParts.push(wikiContent);
2152
- contextTokens += tokens;
2153
- remaining -= tokens;
2154
- slots.library += tokens;
2155
- }
2156
- else if (remaining > 200) {
2157
- const truncated = this.truncateToTokens(wikiContent, remaining);
2158
- const truncTokens = estimateTokens(truncated);
2159
- volatileContextParts.push(truncated);
2160
- contextTokens += truncTokens;
2161
- remaining -= truncTokens;
2162
- slots.library += truncTokens;
2163
- }
2164
- }
2165
- }
2166
- // ── Facts (L4: Library) ──────────────────────────────────
2167
- // scope: agent — filtered by agentId via filterByScope after fetch
2168
- // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
2169
- if (request.includeFacts !== false && remaining > 500) {
2170
- const factSections = this.buildFactSectionsFromDb(request.agentId, request.sessionKey, libDb || db);
2171
- if (factSections !== null) {
2172
- const { stableContent, stableCount, volatileContent, volatileCount, filteredCount } = factSections;
2173
- diagFactsIncluded += stableCount + volatileCount;
2174
- diagScopeFiltered += filteredCount;
2175
- if (stableContent) {
2176
- const stableFactsBlock = `## Stable Facts\n${stableContent}`;
2177
- const tokens = estimateTokens(stableFactsBlock);
2178
- if (tokens <= remaining) {
2179
- stablePrefixMessages.push({
2180
- role: 'system',
2181
- textContent: stableFactsBlock,
2182
- toolCalls: null,
2183
- toolResults: null,
2184
- });
2185
- contextTokens += tokens;
2186
- remaining -= tokens;
2187
- slots.facts += tokens;
2188
- }
2189
- else if (remaining > 200) {
2190
- const truncated = this.truncateToTokens(stableFactsBlock, remaining);
2191
- const truncTokens = estimateTokens(truncated);
2192
- stablePrefixMessages.push({
2193
- role: 'system',
2194
- textContent: truncated,
2195
- toolCalls: null,
2196
- toolResults: null,
2197
- });
2198
- contextTokens += truncTokens;
2199
- remaining -= truncTokens;
2200
- slots.facts += truncTokens;
2201
- warnings.push('Stable facts truncated to fit memory budget');
2202
- }
2203
- for (const line of stableContent.split('\n')) {
2204
- if (line.startsWith('- ['))
2205
- addFingerprint(line);
2206
- }
2207
- }
2208
- if (volatileContent) {
2209
- const volatileFactsBlock = `## Active Facts\n${volatileContent}`;
2210
- const tokens = estimateTokens(volatileFactsBlock);
2211
- if (tokens <= remaining) {
2212
- volatileContextParts.push(volatileFactsBlock);
2213
- contextTokens += tokens;
2214
- remaining -= tokens;
2215
- slots.facts += tokens;
2216
- }
2217
- else if (remaining > 200) {
2218
- const truncated = this.truncateToTokens(volatileFactsBlock, remaining);
2219
- const truncTokens = estimateTokens(truncated);
2220
- volatileContextParts.push(truncated);
2221
- contextTokens += truncTokens;
2222
- remaining -= truncTokens;
2223
- slots.facts += truncTokens;
2224
- warnings.push('Active facts truncated to fit memory budget');
2225
- }
2226
- for (const line of volatileContent.split('\n')) {
2227
- if (line.startsWith('- ['))
2228
- addFingerprint(line);
2229
- }
2230
- }
2231
- }
2232
- // ── Temporal retrieval (L4: Library) ─────────────────────
2233
- // Fires when the query has temporal signals (before/after/when/last etc).
2234
- // Returns facts in time order from temporal_index. Deduplicates against
2235
- // facts already included above. Uses ingest_at as occurred_at proxy (v1).
2236
- const queryText = request.prompt ?? '';
2237
- if (request.includeSemanticRecall !== false && queryText && hasTemporalSignals(queryText) && libDb && remaining > 300) {
2238
- try {
2239
- const temporalStore = new TemporalStore(libDb);
2240
- const temporalFacts = temporalStore.timeRangeQuery({
2241
- agentId: request.agentId,
2242
- limit: 15,
2243
- order: 'DESC',
2244
- });
2245
- if (temporalFacts.length > 0) {
2246
- const beforeCount = temporalFacts.length;
2247
- const novel = temporalFacts.filter(f => !isDuplicate(f.content));
2248
- diagFingerprintDedups += beforeCount - novel.length;
2249
- if (novel.length > 0) {
2250
- const temporalBlock = novel
2251
- .map(f => {
2252
- const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
2253
- const line = `[${ts}] ${f.content}`;
2254
- addFingerprint(f.content);
2255
- return line;
2256
- })
2257
- .join('\n');
2258
- const temporalSection = `## Temporal Context\n${temporalBlock}`;
2259
- const tempTokens = estimateTokens(temporalSection);
2260
- const tempBudget = Math.floor(remaining * 0.20);
2261
- if (tempTokens <= tempBudget) {
2262
- volatileContextParts.push(temporalSection);
2263
- contextTokens += tempTokens;
2264
- remaining -= tempTokens;
2265
- slots.facts = (slots.facts ?? 0) + tempTokens;
2266
- }
2267
- else {
2268
- const truncated = this.truncateToTokens(temporalSection, tempBudget);
2269
- const truncTokens = estimateTokens(truncated);
2270
- volatileContextParts.push(truncated);
2271
- contextTokens += truncTokens;
2272
- remaining -= truncTokens;
2273
- slots.facts = (slots.facts ?? 0) + truncTokens;
2274
- }
2275
- }
2276
- }
2277
- }
2278
- catch {
2279
- // Temporal index not yet available (migration pending) — skip silently
2280
- }
2281
- }
2282
- // ── Open-domain FTS retrieval (L4: Library) ──────────────────
2283
- // Fires when the query looks broad/exploratory with no topical anchor.
2284
- // Searches raw messages_fts — bypasses isQualityFact() quality gate so
2285
- // content filtered from library.db is still reachable for open-domain
2286
- // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
2287
- if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
2288
- try {
2289
- const rawOdResults = searchOpenDomain(db, queryText, '', 10);
2290
- const beforeOd = rawOdResults.length;
2291
- const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
2292
- diagFingerprintDedups += beforeOd - odResults.length;
2293
- if (odResults.length > 0) {
2294
- const odBlock = odResults
2295
- .map(r => {
2296
- addFingerprint(r.content);
2297
- const ts = r.createdAt
2298
- ? new Date(r.createdAt).toISOString().slice(0, 10)
2299
- : '';
2300
- const prefix = ts ? `[${ts}] ` : '';
2301
- const snippet = r.content.length > 300
2302
- ? r.content.slice(0, 300) + '…'
2303
- : r.content;
2304
- return `${prefix}${snippet}`;
2305
- })
2306
- .join('\n');
2307
- const odSection = `## Open Domain Context\n${odBlock}`;
2308
- const odTokens = estimateTokens(odSection);
2309
- const odBudget = Math.floor(remaining * 0.20);
2310
- if (odTokens <= odBudget) {
2311
- volatileContextParts.push(odSection);
2312
- contextTokens += odTokens;
2313
- remaining -= odTokens;
2314
- slots.facts = (slots.facts ?? 0) + odTokens;
2315
- }
2316
- else {
2317
- const truncated = this.truncateToTokens(odSection, odBudget);
2318
- const truncTokens = estimateTokens(truncated);
2319
- volatileContextParts.push(truncated);
2320
- contextTokens += truncTokens;
2321
- remaining -= truncTokens;
2322
- slots.facts = (slots.facts ?? 0) + truncTokens;
2323
- }
2324
- }
2325
- }
2326
- catch {
2327
- // Open-domain FTS unavailable — skip silently
2328
- }
2329
- }
2330
- }
2331
- // ── Knowledge (L4: Library) ──────────────────────────────
2332
- // scope: agent — filtered by agent_id in the SQL query (existing behavior)
2333
- if (request.includeLibrary !== false && remaining > 500 && libDb) {
2334
- const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
2335
- if (knowledgeContent) {
2336
- const stableKnowledgeBlock = `## Knowledge\n${knowledgeContent}`;
2337
- const tokens = estimateTokens(stableKnowledgeBlock);
2338
- if (tokens <= remaining * 0.2) {
2339
- stablePrefixMessages.push({
2340
- role: 'system',
2341
- textContent: stableKnowledgeBlock,
2342
- toolCalls: null,
2343
- toolResults: null,
2344
- });
2345
- contextTokens += tokens;
2346
- remaining -= tokens;
2347
- slots.library += tokens;
2348
- }
2349
- else {
2350
- const truncated = this.truncateToTokens(stableKnowledgeBlock, Math.floor(remaining * 0.2));
2351
- const truncTokens = estimateTokens(truncated);
2352
- stablePrefixMessages.push({
2353
- role: 'system',
2354
- textContent: truncated,
2355
- toolCalls: null,
2356
- toolResults: null,
2357
- });
2358
- contextTokens += truncTokens;
2359
- remaining -= truncTokens;
2360
- slots.library += truncTokens;
2361
- warnings.push('Knowledge truncated to fit budget');
2362
- }
2363
- }
2364
- }
2365
- // ── Preferences (L4: Library) ────────────────────────────
2366
- // scope: agent — filtered by agent_id OR NULL in the SQL query (existing behavior)
2367
- if (request.includeLibrary !== false && remaining > 300 && libDb) {
2368
- const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
2369
- if (prefsContent) {
2370
- const stablePrefsBlock = `## User Preferences\n${prefsContent}`;
2371
- const tokens = estimateTokens(stablePrefsBlock);
2372
- if (tokens <= remaining * 0.1) {
2373
- stablePrefixMessages.push({
2374
- role: 'system',
2375
- textContent: stablePrefsBlock,
2376
- toolCalls: null,
2377
- toolResults: null,
2378
- });
2379
- contextTokens += tokens;
2380
- remaining -= tokens;
2381
- slots.library += tokens;
2382
- }
2383
- }
2384
- }
2385
- // ── Semantic Recall (L3: Hybrid FTS5+KNN) ───────────────
2386
- // scope: agent — buildSemanticRecall filters by agentId internally
2387
- // Fires when either vector store or library DB is available.
2388
- // FTS5-only (no embeddings) still returns keyword matches.
2389
- // KNN-only (no FTS terms) still returns semantic matches.
2390
- // Both present → Reciprocal Rank Fusion.
2391
- // Use request.prompt as the retrieval query when available — it is the
2392
- // live current-turn text. Falling back to getLastUserMessage(messages)
2393
- // reads from the already-assembled history, which is one turn stale.
2394
- // 0.9.0: resolve adaptive lifecycle policy immediately before semantic recall
2395
- // so smartRecallMultiplier scales the recall token budget and candidate limit
2396
- // from the same policy object that compose diagnostics later report.
2397
- const composePreRecallPressure = computeUnifiedPressure(contextTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
2398
- const s09ComposePolicyPressure = s09ForkedContextSeed
2399
- && s09ObservedUserTurnCount === 0
2400
- && s09ForkedParentPressure != null
2401
- ? s09ForkedParentPressure
2402
- : composePreRecallPressure.fraction;
2403
- const composeLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
2404
- pressureFraction: s09ComposePolicyPressure,
2405
- userTurnCount: s09ObservedUserTurnCount,
2406
- explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? this.getLastUserMessage(messages)),
2407
- forkedContext: Boolean(s09ForkedContextSeed),
2408
- forkedParentPressureFraction: s09ForkedParentPressure,
2409
- forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
2410
- });
2411
- const recallBreadth = scaleRecallBreadth(remaining, composeLifecyclePolicy.smartRecallMultiplier);
2412
- let diagAdaptiveRecallBudgetTokens;
2413
- let diagAdaptiveRecallCandidateLimit;
2414
- if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
2415
- const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
2416
- if (lastUserMsg) {
2417
- try {
2418
- // Check Redis for a pre-computed embedding from afterTurn()
2419
- let precomputedEmbedding;
2420
- try {
2421
- const cached = await this.cache.getQueryEmbedding(request.agentId, request.sessionKey);
2422
- if (cached)
2423
- precomputedEmbedding = cached;
2424
- }
2425
- catch {
2426
- // Redis lookup is best-effort — fall through to Ollama
2427
- }
2428
- diagAdaptiveRecallBudgetTokens = recallBreadth.mainBudgetTokens;
2429
- diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2430
- const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId,
2431
- // 0.9.0: recall token budget = base 0.12 of remaining * lifecycle multiplier.
2432
- recallBreadth.mainBudgetTokens, libDb || undefined, precomputedEmbedding, contextFingerprints, // C2: skip results already in Active Facts
2433
- // Sprint 1: capture reranker telemetry at assemble level
2434
- (ev) => {
2435
- diagRerankerStatus = ev.status;
2436
- diagRerankerCandidates = ev.candidates;
2437
- diagRerankerProvider = ev.provider;
2438
- }, recallBreadth.candidateLimit);
2439
- if (semanticContent) {
2440
- const tokens = estimateTokens(semanticContent);
2441
- volatileContextParts.push(`## Related Memory\n${semanticContent}`);
2442
- contextTokens += tokens;
2443
- remaining -= tokens;
2444
- // Semantic recall draws from multiple sources, attribute to context
2445
- slots.context += tokens;
2446
- // W3 diagnostics: count non-empty lines as rough results count
2447
- diagSemanticResults = semanticContent.split('\n').filter(l => l.trim().length > 0).length;
2448
- }
2449
- }
2450
- catch (err) {
2451
- // Semantic search is best-effort — don't fail composition
2452
- warnings.push(`Semantic recall failed: ${err.message}`);
2453
- }
2454
- }
2455
- }
2456
- // ── Doc Chunks (L4: Trigger-based retrieval) ─────────────
2457
- // scope: per-tier/per-agent — queryChunks filters by agentId and tier
2458
- // Demand-load governance, identity, and memory chunks based on
2459
- // conversation context. Replaces full ACA file injection for
2460
- // the files that have been seeded into the doc chunk index.
2461
- let triggerFallbackUsed = false;
2462
- if (request.includeDocChunks !== false && remaining > 400 && libDb) {
2463
- // Use request.prompt when available (current-turn text, not stale history)
2464
- const lastMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
2465
- const triggered = matchTriggers(lastMsg, this.triggerRegistry);
2466
- if (triggered.length > 0) {
2467
- diagTriggerHits = triggered.length;
2468
- diagRetrievalMode = 'triggered';
2469
- const docChunkStore = new DocChunkStore(libDb);
2470
- const docParts = [];
2471
- const maxTotalTriggerTokens = Math.min(remaining, this.config.maxTotalTriggerTokens && this.config.maxTotalTriggerTokens > 0
2472
- ? this.config.maxTotalTriggerTokens
2473
- : Math.floor(remaining * 0.40));
2474
- let totalTriggerTokens = 0;
2475
- for (const trigger of triggered) {
2476
- if (remaining < 200)
2477
- break;
2478
- const triggerBudgetRemaining = maxTotalTriggerTokens - totalTriggerTokens;
2479
- if (triggerBudgetRemaining < 200)
2480
- break;
2481
- const maxTokens = Math.min(trigger.maxTokens || 1000, Math.floor(remaining * 0.12), // No single collection takes > 12% of remaining (W4: was 0.15)
2482
- triggerBudgetRemaining);
2483
- try {
2484
- // Build a relevance-based FTS5 query from the user message.
2485
- //
2486
- // Problem: trigger keywords are stems ('escalat', 'irreversib') for
2487
- // substring matching against user messages, but FTS5 tokenizes on word
2488
- // boundaries. 'escalat' does not match 'escalation' in FTS5 without a
2489
- // prefix operator.
2490
- //
2491
- // Solution: extract actual words from the user message that contain a
2492
- // matched trigger keyword, then use FTS5 prefix queries (word*) for
2493
- // each extracted word. This bridges stem-matching and FTS5 indexing.
2494
- const msgLower = lastMsg.toLowerCase();
2495
- const matchedKeywords = trigger.keywords.filter(kw => msgLower.includes(kw.toLowerCase()));
2496
- // Extract whole words from the message that overlap with matched keywords
2497
- const msgWords = lastMsg.match(/\b\w{4,}\b/g) || [];
2498
- const relevantWords = msgWords.filter(word => matchedKeywords.some(kw => word.toLowerCase().includes(kw.toLowerCase()) ||
2499
- kw.toLowerCase().includes(word.toLowerCase().slice(0, 5))));
2500
- // Build FTS5 OR query: "word1* OR word2* OR word3*"
2501
- // FTS5 treats space-separated terms as AND by default — we want OR so
2502
- // that any relevant term is sufficient to retrieve a matching chunk.
2503
- // Prefix operator (*) ensures stems match full words in the index.
2504
- // Sort by keyword match specificity (longer matched keyword = more specific term),
2505
- // then cap at 6 terms to keep FTS queries reasonable.
2506
- // No positional slice — all relevant words participate, not just the first 3.
2507
- const sortedWords = [...new Set(relevantWords)].sort((a, b) => {
2508
- const aLen = Math.max(...matchedKeywords.filter(kw => a.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(a.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
2509
- const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
2510
- return bLen - aLen; // Most specific match first
2511
- });
2512
- // Sanitize FTS5 terms: quote each word, strip internal quotes, add prefix wildcard.
2513
- // Matches the pattern used in the keystone history FTS path.
2514
- const sanitizeFtsTerm = (w) => `"${w.replace(/"/g, '')}"*`;
2515
- const ftsTerms = sortedWords.length > 0
2516
- ? sortedWords.slice(0, 6).map(sanitizeFtsTerm).join(' OR ')
2517
- : matchedKeywords
2518
- .sort((a, b) => b.length - a.length)
2519
- .slice(0, 3)
2520
- .map(sanitizeFtsTerm)
2521
- .join(' OR ');
2522
- // Fallback uses raw message words — also sanitize to prevent FTS5 syntax errors.
2523
- const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3)
2524
- .map(sanitizeFtsTerm).join(' OR ');
2525
- const chunks = docChunkStore.queryChunks({
2526
- collection: trigger.collection,
2527
- agentId: request.agentId,
2528
- tier: request.tier,
2529
- limit: trigger.maxChunks || 3,
2530
- keyword: ftsKeyword,
2531
- });
2532
- if (chunks.length === 0)
2533
- continue;
2534
- const chunkLines = [];
2535
- let chunkTokens = 0;
2536
- for (const chunk of chunks) {
2537
- // Skip chunks from files OpenClaw already injects into the system prompt
2538
- const chunkBasename = chunk.sourcePath.split('/').pop() || '';
2539
- if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
2540
- continue;
2541
- // C2: degrade oversized chunks to canonical artifact references before
2542
- // enforcing the per-collection budget gate. Otherwise an oversized raw
2543
- // chunk gets dropped before the tiny degraded ref ever has a chance to fit.
2544
- const c2ChunkRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
2545
- const renderedChunk = c2ChunkRef !== null
2546
- ? `### ${chunk.sectionPath}\n${c2ChunkRef}`
2547
- : `### ${chunk.sectionPath}\n${chunk.content}`;
2548
- const renderedTokens = estimateTokens(renderedChunk);
2549
- if (chunkTokens + renderedTokens > maxTokens)
2550
- break;
2551
- chunkLines.push(renderedChunk);
2552
- chunkTokens += renderedTokens;
2553
- if (c2ChunkRef !== null)
2554
- c2ArtifactDegradations++;
2555
- }
2556
- if (chunkLines.length > 0) {
2557
- const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
2558
- docParts.push(`## ${collectionLabel} (retrieved)\n${chunkLines.join('\n\n')}`);
2559
- totalTriggerTokens += chunkTokens;
2560
- contextTokens += chunkTokens;
2561
- remaining -= chunkTokens;
2562
- slots.library += chunkTokens;
2563
- diagDocChunkCollections++;
2564
- }
2565
- }
2566
- catch {
2567
- // Doc chunk retrieval is best-effort — don't fail composition
2568
- }
2569
- }
2570
- if (docParts.length > 0) {
2571
- volatileContextParts.push(docParts.join('\n\n'));
2572
- }
2573
- }
2574
- else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb)) {
2575
- // Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
2576
- // so there is never a silent zero-memory path on doc chunks.
2577
- // INVARIANT: this block is mutually exclusive with triggered-retrieval above.
2578
- // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
2579
- try {
2580
- // 0.9.0: trigger-miss fallback uses the same lifecycle-scaled breadth so
2581
- // a /new surge widens fallback recall and high/critical pressure narrows it.
2582
- if (diagAdaptiveRecallBudgetTokens === undefined) {
2583
- diagAdaptiveRecallBudgetTokens = recallBreadth.fallbackBudgetTokens;
2584
- diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2585
- }
2586
// Bound the fallback recall to 3 s. The timer handle is kept so it can be
// cleared as soon as the race settles; otherwise a fast recall would leave
// a pending timer behind that keeps the event loop alive and later fires a
// rejection nobody is waiting for.
let fallbackTimer;
const fallbackContent = await Promise.race([
    this.buildSemanticRecall(lastMsg, request.agentId, recallBreadth.fallbackBudgetTokens, libDb || undefined, undefined, contextFingerprints, // C2: skip results already in Active Facts
    undefined, recallBreadth.candidateLimit),
    new Promise((_, reject) => {
        fallbackTimer = setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000);
    }),
]).finally(() => clearTimeout(fallbackTimer));
2591
- if (fallbackContent) {
2592
- volatileContextParts.push(`## Related Memory\n${fallbackContent}`);
2593
- const fallbackTokens = estimateTokens(fallbackContent);
2594
- contextTokens += fallbackTokens;
2595
- remaining -= fallbackTokens;
2596
- slots.context += fallbackTokens;
2597
- triggerFallbackUsed = true;
2598
- diagTriggerFallbackUsed = true;
2599
- diagRetrievalMode = 'fallback_knn';
2600
- }
2601
- }
2602
- catch {
2603
- // Fallback is best-effort — never fail composition (includes timeout)
2604
- }
2605
- }
2606
- }
2607
- // ── Session-Scoped Doc Chunks (spawn context inheritance) ────
2608
- // When parentSessionKey is set, retrieve ephemeral doc chunks indexed
2609
- // by buildSpawnContext() for this spawn session.
2610
- if (request.parentSessionKey && remaining > 200 && libDb) {
2611
- try {
2612
- const spawnChunkStore = new DocChunkStore(libDb);
2613
- const spawnQueryMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
2614
- const spawnChunks = spawnChunkStore.queryDocChunks(request.agentId, spawnQueryMsg, { sessionKey: request.parentSessionKey, limit: 8 });
2615
- if (spawnChunks.length > 0) {
2616
- const spawnLines = [];
2617
- let spawnTokens = 0;
2618
- const maxSpawnTokens = Math.floor(remaining * 0.15);
2619
- for (const chunk of spawnChunks) {
2620
- // C2: degrade oversized spawn chunks before enforcing the lane budget,
2621
- // so a bounded reference can fit even when the raw chunk cannot.
2622
- const c2SpawnRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
2623
- const renderedChunk = c2SpawnRef ?? chunk.content;
2624
- const renderedTokens = estimateTokens(renderedChunk);
2625
- if (spawnTokens + renderedTokens > maxSpawnTokens)
2626
- break;
2627
- spawnLines.push(renderedChunk);
2628
- spawnTokens += renderedTokens;
2629
- if (c2SpawnRef !== null)
2630
- c2ArtifactDegradations++;
2631
- }
2632
- if (spawnLines.length > 0) {
2633
- volatileContextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
2634
- contextTokens += spawnTokens;
2635
- remaining -= spawnTokens;
2636
- slots.library += spawnTokens;
2637
- }
2638
- }
2639
- }
2640
- catch {
2641
- // Session-scoped chunk retrieval is best-effort
2642
- }
2643
- }
2644
- // ── Cross-Session Context (L2: Messages) ─────────────────
2645
- if (request.includeContext !== false && remaining > 500) {
2646
- const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb, contextFingerprints // C3: skip entries already in facts/semantic recall
2647
- );
2648
- if (crossSessionContent) {
2649
- const tokens = estimateTokens(crossSessionContent);
2650
- const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
2651
- if (tokens <= maxContextTokens) {
2652
- volatileContextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
2653
- contextTokens += tokens;
2654
- remaining -= tokens;
2655
- slots.context += tokens;
2656
- }
2657
- else {
2658
- const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
2659
- const truncTokens = estimateTokens(truncated);
2660
- volatileContextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
2661
- contextTokens += truncTokens;
2662
- remaining -= truncTokens;
2663
- slots.context += truncTokens;
2664
- warnings.push('Cross-session context truncated');
2665
- }
2666
- }
2667
- }
2668
- // ── Action Verification Summary ─────────────────────────
2669
- // Keep recent action history on the dynamic side of the cache boundary.
2670
- if (remaining > 50 && request.includeLibrary !== false) {
2671
- const pressurePct = budget > 0 ? Math.round(((budget - remaining) / budget) * 100) : 0;
2672
- const actionSummary = buildActionVerificationSummary(messages, pressurePct);
2673
- if (actionSummary) {
2674
- const actionTokens = Math.ceil(actionSummary.length / 4);
2675
- if (actionTokens <= remaining) {
2676
- volatileContextParts.push(actionSummary);
2677
- contextTokens += actionTokens;
2678
- remaining -= actionTokens;
2679
- slots.context += actionTokens;
2680
- }
2681
- }
2682
- }
2683
- const firstNonSystem = messages.findIndex(m => m.role !== 'system');
2684
- const stableInsertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
2685
- if (stablePrefixMessages.length > 0) {
2686
- messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
2687
- }
2688
- // ── Inject assembled context block ──────────────────────
2689
- // Sprint 4: Prompt-tail placement.
2690
- // Volatile context (active facts, temporal, open-domain, semantic recall,
2691
- // doc chunks, cross-session) moves AFTER all history messages so that
2692
- // query-shaped material lands near the user turn rather than buried mid-prompt.
2693
- //
2694
- // Layout after Sprint 4:
2695
- // [stable prefix: system, identity, FOS/MOD, stable facts, knowledge, prefs]
2696
- // [history: keystones, cross-topic, recent conversation messages]
2697
- // [volatile context block ← here, at the tail] ← Sprint 4 reorder
2698
- // [last user message]
2699
- //
2700
- // The cache boundary (dynamicBoundary: true) stays on this block so the
2701
- // Anthropic/OpenAI cache-prefix logic still fires correctly — everything
2702
- // ABOVE this message is the stable prefix eligible for caching.
2703
- const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
2704
- let s4VolatileContextPosition;
2705
- let s4MessagesBeforeVolatile;
2706
- if (assembledContextBlock) {
2707
- const contextMsg = {
2708
- role: 'system',
2709
- textContent: assembledContextBlock,
2710
- toolCalls: null,
2711
- toolResults: null,
2712
- // CACHE_PREFIX_BOUNDARY_SLOT: this message starts the volatile side of the
2713
- // prompt. Everything above it is stable-prefix material eligible for reuse;
2714
- // everything at or below it is per-session / per-turn context.
2715
- metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
2716
- };
2717
- // Sprint 4: Insert at tail (end of messages array), AFTER history.
2718
- // The last user message (if any) should remain the final message, so we
2719
- // insert the volatile block just before the last user message.
2720
- const lastMsgIdx = messages.length - 1;
2721
- const lastMsg = lastMsgIdx >= 0 ? messages[lastMsgIdx] : undefined;
2722
- if (lastMsg && lastMsg.role === 'user') {
2723
- // Insert volatile block before the last user message so user turn stays last
2724
- messages.splice(lastMsgIdx, 0, contextMsg);
2725
- s4VolatileContextPosition = lastMsgIdx;
2726
- s4MessagesBeforeVolatile = lastMsgIdx;
2727
- }
2728
- else {
2729
- // No trailing user message — append at end
2730
- messages.push(contextMsg);
2731
- s4VolatileContextPosition = messages.length - 1;
2732
- s4MessagesBeforeVolatile = messages.length - 1;
2733
- }
2734
- }
2735
- const stablePrefix = getStablePrefixMessages(messages);
2736
- const prefixSegmentCount = stablePrefix.length;
2737
- const prefixTokens = stablePrefix.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2738
- const volatileHistoryTokens = messages.slice(prefixSegmentCount)
2739
- .reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2740
- const prefixHash = computeStablePrefixHash(stablePrefix);
2741
- // ─── Safety Valve: Post-Assembly Budget Check (C1-aware) ──────────────
2742
- // Re-estimate total tokens after all slots are assembled. If the
2743
- // composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
2744
- // drift), trim history messages from the oldest until we're under budget.
2745
- // History is the most compressible slot — system/identity are never
2746
- // truncated, and context (facts/recall/episodes) is more valuable per-token.
2747
- //
2748
- // C1: When an assistant message with toolCalls is ejected, its dependent
2749
- // tool-result messages are co-ejected or stubbed via resolveToolChainEjections.
2750
- // This ensures no orphaned tool-results survive above the stable-prefix
2751
- // boundary and eliminates the downstream repairToolPairs cleanup cost.
2752
- const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2753
- const hardCeiling = Math.floor(budget * 1.05);
2754
- if (estimatedTotal > hardCeiling) {
2755
- const overage = estimatedTotal - budget;
2756
- let trimmed = 0;
2757
- let trimCount = 0;
2758
- // Collect indices of messages to eject before mutating the array.
2759
- // Walk forward from the first non-system message, trimming oldest first.
2760
- // Sprint 4: Skip the volatile context block (dynamicBoundary: true) — it
2761
- // is query-shaped content that should not be evicted during the safety
2762
- // valve pass. The stable prefix system messages are also protected (role=system).
2763
- const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
2764
- const ejectIndices = new Set();
2765
- if (firstNonSystemIdx >= 0) {
2766
- let i = firstNonSystemIdx;
2767
- while (i < messages.length && trimmed < overage) {
2768
- // Don't trim the last user message (current prompt).
2769
- if (i === messages.length - 1 && messages[i].role === 'user')
2770
- break;
2771
- // Sprint 4: Don't trim the volatile context block (dynamicBoundary marker).
2772
- const meta = messages[i].metadata;
2773
- if (meta?.dynamicBoundary) {
2774
- i++;
2775
- continue;
2776
- }
2777
- const msgTokens = estimateMessageTokens(messages[i]);
2778
- ejectIndices.add(i);
2779
- trimmed += msgTokens;
2780
- trimCount++;
2781
- i++;
2782
- }
2783
- }
2784
- if (ejectIndices.size > 0) {
2785
- // C1: centralized ejection — resolves dependent tool-results atomically.
2786
- const ejectionResult = resolveToolChainEjections(messages, ejectIndices, 'eviction_oversize');
2787
- // Replace in-place so the rest of the compose path sees the clean array.
2788
- messages.length = 0;
2789
- messages.push(...ejectionResult.messages);
2790
- c1CoEjections += ejectionResult.coEjections;
2791
- c1StubReplacements += ejectionResult.stubReplacements;
2792
- slots.history = Math.max(0, slots.history - trimmed);
2793
- remaining += trimmed;
2794
- const c1Note = (ejectionResult.coEjections + ejectionResult.stubReplacements > 0)
2795
- ? ` [C1: ${ejectionResult.coEjections} co-ejected, ${ejectionResult.stubReplacements} stubbed]`
2796
- : '';
2797
- warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget${c1Note}`);
2798
- }
2799
- }
2800
- // ─── Sprint 2.1: Hydrate active-turn artifact stubs ────────────────────
2801
- // Must run on NeutralMessages[] BEFORE provider translation.
2802
- const hydrationResult = this.hydrateActiveTurnArtifacts(messages, db);
2803
- // ─── Translate to provider format (unless caller wants neutral) ───
2804
- // When skipProviderTranslation is set, return NeutralMessages directly.
2805
- // The context engine plugin uses this: the OpenClaw runtime handles its
2806
- // own provider translation, so double-translating corrupts tool calls.
2807
- const outputMessages = toComposeOutputMessages(messages);
2808
- // T1.3: Strip warm-replay provenance flags before output.
2809
- // _warmed is an internal tag added by warmSession() to mark messages
2810
- // seeded from SQLite into Redis. It must not leak into provider submissions
2811
- // or be visible to the runtime (which might misinterpret it).
2812
- for (const msg of outputMessages) {
2813
- const m = msg;
2814
- if (m.metadata && m.metadata._warmed) {
2815
- const { _warmed, ...cleanMeta } = m.metadata;
2816
- m.metadata = Object.keys(cleanMeta).length > 0 ? cleanMeta : undefined;
2817
- }
2818
- }
2819
- const totalTokens = budget - remaining;
2820
- // Sprint 3: Unified pressure signal — compose path
2821
- const s3Pressure = computeUnifiedPressure(totalTokens, budget, PRESSURE_SOURCE.COMPOSE_POST_ASSEMBLY);
2822
- // ─── Slot reconciliation ─────────────────────────────────────────────────
2823
- // totalTokens = budget - remaining is the authoritative spend figure.
2824
- // The slot accounting can drift from this due to history trim (which
2825
- // reduces slots.history but adds back to remaining after the budget
2826
- // was already committed) and FOS/MOD token rounding.
2827
- // Reconcile: assign any unaccounted tokens to slots.history so that
2828
- // sum(slots) === totalTokens always holds.
2829
- {
2830
- const slotSum = (slots.system ?? 0) + (slots.identity ?? 0) +
2831
- (slots.history ?? 0) + (slots.facts ?? 0) +
2832
- (slots.context ?? 0) + (slots.library ?? 0);
2833
- const delta = totalTokens - slotSum;
2834
- if (delta !== 0) {
2835
- slots.history = (slots.history ?? 0) + delta;
2836
- }
2837
- }
2838
- // ─── Compaction Fence Update ──────────────────────────────
2839
- // Record the oldest message ID that the LLM can see in this compose
2840
- // cycle. Everything below this ID becomes eligible for compaction.
2841
- // If history was included, query the DB for the oldest included message.
2842
- //
2843
- // Sprint 1: Capture compaction eligibility counts BEFORE updating the fence
2844
- // so we can report how many messages were eligible at the start of this pass.
2845
- let diagCompactionEligibleCount;
2846
- let diagCompactionEligibleRatio;
2847
- let diagCompactionProcessedCount;
2848
- if (request.includeHistory !== false && slots.history > 0) {
2849
- try {
2850
- const conversation = store.getConversation(request.sessionKey);
2851
- if (conversation) {
2852
- // Sprint 1: read eligibility BEFORE advancing the fence
2853
- try {
2854
- ensureCompactionFenceSchema(db);
2855
- const eligibilityBefore = getCompactionEligibility(db, conversation.id);
2856
- if (eligibilityBefore.fence !== null) {
2857
- // Total messages below fence (denominator for ratio)
2858
- const totalRow = db.prepare('SELECT COUNT(*) AS cnt FROM messages WHERE conversation_id = ?').get(conversation.id);
2859
- const totalMessages = totalRow?.cnt ?? 0;
2860
- diagCompactionEligibleCount = eligibilityBefore.eligibleCount;
2861
- diagCompactionEligibleRatio = totalMessages > 0
2862
- ? Math.round((eligibilityBefore.eligibleCount / totalMessages) * 1000) / 1000
2863
- : 0;
2864
- }
2865
- }
2866
- catch {
2867
- // Eligibility query is best-effort
2868
- }
2869
- // The compositor included N history messages (after truncation).
2870
- // Count how many non-system messages are in the output to determine
2871
- // how far back we reached.
2872
- const historyMsgCount = messages.filter(m => m.role !== 'system').length;
2873
- if (historyMsgCount > 0) {
2874
- // Get the oldest message we would have included.
2875
- // getRecentMessages returns the last N in chronological order,
2876
- // so the first element is the oldest included.
2877
- const oldestIncluded = db.prepare(`
2878
- SELECT id FROM messages
2879
- WHERE conversation_id = ?
2880
- ORDER BY message_index DESC
2881
- LIMIT 1 OFFSET ?
2882
- `).get(conversation.id, historyMsgCount - 1);
2883
- if (oldestIncluded) {
2884
- updateCompactionFence(db, conversation.id, oldestIncluded.id, { minTailMessages: 8 });
2885
- // Sprint 1: count how many messages moved from eligible -> fence-protected
2886
- // (i.e. they are now above the updated fence)
2887
- try {
2888
- const eligibilityAfter = getCompactionEligibility(db, conversation.id);
2889
- if (diagCompactionEligibleCount !== undefined) {
2890
- diagCompactionProcessedCount = Math.max(0, diagCompactionEligibleCount - eligibilityAfter.eligibleCount);
2891
- }
2892
- }
2893
- catch {
2894
- // After-eligibility query is best-effort
2895
- }
2896
- }
2897
- }
2898
- }
2899
- }
2900
- catch {
2901
- // Fence update is best-effort — never fail composition
2902
- warnings.push('Compaction fence update failed (non-fatal)');
2903
- }
2904
- }
2905
- // W3: Build compose diagnostics
2906
- let zeroResultReason;
2907
- if (volatileContextParts.length === 0 && stablePrefixMessages.length === 0) {
2908
- if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
2909
- zeroResultReason = 'scope_filtered_all';
2910
- }
2911
- else if (remaining <= 0) {
2912
- zeroResultReason = 'budget_exhausted';
2913
- }
2914
- else if (diagTriggerHits === 0 && !diagTriggerFallbackUsed) {
2915
- zeroResultReason = 'no_trigger_no_fallback';
2916
- }
2917
- else if ((diagTriggerHits > 0 || diagTriggerFallbackUsed) && diagFactsIncluded === 0 && diagSemanticResults === 0 && diagDocChunkCollections === 0) {
2918
- // Retrieval was attempted (trigger fired or fallback ran) but returned nothing — likely a retrieval bug
2919
- // rather than a genuinely empty corpus. Distinguish from 'empty_corpus' for observability.
2920
- zeroResultReason = 'unknown';
2921
- }
2922
- else {
2923
- zeroResultReason = 'empty_corpus';
2924
- }
2925
- }
2926
- // ── Sprint 4: Explicit budget lanes ───────────────────────────────────────────────
2927
- // Compute allocated token lanes for this compose pass.
2928
- // Budget = effective input budget (post-reserve).
2929
- // Filled values reflect actual spend after slot fill and safety-valve trim.
2930
- const s4HistoryLane = Math.floor(budget * b4HistoryFraction);
2931
- const s4MemoryLane = Math.floor(budget * b4MemoryFraction);
2932
- const s4StableFilledTokens = (slots.system ?? 0) + (slots.identity ?? 0);
2933
- const s4HistoryFilledTokens = slots.history ?? 0;
2934
- const s4MemoryFilledTokens = (slots.facts ?? 0) + (slots.context ?? 0) + (slots.library ?? 0);
2935
- const s4TotalFilled = s4StableFilledTokens + s4HistoryFilledTokens + s4MemoryFilledTokens;
2936
- const budgetLanes = {
2937
- effectiveBudget: budget,
2938
- stablePrefix: slots.system + slots.identity,
2939
- history: s4HistoryLane,
2940
- memory: s4MemoryLane,
2941
- historyFraction: b4HistoryFraction,
2942
- memoryFraction: b4MemoryFraction,
2943
- overhead: Math.max(0, budget - s4TotalFilled),
2944
- filled: {
2945
- stablePrefix: s4StableFilledTokens,
2946
- history: s4HistoryFilledTokens,
2947
- memory: s4MemoryFilledTokens,
2948
- },
2949
- };
2950
- // ── Sprint 4: OpenAI prefix-cache diagnostics ────────────────────────────────────
2951
- // Expose prefix-boundary information for OpenAI providers so operators
2952
- // can tune prompt layout for cache hit rate without guesswork.
2953
- // Non-fatal — never block compose.
2954
- let openaiPrefixCacheDiag;
2955
- try {
2956
- const s4Provider = s4DetectProvider(request.provider ?? request.model);
2957
- if (s4Provider === 'openai' || s4Provider === 'openai-responses') {
2958
- const totalWindowTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2959
- const cacheableFraction = totalWindowTokens > 0
2960
- ? Math.round((prefixTokens / totalWindowTokens) * 1000) / 1000
2961
- : 0;
2962
- // Sprint 4: volatileAtTail is true when the volatile context block is
2963
- // positioned AFTER any history (or, vacuously, when no history exists and
2964
- // the block sits just before the final user turn). In both cases nothing
2965
- // but the current user message follows the boundary, which is the
2966
- // cacheable layout. When assembledContextBlock is missing we report
2967
- // false since there is nothing to place at tail.
2968
- let s4VolatileAtTail = false;
2969
- if (s4VolatileContextPosition !== undefined) {
2970
- // Any messages after the boundary must be user turns only (no history).
2971
- const tail = messages.slice(s4VolatileContextPosition + 1);
2972
- s4VolatileAtTail = tail.every(m => m.role === 'user')
2973
- && s4VolatileContextPosition >= prefixSegmentCount;
2974
- }
2975
- openaiPrefixCacheDiag = {
2976
- stablePrefixMessageCount: prefixSegmentCount,
2977
- stablePrefixTokens: prefixTokens,
2978
- volatileAtTail: s4VolatileAtTail,
2979
- cacheableFraction,
2980
- prefixHash,
2981
- };
2982
- }
2983
- }
2984
- catch {
2985
- // Provider detection is best-effort — never block compose
2986
- }
2987
- // 0.9.0: lifecycle policy was resolved pre-recall and used to scale recall
2988
- // breadth. Diagnostics surface the same object so reported band/multiplier
2989
- // matches what actually controlled retrieval this compose pass.
2990
- const diagnostics = {
2991
- triggerHits: diagTriggerHits,
2992
- triggerFallbackUsed: diagTriggerFallbackUsed,
2993
- factsIncluded: diagFactsIncluded,
2994
- semanticResultsIncluded: diagSemanticResults,
2995
- docChunksCollections: diagDocChunkCollections,
2996
- scopeFiltered: diagScopeFiltered,
2997
- zeroResultReason,
2998
- retrievalMode: diagRetrievalMode,
2999
- crossTopicKeystones: diagCrossTopicKeystones,
3000
- reserveFraction: dynamicReserve,
3001
- avgTurnCostTokens: avgTurnCost,
3002
- dynamicReserveActive: isDynamic,
3003
- sessionPressureHigh: pressureHigh,
3004
- fingerprintDedups: diagFingerprintDedups,
3005
- fingerprintCollisions: diagFingerprintCollisions,
3006
- windowCacheHit: false,
3007
- prefixSegmentCount,
3008
- prefixTokens,
3009
- prefixHash,
3010
- // B2: Surface the previous cached prefixHash when this full compose was
3011
- // triggered by a cache bypass (stable-prefix mutation detected).
3012
- prevPrefixHash: _prevPrefixHashFromBypass,
3013
- volatileHistoryTokens,
3014
- // Sprint 4 fields
3015
- sessionType: s4SessionType,
3016
- historyDepthChosen: s4EffectiveDepth,
3017
- estimatedMsgDensityTokens: s4ObservedDensity,
3018
- rescueTrimFired: s4RescueTrimFired,
3019
- // Sprint 4: prompt-tail placement diagnostics
3020
- budgetLanes,
3021
- volatileContextPosition: s4VolatileContextPosition,
3022
- messagesBeforeVolatile: s4MessagesBeforeVolatile,
3023
- openaiPrefixCacheDiag,
3024
- // Sprint 3: unified pressure signal
3025
- sessionPressureFraction: s3Pressure.fraction,
3026
- pressureSource: s3Pressure.source,
3027
- // B4: model-aware lane budget diagnostics
3028
- mecwProfile: b4MecwProfile,
3029
- mecwApplied: b4MecwApplied,
3030
- mecwBlend: b4MecwBlend,
3031
- effectiveHistoryFraction: b4HistoryFraction,
3032
- effectiveMemoryFraction: b4MemoryFraction,
3033
- trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
3034
- trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
3035
- trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
3036
- // 0.9.0: adaptive lifecycle diagnostics for compose.preRecall
3037
- adaptiveLifecycleBand: composeLifecyclePolicy.band,
3038
- adaptiveLifecyclePressurePct: composeLifecyclePolicy.pressurePct,
3039
- adaptiveWarmHistoryBudgetFraction: composeLifecyclePolicy.warmHistoryBudgetFraction,
3040
- adaptiveSmartRecallMultiplier: composeLifecyclePolicy.smartRecallMultiplier,
3041
- adaptiveTrimSoftTarget: composeLifecyclePolicy.trimSoftTarget,
3042
- adaptiveCompactionTargetFraction: composeLifecyclePolicy.compactionTargetFraction,
3043
- adaptiveBreadcrumbPackage: composeLifecyclePolicy.emitBreadcrumbPackage,
3044
- adaptiveTopicCentroidEviction: composeLifecyclePolicy.enableTopicCentroidEviction,
3045
- adaptiveProactiveCompaction: composeLifecyclePolicy.triggerProactiveCompaction,
3046
- adaptiveLifecycleReasons: composeLifecyclePolicy.reasons,
3047
- adaptiveRecallBudgetTokens: diagAdaptiveRecallBudgetTokens,
3048
- adaptiveRecallCandidateLimit: diagAdaptiveRecallCandidateLimit,
3049
- adaptiveEvictionLifecycleBand: evictionLifecyclePolicy.band,
3050
- adaptiveEvictionPressurePct: evictionLifecyclePolicy.pressurePct,
3051
- adaptiveEvictionTopicAwareEligibleClusters,
3052
- adaptiveEvictionTopicAwareDroppedClusters,
3053
- adaptiveEvictionProtectedClusters,
3054
- adaptiveEvictionTopicIdCoveragePct,
3055
- adaptiveEvictionBypassReason,
3056
- composeTopicSource,
3057
- composeTopicState,
3058
- composeTopicMessageCount,
3059
- composeTopicStampedMessageCount,
3060
- composeTopicTelemetryStatus: 'emitted',
3061
- adaptiveLifecycleBandDiverged: evictionLifecyclePolicy.band !== composeLifecyclePolicy.band,
3062
- adaptiveForkedContext: s09ForkedContextSeed ? true : undefined,
3063
- adaptiveForkedParentPressurePct: s09ForkedParentPressure != null
3064
- ? Math.round(s09ForkedParentPressure * 100)
3065
- : undefined,
3066
- adaptiveForkedParentUserTurns: s09ForkedContextSeed?.parentUserTurnCount,
3067
- // C1: tool-chain ejection telemetry
3068
- toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
3069
- toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
3070
- // C2: artifact oversize degradation telemetry
3071
- artifactDegradations: c2ArtifactDegradations > 0 ? c2ArtifactDegradations : undefined,
3072
- artifactOversizeThresholdTokens: c2ArtifactThresholdTokens,
3073
- // Sprint 2.1: tool artifact hydration telemetry
3074
- artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
3075
- hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
3076
- hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
3077
- // Sprint 1: observability layer
3078
- rerankerStatus: diagRerankerStatus,
3079
- rerankerCandidates: diagRerankerCandidates,
3080
- rerankerProvider: diagRerankerProvider,
3081
- // Sprint 1: named slot spans (allocated vs filled, overflow flag)
3082
- slotSpans: {
3083
- system: { allocated: slots.system, filled: slots.system, overflow: false },
3084
- identity: { allocated: slots.identity, filled: slots.identity, overflow: false },
3085
- history: { allocated: Math.floor(budget * b4HistoryFraction), filled: slots.history, overflow: slots.history > Math.floor(budget * b4HistoryFraction) },
3086
- facts: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.facts, overflow: false },
3087
- context: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.context, overflow: false },
3088
- library: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.library, overflow: false },
3089
- },
3090
- // Sprint 1: compaction eligibility
3091
- compactionEligibleCount: diagCompactionEligibleCount,
3092
- compactionEligibleRatio: diagCompactionEligibleRatio,
3093
- compactionProcessedCount: diagCompactionProcessedCount,
3094
- };
3095
- if (pressureHigh) {
3096
- warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
3097
- }
3098
- else if (dynamicReserve > 0.40) {
3099
- console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
3100
- }
3101
- const composedAt = new Date().toISOString();
3102
- // ─── Write Window Cache ─────────────────────────────
3103
- // Cache the composed message array so the plugin can serve it directly
3104
- // on the next assemble() call without re-running the full compose pipeline.
3105
- // Short TTL (120s). External L4 mutations should set skipWindowCache=true.
3106
- //
3107
- // VS-1: Dual-write, session-scoped key for backwards compat;
3108
- // topic-scoped key for per-topic window retrieval when activeTopicId is set.
3109
- try {
3110
- // B2: Compute a cheap prefix input hash from the system + identity slot
3111
- // contents that fed the stable prefix. Stored in WindowCacheMeta so the
3112
- // C4 fast-exit can detect prefix mutations without re-running full compose.
3113
- const _prefixInputHash = createHash('sha256')
3114
- .update(systemContent ?? '')
3115
- .update('\n␞\n')
3116
- .update(identityContent ?? '')
3117
- .digest('hex');
3118
- await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
3119
- await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
3120
- slots: slots,
3121
- totalTokens,
3122
- warnings,
3123
- diagnostics,
3124
- composedAt,
3125
- prefixHash,
3126
- prefixInputHash: _prefixInputHash,
3127
- }, 120);
3128
- }
3129
- catch {
3130
- // Window cache write is best-effort
3131
- }
3132
- if (composedActiveTopicId) {
3133
- try {
3134
- await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
3135
- }
3136
- catch {
3137
- // Topic window write is best-effort
3138
- }
3139
- }
3140
- // ─── Write Session Cursor ─────────────────────────────────
3141
- // Record the newest message included in the submission window.
3142
- // Background indexer uses this to find unprocessed high-signal content.
3143
- if (request.includeHistory !== false && slots.history > 0) {
3144
- try {
3145
- const historyMsgs = messages.filter(m => m.role !== 'system');
3146
- const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
3147
- if (lastHistoryMsg) {
3148
- const sm = lastHistoryMsg;
3149
- if (sm.id != null && sm.messageIndex != null) {
3150
- const cursor = {
3151
- lastSentId: sm.id,
3152
- lastSentIndex: sm.messageIndex,
3153
- lastSentAt: composedAt,
3154
- windowSize: historyMsgs.length,
3155
- tokenCount: totalTokens,
3156
- };
3157
- await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
3158
- try {
3159
- db.prepare(`
3160
- UPDATE conversations
3161
- SET cursor_last_sent_id = ?,
3162
- cursor_last_sent_index = ?,
3163
- cursor_last_sent_at = ?,
3164
- cursor_window_size = ?,
3165
- cursor_token_count = ?
3166
- WHERE session_key = ?
3167
- `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
3168
- }
3169
- catch {
3170
- // SQLite cursor write is best-effort, don't block compose
3171
- }
3172
- }
3173
- }
3174
- }
3175
- catch {
3176
- // Cursor write is best-effort
3177
- }
3178
- }
3179
- try {
3180
- const conversation = sampleConv ?? store.getConversation(request.sessionKey);
3181
- if (conversation) {
3182
- const snapshotContext = getOrCreateActiveContext(db, request.agentId, request.sessionKey, conversation.id);
3183
- const repairNoticeContent = await this.cache.getSlot(request.agentId, request.sessionKey, 'repair_notice');
3184
- insertCompositionSnapshot(db, {
3185
- contextId: snapshotContext.id,
3186
- headMessageId: snapshotContext.headMessageId ?? null,
3187
- model: request.model ?? request.provider ?? 'unknown',
3188
- contextWindow: totalWindow,
3189
- totalTokens,
3190
- fillPct: totalWindow > 0 ? Math.round((totalTokens / totalWindow) * 10000) / 10000 : 0,
3191
- snapshotKind: 'composed_window',
3192
- repairDepth: repairNoticeContent ? MAX_WARM_RESTORE_REPAIR_DEPTH : 0,
3193
- slots: buildCompositionSnapshotSlots({
3194
- system: systemContent,
3195
- identity: identityContent,
3196
- repairNotice: repairNoticeContent,
3197
- messages,
3198
- contextBlock: assembledContextBlock,
3199
- }),
3200
- });
3201
- }
3202
- }
3203
- catch (error) {
3204
- console.warn(`[hypermem:compositor] composition snapshot write skipped: ${error.message}`);
3205
- }
3206
- console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
3207
- return {
3208
- messages: outputMessages,
3209
- tokenCount: totalTokens,
3210
- slots,
3211
- truncated: remaining < 0 || estimatedTotal > hardCeiling,
3212
- hasWarnings: warnings.length > 0,
3213
- warnings,
3214
- contextBlock: assembledContextBlock,
3215
- diagnostics,
3216
- };
3217
- }
3218
- /**
3219
- * Warm a session from SQLite into Redis.
3220
- * Called on session start or Redis cache miss.
3221
- */
3222
- async warmSession(agentId, sessionKey, db, opts) {
3223
- const store = new MessageStore(db);
3224
- const conversation = store.getConversation(sessionKey);
3225
- if (!conversation)
3226
- return;
3227
- // Phase 3 (Turn DAG): resolve active context for DAG-native warm preload.
3228
- // Uses context.head_message_id to walk only the active branch.
3229
- let activeContext = null;
3230
- try {
3231
- activeContext = getOrCreateActiveContext(db, agentId, sessionKey, conversation.id);
3232
- }
3233
- catch {
3234
- try {
3235
- activeContext = getActiveContext(db, agentId, sessionKey);
3236
- }
3237
- catch {
3238
- // Context resolution is best-effort
3239
- }
3240
- }
3241
- // Phase 0 fence enforcement: resolve compaction fence for warm bootstrap.
3242
- // Fence remains as transitional safety — primary scoping is via DAG walk.
3243
- let warmFenceMessageId;
3244
- try {
3245
- ensureCompactionFenceSchema(db);
3246
- const fence = getCompactionFence(db, conversation.id);
3247
- if (fence)
3248
- warmFenceMessageId = fence.fenceMessageId;
3249
- }
3250
- catch {
3251
- // Fence lookup is best-effort
3252
- }
3253
- const warmMeta = {
3254
- agentId,
3255
- sessionKey,
3256
- provider: conversation.provider,
3257
- model: conversation.model,
3258
- channelType: conversation.channelType,
3259
- tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
3260
- lastActive: conversation.updatedAt,
3261
- status: conversation.status,
3262
- };
3263
- if (activeContext) {
3264
- const warnSnapshotVerifyFallback = (reason, detail) => {
3265
- const detailSuffix = detail ? ` ${detail}` : '';
3266
- console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} reason=${reason} verify_fallback_count=1 cold_rewarm_count=1${detailSuffix}`);
3267
- };
3268
- try {
3269
- const snapshotCandidates = listCompositionSnapshots(db, activeContext.id, 2);
3270
- const latestSnapshot = getLatestValidCompositionSnapshot(db, activeContext.id);
3271
- if (latestSnapshot?.verification.slots) {
3272
- const targetModel = opts?.model ?? conversation.model ?? 'unknown';
3273
- const sourceModel = latestSnapshot.snapshot.model;
3274
- const sourceProvider = s4DetectProvider(sourceModel);
3275
- const targetProvider = s4DetectProvider(conversation.provider ?? targetModel);
3276
- const restored = restoreWarmSnapshotState(latestSnapshot.verification.slots, {
3277
- sourceProvider,
3278
- targetProvider,
3279
- });
3280
- if (restored) {
3281
- if (!restored.diagnostics.rolloutGatePassed) {
3282
- const gateSummary = restored.diagnostics.rolloutGateViolations
3283
- .map(violation => `${violation.gate}=${violation.actual}/${violation.max}`)
3284
- .join(', ');
3285
- console.warn(`[hypermem:compositor] warm snapshot rollout gate blocked session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)} verify_fallback_count=${latestSnapshot.fallbackUsed ? 1 : 0} cold_rewarm_count=1`);
3286
- warnSnapshotVerifyFallback('rollout_gate_blocked', `snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)}`);
3287
- }
3288
- else {
3289
- if (latestSnapshot.fallbackUsed) {
3290
- console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} restored_snapshot=${latestSnapshot.snapshot.id} verify_fallback_count=1 cold_rewarm_count=0 reason=latest_snapshot_invalid_or_unverifiable`);
3291
- }
3292
- const repairNoticeLines = [
3293
- `Repair notice: this session is a repaired continuation from snapshot ${latestSnapshot.snapshot.id}.`,
3294
- `Source model: ${sourceModel}. Target model: ${targetModel}.`,
3295
- `Source provider: ${sourceProvider}. Target provider: ${targetProvider}.`,
3296
- `Cross-model boundary: ${sourceModel !== targetModel ? 'yes' : 'no'}.`,
3297
- `Cross-provider boundary: ${restored.diagnostics.crossProviderBoundary ? 'yes' : 'no'}.`,
3298
- `Repair depth: ${MAX_WARM_RESTORE_REPAIR_DEPTH}.`
3299
- ];
3300
- if (latestSnapshot.fallbackUsed) {
3301
- repairNoticeLines.push('Snapshot verify fallback count: 1.');
3302
- }
3303
- if (restored.diagnostics.quotedAssistantTurns > 0) {
3304
- repairNoticeLines.push(`Quoted foreign-provider assistant turns: ${restored.diagnostics.quotedAssistantTurns}.`);
3305
- }
3306
- if (restored.diagnostics.toolPairParityViolations > 0) {
3307
- repairNoticeLines.push(`Tool-pair parity gaps flagged: ${restored.diagnostics.toolPairParityViolations}.`);
3308
- }
3309
- if (restored.diagnostics.requiredSlotDrops.length > 0) {
3310
- repairNoticeLines.push(`Required-slot gaps flagged: ${restored.diagnostics.requiredSlotDrops.join(', ')}.`);
3311
- }
3312
- const tokenParityDriftExceeded = restored.diagnostics.tokenParityDriftP95 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP95Max
3313
- || restored.diagnostics.tokenParityDriftP99 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP99Max;
3314
- if (tokenParityDriftExceeded
3315
- || restored.diagnostics.requiredSlotDropRate > WARM_RESTORE_MEASUREMENT_GATES.requiredSlotDropRateMax
3316
- || restored.diagnostics.stablePrefixBoundaryViolations > WARM_RESTORE_MEASUREMENT_GATES.stablePrefixBoundaryViolationsMax
3317
- || restored.diagnostics.toolPairParityViolations > WARM_RESTORE_MEASUREMENT_GATES.toolPairParityViolationsMax
3318
- || restored.diagnostics.continuityCriticalBoundaryTransformRate > WARM_RESTORE_MEASUREMENT_GATES.continuityCriticalBoundaryTransformRateMax) {
3319
- repairNoticeLines.push(`Warm-restore instrumentation gap: token parity drift p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)}, p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}, stable_prefix violations=${restored.diagnostics.stablePrefixBoundaryViolations}, continuity-critical transform rate=${restored.diagnostics.continuityCriticalBoundaryTransformRate.toFixed(4)}.`);
3320
- }
3321
- const repairNoticeContent = repairNoticeLines.join(' ');
3322
- await this.cache.invalidateWindow(agentId, sessionKey);
3323
- await this.cache.warmSession(agentId, sessionKey, {
3324
- system: restored.system ?? opts?.systemPrompt,
3325
- identity: restored.identity ?? opts?.identity,
3326
- repairNotice: repairNoticeContent,
3327
- history: restored.history,
3328
- meta: warmMeta,
3329
- });
3330
- console.info(`[hypermem:compositor] warm snapshot restore session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} fallback=${latestSnapshot.fallbackUsed} cross_provider=${restored.diagnostics.crossProviderBoundary} quoted_assistant_turns=${restored.diagnostics.quotedAssistantTurns} tool_pair_gaps=${restored.diagnostics.toolPairParityViolations} rollout_gate_passed=${restored.diagnostics.rolloutGatePassed} token_parity_drift_p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)} token_parity_drift_p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}`);
3331
- return;
3332
- }
3333
- }
3334
- warnSnapshotVerifyFallback('restore_unusable', `snapshot_count=${snapshotCandidates.length}`);
3335
- }
3336
- else if (snapshotCandidates.length > 0) {
3337
- warnSnapshotVerifyFallback('no_valid_snapshot', `snapshot_count=${snapshotCandidates.length}`);
3338
- }
3339
- }
3340
- catch (error) {
3341
- warnSnapshotVerifyFallback('restore_exception', `error=${JSON.stringify(error.message)}`);
3342
- }
3343
- }
3344
- // Fetch a generous pool from SQLite, apply gradient transform, then
3345
- // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
3346
- // message-count constant which was a blunt instrument — 100 messages of
3347
- // large tool results can massively exceed the history budget allocation.
3348
- // Warm budget uses the same reserve fraction as compose() so warm history
3349
- // never pre-fills more than compose() would actually allow.
3350
- const reserve = this.config.contextWindowReserve ?? 0.15;
3351
- const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve, this.config.budgetFraction);
3352
- const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
3353
- // Phase 3 (Turn DAG): prefer DAG walk from context head for warm preload.
3354
- // This ensures only active-branch messages enter the warm cache.
3355
- let rawHistory;
3356
- if (activeContext?.headMessageId) {
3357
- rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, this.config.maxHistoryMessages);
3358
- // DAG walk may return empty for legacy data — fall back to fence-scoped query
3359
- if (rawHistory.length === 0) {
3360
- rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, warmFenceMessageId);
3361
- }
3362
- }
3363
- else {
3364
- rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, warmFenceMessageId);
3365
- }
3366
- const transformedForWarm = applyToolGradient(rawHistory, {
3367
- totalWindowTokens: resolveModelWindow(opts?.model, this.config.defaultTokenBudget),
3368
- });
3369
- // Walk newest→oldest, accumulate transformed token cost, stop when budget exhausted
3370
- let warmTokens = 0;
3371
- const history = [];
3372
- for (let i = transformedForWarm.length - 1; i >= 0; i--) {
3373
- const cost = estimateMessageTokens(transformedForWarm[i]);
3374
- if (warmTokens + cost > warmBudget)
3375
- break;
3376
- // T1.3 Provenance flag: tag warm-seeded messages so they can be identified
3377
- // downstream. The flag is stripped before provider submission in compose().
3378
- // This prevents the runtime from treating warm-replayed user messages as
3379
- // new inbound queries (ghost message bug).
3380
- const tagged = { ...transformedForWarm[i] };
3381
- tagged.metadata = { ...(tagged.metadata || {}), _warmed: true };
3382
- history.unshift(tagged);
3383
- warmTokens += cost;
3384
- }
3385
- // Note: facts and context are intentionally NOT cached here.
3386
- // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
3387
- // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
3388
- // Caching them here would create stale entries that compose() ignores anyway.
3389
- // Invalidate the window cache so the next compose rebuilds with the fresh
3390
- // system/identity slots. Without this, the fast-exit returns a stale bundle
3391
- // that predates the warm and reports identity=0.
3392
- await this.cache.invalidateWindow(agentId, sessionKey);
3393
- await this.cache.warmSession(agentId, sessionKey, {
3394
- system: opts?.systemPrompt,
3395
- identity: opts?.identity,
3396
- history,
3397
- meta: warmMeta,
3398
- });
3399
- }
3400
- async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth, trimSoftTarget) {
3401
- const store = new MessageStore(db);
3402
- const conversation = store.getConversation(sessionKey);
3403
- if (!conversation)
3404
- return;
3405
- // Phase 3 (Turn DAG): resolve active context for DAG-native gradient refresh
3406
- let activeContext = null;
3407
- try {
3408
- activeContext = getActiveContext(db, agentId, sessionKey);
3409
- }
3410
- catch {
3411
- // Context resolution is best-effort
3412
- }
3413
- // Phase 0 fence enforcement for gradient refresh (transitional safety)
3414
- let gradientFenceMessageId;
3415
- try {
3416
- ensureCompactionFenceSchema(db);
3417
- const fence = getCompactionFence(db, conversation.id);
3418
- if (fence)
3419
- gradientFenceMessageId = fence.fenceMessageId;
3420
- }
3421
- catch {
3422
- // Fence lookup is best-effort
3423
- }
3424
- // Phase 3: prefer DAG walk from context head
3425
- const refreshHistoryLimit = Math.min(this.config.maxHistoryMessages, Math.max(1, historyDepth ?? this.config.maxHistoryMessages));
3426
- let rawHistory;
3427
- if (activeContext?.headMessageId) {
3428
- rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, refreshHistoryLimit);
3429
- if (rawHistory.length === 0) {
3430
- rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
3431
- }
3432
- }
3433
- else {
3434
- rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
3435
- }
3436
- // Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): cap gradient total-window tokens
3437
- // at the same 65% target that assemble.normal trims to. Previously this was
3438
- // tokenBudget/0.80 (≈1.25×budget), which made applyToolGradient preserve more
3439
- // content than the trim target allowed — causing assemble.normal to always trim
3440
- // on the next turn even in the steady-state path. Aligning the gradient cap to
3441
- // the trim target means the rebuilt window already fits within the assemble
3442
- // envelope by construction.
3443
- const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0, { trimSoftTarget });
3444
- const transformedHistory = applyToolGradient(rawHistory, {
3445
- totalWindowTokens: tokenBudget && tokenBudget > 0
3446
- ? gradientAssembleBudget
3447
- : TOOL_PLANNING_BASELINE_WINDOW,
3448
- });
3449
- // If a token budget is provided, trim the gradient-compressed window to fit
3450
- // before writing to Redis. The cap uses the same GRADIENT_ASSEMBLE_TARGET
3451
- // (0.65) so the window written to Redis sits inside the assemble.normal trim
3452
- // envelope. The next assemble() will find the window already within budget
3453
- // and skip the trim entirely in the steady-state path.
3454
- let historyToWrite = transformedHistory;
3455
- if (tokenBudget && tokenBudget > 0) {
3456
- const budgetCap = gradientAssembleBudget;
3457
- let runningTokens = 0;
3458
- const clusters = clusterNeutralMessages(transformedHistory);
3459
- const cappedClusters = [];
3460
- // Walk newest-first, keep whole clusters so tool-call/result pairs survive together.
3461
- for (let i = clusters.length - 1; i >= 0; i--) {
3462
- const cluster = clusters[i];
3463
- if (runningTokens + cluster.tokenCost > budgetCap && cappedClusters.length > 0)
3464
- break;
3465
- cappedClusters.unshift(cluster);
3466
- runningTokens += cluster.tokenCost;
3467
- if (runningTokens >= budgetCap)
3468
- break;
3469
- }
3470
- historyToWrite = cappedClusters.flatMap(cluster => cluster.messages);
3471
- if (historyToWrite.length < transformedHistory.length) {
3472
- console.log(`[hypermem] refreshRedisGradient: cluster-capped ${transformedHistory.length}→${historyToWrite.length} messages ` +
3473
- `for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
3474
- }
3475
- }
3476
- await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, refreshHistoryLimit);
3477
- }
3478
- // ─── Slot Content Resolution ─────────────────────────────────
3479
- /**
3480
- * Get slot content: try Redis first, fall back to SQLite.
3481
- */
3482
- async getSlotContent(agentId, sessionKey, slot, db, libraryDb) {
3483
- const cached = await this.cache.getSlot(agentId, sessionKey, slot);
3484
- if (cached)
3485
- return cached;
3486
- switch (slot) {
3487
- case 'facts': {
3488
- const result = this.buildFactsFromDb(agentId, sessionKey, libraryDb || this.libraryDb || db);
3489
- return result ? result[0] : null;
3490
- }
3491
- case 'context':
3492
- return this.buildCrossSessionContext(agentId, sessionKey, db, libraryDb || this.libraryDb);
3493
- default:
3494
- return null;
3495
- }
3496
- }
3497
- /**
3498
- * Get conversation history: try Redis first, fall back to SQLite.
3499
- *
3500
- * When topicId is provided (P3.4), the SQLite path filters to messages
3501
- * matching that topic OR with topic_id IS NULL (Option B transition safety).
3502
- * The Redis path is unaffected — Redis doesn't index by topic, so topic
3503
- * filtering only applies to the SQLite fallback.
3504
- */
3505
- async getHistory(agentId, sessionKey, limit, store, topicId, fenceMessageId, activeContext) {
3506
- // Pass limit through to Redis — this is the correct enforcement point.
3507
- // Previously getHistory() ignored the limit on the Redis path (LRANGE 0 -1),
3508
- // meaning historyDepth in the compose request had no effect on hot sessions.
3509
- const cached = await this.cache.getHistory(agentId, sessionKey, limit);
3510
- if (cached.length > 0)
3511
- return cached;
3512
- // Phase 3 (Turn DAG): walk from context.head_message_id backward through
3513
- // parent_id links. This is the primary correctness mechanism — the fence
3514
- // remains as transitional safety only.
3515
- if (activeContext?.headMessageId) {
3516
- const dagMessages = store.getHistoryByDAGWalk(activeContext.headMessageId, limit);
3517
- if (dagMessages.length > 0)
3518
- return dagMessages;
3519
- // DAG walk returned empty (e.g., legacy data without parent chains) — fall through
3520
- }
3521
- const conversation = store.getConversation(sessionKey);
3522
- if (!conversation)
3523
- return [];
3524
- if (topicId) {
3525
- // P3.4: Option B — active topic messages + legacy NULL messages
3526
- return store.getRecentMessagesByTopic(conversation.id, topicId, limit, fenceMessageId);
3527
- }
3528
- return store.getRecentMessages(conversation.id, limit, fenceMessageId);
3529
- }
3530
- // ─── L4 Library Builders ─────────────────────────────────────
3531
- /**
3532
- * Build facts content from library DB.
3533
- */
3534
- /**
3535
- * Build facts content from library DB.
3536
- * Applies filterByScope (W1) to enforce retrieval access control.
3537
- * Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
3538
- */
3539
- buildFactsFromDb(agentId, sessionKey, db) {
3540
- const sections = this.buildFactSectionsFromDb(agentId, sessionKey, db);
3541
- if (!sections)
3542
- return null;
3543
- const combined = [sections.stableContent, sections.volatileContent]
3544
- .filter((value) => Boolean(value))
3545
- .join('\n');
3546
- return [
3547
- combined || null,
3548
- sections.stableCount + sections.volatileCount,
3549
- sections.filteredCount,
3550
- ];
3551
- }
3552
- buildFactSectionsFromDb(agentId, sessionKey, db) {
3553
- if (!db)
3554
- return null;
3555
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
3556
- if (!tableExists || tableExists.cnt === 0)
3557
- return null;
3558
- const rawRows = db.prepare(`
3559
- SELECT content, domain, confidence, agent_id, source_session_key AS session_key, scope FROM facts
3560
- WHERE agent_id = ?
3561
- AND superseded_by IS NULL
3562
- AND (expires_at IS NULL OR expires_at > datetime('now'))
3563
- AND decay_score < 0.8
3564
- AND confidence >= 0.5
3565
- ORDER BY confidence DESC, decay_score ASC
3566
- LIMIT ?
3567
- `).all(agentId, this.config.maxFacts);
3568
- if (rawRows.length === 0) {
3569
- return {
3570
- stableContent: null,
3571
- stableCount: 0,
3572
- volatileContent: null,
3573
- volatileCount: 0,
3574
- filteredCount: 0,
3575
- };
3576
- }
3577
- const ctx = { agentId, sessionKey };
3578
- const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
3579
- ...r,
3580
- agentId: r.agent_id,
3581
- sessionKey: r.session_key,
3582
- })), ctx);
3583
- if (allowed.length === 0) {
3584
- return {
3585
- stableContent: null,
3586
- stableCount: 0,
3587
- volatileContent: null,
3588
- volatileCount: 0,
3589
- filteredCount,
3590
- };
3591
- }
3592
- const formatRows = (rows) => {
3593
- if (rows.length === 0)
3594
- return null;
3595
- return rows
3596
- .map(r => {
3597
- const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
3598
- const sessionSuffix = fromOtherSession
3599
- ? `, session:${r.sessionKey.slice(-8)}`
3600
- : '';
3601
- return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
3602
- })
3603
- .join('\n');
3604
- };
3605
- const stableRows = allowed.filter(r => r.scope !== 'session' && (!r.sessionKey || r.sessionKey !== sessionKey));
3606
- const volatileRows = allowed.filter(r => !stableRows.includes(r));
3607
- return {
3608
- stableContent: formatRows(stableRows),
3609
- stableCount: stableRows.length,
3610
- volatileContent: formatRows(volatileRows),
3611
- volatileCount: volatileRows.length,
3612
- filteredCount,
3613
- };
3614
- }
3615
- /**
3616
- * Build knowledge content from library DB.
3617
- * Prioritizes high-confidence, non-superseded entries.
3618
- */
3619
- buildKnowledgeFromDb(agentId, db) {
3620
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='knowledge'").get();
3621
- if (!tableExists || tableExists.cnt === 0)
3622
- return null;
3623
- const rows = db.prepare(`
3624
- SELECT domain, key, content, confidence FROM knowledge
3625
- WHERE agent_id = ?
3626
- AND superseded_by IS NULL
3627
- AND (expires_at IS NULL OR expires_at > datetime('now'))
3628
- ORDER BY confidence DESC, updated_at DESC
3629
- LIMIT 15
3630
- `).all(agentId);
3631
- if (rows.length === 0)
3632
- return null;
3633
- // Group by domain for cleaner presentation
3634
- const byDomain = {};
3635
- for (const row of rows) {
3636
- if (!byDomain[row.domain])
3637
- byDomain[row.domain] = [];
3638
- byDomain[row.domain].push({ key: row.key, content: row.content });
3639
- }
3640
- const lines = [];
3641
- for (const [domain, entries] of Object.entries(byDomain)) {
3642
- lines.push(`### ${domain}`);
3643
- for (const entry of entries) {
3644
- lines.push(`- **${entry.key}:** ${entry.content}`);
3645
- }
3646
- }
3647
- return lines.join('\n');
3648
- }
3649
- /**
3650
- * Build wiki page context for the active topic.
3651
- * Queries the knowledge table for a synthesized topic page and returns it
3652
- * wrapped with a header. Capped at 600 tokens.
3653
- */
3654
- buildWikiPageContext(agentId, topicName, db) {
3655
- const knowledgeStore = new KnowledgeStore(db);
3656
- const knowledge = knowledgeStore.get(agentId, 'topic-synthesis', topicName);
3657
- if (!knowledge)
3658
- return null;
3659
- const wrapped = `## Active Topic: ${topicName}\n${knowledge.content}`;
3660
- return this.truncateToTokens(wrapped, 600);
3661
- }
3662
- /**
3663
- * Build preferences content from library DB.
3664
- * Shows user/operator preferences relevant to this agent.
3665
- */
3666
- buildPreferencesFromDb(agentId, db) {
3667
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='preferences'").get();
3668
- if (!tableExists || tableExists.cnt === 0)
3669
- return null;
3670
- // Get preferences set by this agent or marked fleet-visible
3671
- const rows = db.prepare(`
3672
- SELECT subject, key, value, domain, confidence FROM preferences
3673
- WHERE (agent_id = ? OR agent_id IS NULL)
3674
- ORDER BY confidence DESC, updated_at DESC
3675
- LIMIT 10
3676
- `).all(agentId);
3677
- if (rows.length === 0)
3678
- return null;
3679
- // Group by subject
3680
- const bySubject = {};
3681
- for (const row of rows) {
3682
- if (!bySubject[row.subject])
3683
- bySubject[row.subject] = [];
3684
- bySubject[row.subject].push({ key: row.key, value: row.value, domain: row.domain });
3685
- }
3686
- const lines = [];
3687
- for (const [subject, prefs] of Object.entries(bySubject)) {
3688
- lines.push(`### ${subject}`);
3689
- for (const pref of prefs) {
3690
- const domainTag = pref.domain ? ` [${pref.domain}]` : '';
3691
- lines.push(`- **${pref.key}:**${domainTag} ${pref.value}`);
3692
- }
3693
- }
3694
- return lines.join('\n');
3695
- }
3696
- // ─── L3 Hybrid Retrieval (FTS5 + KNN) ───────────────────────
3697
- /**
3698
- * Build semantic recall content using hybrid FTS5+KNN retrieval.
3699
- *
3700
- * Uses Reciprocal Rank Fusion to merge keyword and vector results.
3701
- * Gracefully degrades: FTS5-only when no vector store, KNN-only
3702
- * when FTS query is empty (all stop words), both when available.
3703
- *
3704
- * @param precomputedEmbedding — optional pre-computed embedding for the query.
3705
- * When provided, the Ollama call inside VectorStore.search() is skipped.
3706
- */
3707
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints, // C2: skip results already in Active Facts
3708
- onRerankerTelemetry, // Sprint 1: surface reranker status at assemble level
3709
- resultLimit) {
3710
- const libDb = libraryDb || this.libraryDb;
3711
- if (!libDb && !this.vectorStore)
3712
- return null;
3713
- // 0.9.0: clamp the lifecycle-scaled candidate limit. Caller already clamps
3714
- // via scaleRecallBreadth; this is a defensive floor so direct callers (none
3715
- // outside compose today) cannot accidentally request 0 results.
3716
- const hybridLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.floor(resultLimit && resultLimit > 0 ? resultLimit : RECALL_BREADTH_BASE.candidateLimit)));
3717
- // KNN-only legacy fallback historically used 8 — keep it slightly below the
3718
- // hybrid limit to preserve prior behavior at multiplier=1, while still
3719
- // scaling with the same adaptive limit.
3720
- const knnFallbackLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, hybridLimit - 2));
3721
- // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
3722
- const fpCheck = existingFingerprints
3723
- ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
3724
- : () => false;
3725
- // Use hybrid search when library DB is available
3726
- if (libDb) {
3727
- const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
3728
- tables: ['facts', 'knowledge', 'episodes'],
3729
- limit: hybridLimit,
3730
- agentId,
3731
- maxKnnDistance: 1.2,
3732
- precomputedEmbedding,
3733
- reranker: this.reranker,
3734
- rerankerMinCandidates: this.rerankerMinCandidates,
3735
- rerankerMaxDocuments: this.rerankerMaxDocuments,
3736
- rerankerTopK: this.rerankerTopK,
3737
- // Sprint 1: thread reranker telemetry into compose diagnostics
3738
- onRerankerTelemetry,
3739
- });
3740
- if (results.length === 0)
3741
- return null;
3742
- const lines = [];
3743
- let tokens = 0;
3744
- // TUNE-015: apply recency decay to recall scores.
3745
- // Messages and episodes from distant past score down even if semantically relevant.
3746
- // A 5-day-old task-request should not compete equally with today's messages.
3747
- // - Episodes: exponential decay, half-life 7 days
3748
- // - Facts/knowledge: step-function penalty for items older than 48h
3749
- // (prevents completed/stale tasks from outranking recent ones)
3750
- // 48-72h: multiply by 0.7
3751
- // >72h: multiply by 0.5
3752
- const now = Date.now();
3753
- const decayedResults = results.map(result => {
3754
- if (!result.createdAt)
3755
- return result;
3756
- const ageMs = now - new Date(result.createdAt).getTime();
3757
- const ageDays = ageMs / 86_400_000;
3758
- if (result.sourceTable === 'episodes') {
3759
- // Exponential half-life decay for episodes
3760
- const decayFactor = Math.pow(0.5, ageDays / 7);
3761
- return { ...result, score: result.score * decayFactor };
3762
- }
3763
- // Step-function recency penalty for facts and knowledge
3764
- const ageHours = ageMs / 3_600_000;
3765
- if (ageHours > 72) {
3766
- return { ...result, score: result.score * 0.5 };
3767
- }
3768
- if (ageHours > 48) {
3769
- return { ...result, score: result.score * 0.7 };
3770
- }
3771
- return result;
3772
- });
3773
- // Re-sort after decay adjustment
3774
- decayedResults.sort((a, b) => b.score - a.score);
3775
- for (const result of decayedResults) {
3776
- // TUNE-001: drop very-low-relevance results (RRF scores below 0.008 are noise)
3777
- if (result.score < 0.008)
3778
- continue;
3779
- // TUNE-016: FTS-only results require higher floor — low-score FTS hits are noise
3780
- if (result.sources.length === 1 && result.sources[0] === 'fts' && result.score < 0.05)
3781
- continue;
3782
- // TUNE-014: episodes require higher confidence — score:2 episodes bleed adjacent
3783
- // session context and contaminate current session. Require fts+knn agreement
3784
- // (score >= 0.04) for episodes to make it into assembled context.
3785
- if (result.sourceTable === 'episodes' && result.score < 0.04)
3786
- continue;
3787
- // C2: Skip results whose content is already fingerprinted (e.g. in Active Facts)
3788
- // Dedup count is not tracked separately here — compose-level counter covers the other paths.
3789
- if (fpCheck(result.content))
3790
- continue;
3791
- const label = this.formatHybridResult(result);
3792
- const lineTokens = estimateTokens(label);
3793
- if (tokens + lineTokens > maxTokens)
3794
- break;
3795
- lines.push(label);
3796
- tokens += lineTokens;
3797
- }
3798
- return lines.length > 0 ? lines.join('\n') : null;
3799
- }
3800
- // Fallback: KNN-only when no library DB (legacy path)
3801
- if (!this.vectorStore)
3802
- return null;
3803
- const results = await this.vectorStore.search(userMessage, {
3804
- tables: ['facts', 'knowledge', 'episodes'],
3805
- limit: knnFallbackLimit,
3806
- maxDistance: 1.2,
3807
- precomputedEmbedding,
3808
- });
3809
- if (results.length === 0)
3810
- return null;
3811
- const lines = [];
3812
- let tokens = 0;
3813
- for (const result of results) {
3814
- const label = this.formatVectorResult(result);
3815
- const lineTokens = estimateTokens(label);
3816
- if (tokens + lineTokens > maxTokens)
3817
- break;
3818
- lines.push(label);
3819
- tokens += lineTokens;
3820
- }
3821
- return lines.length > 0 ? lines.join('\n') : null;
3822
- }
3823
- /**
3824
- * Format a hybrid search result for injection into context.
3825
- * Shows retrieval source(s) and relevance score.
3826
- */
3827
- formatHybridResult(result) {
3828
- const type = result.sourceTable;
3829
- const sourceTag = result.sources.length === 2 ? 'fts+knn' : result.sources[0];
3830
- const scoreStr = (result.score * 100).toFixed(0);
3831
- switch (type) {
3832
- case 'facts':
3833
- return `- [fact, ${sourceTag}, score:${scoreStr}] ${result.content}`;
3834
- case 'knowledge':
3835
- return `- [knowledge/${result.metadata || 'general'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
3836
- case 'episodes':
3837
- return `- [episode/${result.domain || 'event'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
3838
- default:
3839
- return `- [${type}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
3840
- }
3841
- }
3842
- /**
3843
- * Format a vector-only search result (legacy fallback).
3844
- */
3845
- formatVectorResult(result) {
3846
- const relevance = Math.max(0, Math.round((1 - result.distance) * 100));
3847
- const type = result.sourceTable;
3848
- switch (type) {
3849
- case 'facts':
3850
- return `- [fact, ${relevance}% relevant] ${result.content}`;
3851
- case 'knowledge':
3852
- return `- [knowledge/${result.metadata || 'general'}, ${relevance}% relevant] ${result.content}`;
3853
- case 'episodes':
3854
- return `- [episode/${result.domain || 'event'}, ${relevance}% relevant] ${result.content}`;
3855
- default:
3856
- return `- [${type}, ${relevance}% relevant] ${result.content}`;
3857
- }
3858
- }
3859
- // ─── L2 Cross-Session Context ────────────────────────────────
3860
- /**
3861
- * Build cross-session context by finding recent activity
3862
- * in other sessions for this agent.
3863
- */
3864
- // TODO Phase 1: buildCrossSessionContext queries OTHER conversations. Each has its
3865
- // own compaction fence. Per-conversation fence filtering should be added here so
3866
- // zombie messages from other sessions don't leak into cross-session context.
3867
- buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb, existingFingerprints // C3: skip entries already in facts/semantic recall
3868
- ) {
3869
- const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
3870
- if (!conversation)
3871
- return null;
3872
- const rows = db.prepare(`
3873
- SELECT m.text_content, m.role, c.channel_type, m.created_at
3874
- FROM messages m
3875
- JOIN conversations c ON m.conversation_id = c.id
3876
- WHERE c.agent_id = ?
3877
- AND m.conversation_id != ?
3878
- AND c.status = 'active'
3879
- AND m.text_content IS NOT NULL
3880
- AND m.is_heartbeat = 0
3881
- ORDER BY m.created_at DESC
3882
- LIMIT 10
3883
- `).all(agentId, conversation.id);
3884
- if (rows.length === 0)
3885
- return null;
3886
- const fpCheck = existingFingerprints
3887
- ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
3888
- : () => false;
3889
- const lines = [];
3890
- for (const r of rows) {
3891
- // C3: Skip cross-session entries whose content fingerprint already appears in context
3892
- if (fpCheck(r.text_content))
3893
- continue;
3894
- const preview = r.text_content.substring(0, 200);
3895
- lines.push(`- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`);
3896
- }
3897
- return lines.length > 0 ? lines.join('\n') : null;
3898
- }
3899
- // ─── Utilities ───────────────────────────────────────────────
3900
- /**
3901
- * Extract the last user message text from the composed messages.
3902
- */
3903
- getLastUserMessage(messages) {
3904
- for (let i = messages.length - 1; i >= 0; i--) {
3905
- if (messages[i].role === 'user' && messages[i].textContent) {
3906
- return messages[i].textContent;
3907
- }
3908
- }
3909
- return null;
3910
- }
3911
- /**
3912
- * Truncate text to approximately fit within a token budget.
3913
- * Truncates at line boundaries when possible.
3914
- */
3915
- truncateToTokens(text, maxTokens) {
3916
- const maxChars = maxTokens * 4; // inverse of our estimation
3917
- if (text.length <= maxChars)
3918
- return text;
3919
- // Try to truncate at a line boundary
3920
- const truncated = text.substring(0, maxChars);
3921
- const lastNewline = truncated.lastIndexOf('\n');
3922
- if (lastNewline > maxChars * 0.7) {
3923
- return truncated.substring(0, lastNewline) + '\n…';
3924
- }
3925
- return truncated + '…';
3926
- }
3927
- // ─── Keystone History Builder ─────────────────────────────────────
3928
- /**
3929
- * Query and score keystone candidates from before the current history window.
3930
- *
3931
- * Trims the oldest messages from includedHistory to free a keystone budget,
3932
- * then queries the DB for older messages scored by episode significance,
3933
- * FTS5 relevance, and recency.
3934
- *
3935
- * Returns null if keystones cannot be injected (no cutoff ID found,
3936
- * no candidates, or all errors).
3937
- */
3938
- async buildKeystones(db, agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, prompt, libraryDb, fenceMessageId, activeContext) {
3939
- const keystoneBudget = Math.floor(historyTokens * keystoneFraction);
3940
- if (keystoneBudget <= 0)
3941
- return null;
3942
- // Trim oldest messages from includedHistory to free keystone budget.
3943
- const trimmedHistory = [...includedHistory];
3944
- let trimmedHistoryTokens = historyTokens;
3945
- let freed = 0;
3946
- while (trimmedHistory.length > 1 && freed < keystoneBudget) {
3947
- const oldest = trimmedHistory.shift();
3948
- const oldestTokens = estimateMessageTokens(oldest);
3949
- freed += oldestTokens;
3950
- trimmedHistoryTokens -= oldestTokens;
3951
- }
3952
- // Find the oldest message ID in the trimmed recent window (cutoff point).
3953
- const oldestRecentMsg = trimmedHistory[0];
3954
- const cutoffId = oldestRecentMsg?.id ?? null;
3955
- if (cutoffId == null)
3956
- return null;
3957
- // Find the current user prompt for FTS matching.
3958
- const promptForFts = prompt?.trim() ||
3959
- (() => {
3960
- for (let i = trimmedHistory.length - 1; i >= 0; i--) {
3961
- if (trimmedHistory[i].role === 'user' && trimmedHistory[i].textContent) {
3962
- return trimmedHistory[i].textContent;
3963
- }
3964
- }
3965
- return null;
3966
- })();
3967
- try {
3968
- // Get the conversation ID from the oldest recent message.
3969
- const convRow = db.prepare('SELECT conversation_id FROM messages WHERE id = ?').get(cutoffId);
3970
- if (!convRow)
3971
- return null;
3972
- const conversationId = convRow.conversation_id;
3973
- const maxAgeHours = 168; // 7 days — tighter window gives recency real scoring weight
3974
- const nowMs = Date.now();
3975
- // Build episode significance map from libraryDb (episodes live there, not in messages.db).
3976
- // Key: source_message_id, Value: max significance for that message.
3977
- const sigMap = new Map();
3978
- if (libraryDb) {
3979
- try {
3980
- const episodeRows = libraryDb.prepare(`
3981
- SELECT source_message_id, MAX(significance) AS significance
3982
- FROM episodes
3983
- WHERE agent_id = ? AND source_message_id IS NOT NULL
3984
- GROUP BY source_message_id
3985
- `).all(agentId);
3986
- for (const row of episodeRows) {
3987
- sigMap.set(row.source_message_id, row.significance);
3988
- }
3989
- }
3990
- catch {
3991
- // Episodes query is best-effort
3992
- }
3993
- }
3994
- const fenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
3995
- // Phase 3 (Turn DAG): prefer context_id scoping, but keep legacy NULL
3996
- // rows eligible. Warmed or migrated sessions can have an active context
3997
- // while older messages predate context_id backfill; excluding NULL rows
3998
- // disables within-session keystone recall for those conversations.
3999
- const contextClause = activeContext ? 'AND (m.context_id = ? OR m.context_id IS NULL)' : '';
4000
- const baseParams = [conversationId, cutoffId];
4001
- if (fenceMessageId != null)
4002
- baseParams.push(fenceMessageId);
4003
- if (activeContext)
4004
- baseParams.push(activeContext.id);
4005
- const baseQuery = `
4006
- SELECT
4007
- m.id,
4008
- m.message_index,
4009
- m.role,
4010
- m.text_content,
4011
- m.created_at
4012
- FROM messages m
4013
- WHERE m.conversation_id = ?
4014
- AND m.id < ?
4015
- ${fenceClause}
4016
- ${contextClause}
4017
- AND m.text_content IS NOT NULL
4018
- AND m.is_heartbeat = 0
4019
- AND m.text_content != ''
4020
- LIMIT 200
4021
- `;
4022
- let candidateRows;
4023
- if (promptForFts && promptForFts.length >= 3) {
4024
- // Build a safe FTS5 query: extract words ≥3 chars, up to 8, OR with prefix.
4025
- const ftsTerms = (promptForFts.match(/\b\w{3,}\b/g) || [])
4026
- .slice(0, 8)
4027
- .map(w => `"${w.replace(/"/g, '')}"*`)
4028
- .join(' OR ');
4029
- if (ftsTerms) {
4030
- try {
4031
- const ftsParams = [conversationId, cutoffId];
4032
- if (fenceMessageId != null)
4033
- ftsParams.push(fenceMessageId);
4034
- if (activeContext)
4035
- ftsParams.push(activeContext.id);
4036
- ftsParams.push(ftsTerms);
4037
- candidateRows = db.prepare(`
4038
- SELECT
4039
- m.id,
4040
- m.message_index,
4041
- m.role,
4042
- m.text_content,
4043
- m.created_at
4044
- FROM messages m
4045
- WHERE m.conversation_id = ?
4046
- AND m.id < ?
4047
- ${fenceClause}
4048
- ${contextClause}
4049
- AND m.text_content IS NOT NULL
4050
- AND m.is_heartbeat = 0
4051
- AND m.text_content != ''
4052
- AND m.id IN (
4053
- SELECT rowid FROM messages_fts
4054
- WHERE messages_fts MATCH ?
4055
- LIMIT 100
4056
- )
4057
- LIMIT 200
4058
- `).all(...ftsParams);
4059
- }
4060
- catch {
4061
- // FTS query may fail on special characters — fall back to base query
4062
- candidateRows = db.prepare(baseQuery).all(...baseParams);
4063
- }
4064
- }
4065
- else {
4066
- candidateRows = db.prepare(baseQuery).all(...baseParams);
4067
- }
4068
- }
4069
- else {
4070
- candidateRows = db.prepare(baseQuery).all(...baseParams);
4071
- }
4072
- if (candidateRows.length === 0)
4073
- return null;
4074
- // Build KeystoneCandidate objects with computed ftsRank and ageHours.
4075
- const totalCandidates = candidateRows.length;
4076
- const candidates = candidateRows.map((row, idx) => {
4077
- const createdMs = new Date(row.created_at).getTime();
4078
- const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
4079
- // Normalize FTS rank by position (best match = 1.0, worst = 0.1)
4080
- const ftsRank = totalCandidates > 1
4081
- ? 1.0 - (idx / totalCandidates) * 0.9
4082
- : 1.0;
4083
- return {
4084
- messageId: row.id,
4085
- messageIndex: row.message_index,
4086
- role: row.role,
4087
- content: row.text_content || '',
4088
- timestamp: row.created_at,
4089
- episodeSignificance: sigMap.get(row.id) ?? null,
4090
- ftsRank,
4091
- ageHours,
4092
- };
4093
- });
4094
- // Score and rank candidates.
4095
- const ranked = rankKeystones(candidates, maxAgeHours);
4096
- // Budget-fit: take top-scored candidates until keystoneBudget exhausted.
4097
- let kTokens = 0;
4098
- const selectedKeystones = [];
4099
- for (const candidate of ranked) {
4100
- if (selectedKeystones.length >= keystoneMaxMsgs)
4101
- break;
4102
- const msg = {
4103
- role: candidate.role,
4104
- textContent: candidate.content,
4105
- toolCalls: null,
4106
- toolResults: null,
4107
- };
4108
- const msgTokens = estimateMessageTokens(msg);
4109
- if (kTokens + msgTokens > keystoneBudget)
4110
- continue; // skip oversized; keep trying
4111
- selectedKeystones.push(candidate);
4112
- kTokens += msgTokens;
4113
- }
4114
- if (selectedKeystones.length === 0)
4115
- return null;
4116
- // Sort selected keystones chronologically for injection.
4117
- selectedKeystones.sort((a, b) => a.messageIndex - b.messageIndex);
4118
- const keystoneMessages = selectedKeystones.map(c => ({
4119
- role: c.role,
4120
- textContent: c.content,
4121
- toolCalls: null,
4122
- toolResults: null,
4123
- }));
4124
- return {
4125
- keystoneMessages,
4126
- keystoneTokens: kTokens,
4127
- trimmedHistory,
4128
- trimmedHistoryTokens,
4129
- };
4130
- }
4131
- catch {
4132
- // Keystone injection is best-effort — never fail compose
4133
- return null;
4134
- }
4135
- }
4136
- // ─── Cross-Topic Keystone Retrieval (P3.5) ───────────────────────
4137
- /**
4138
- * Pull high-signal messages from OTHER topics in this session when their
4139
- * content is semantically relevant to the current active topic.
4140
- *
4141
- * Heuristic-only: no model calls. Token overlap between the current topic
4142
- * name + last 3 user messages and candidate message content.
4143
- *
4144
- * @param agentId - The agent's ID
4145
- * @param sessionKey - Current session key
4146
- * @param activeTopic - The current active topic (id + name)
4147
- * @param currentMessages - Recently included history messages for query extraction
4148
- * @param db - The messages database
4149
- * @param maxKeystones - Max cross-topic keystones to return (default 3)
4150
- * @returns Scored keystones sorted by score DESC, deduplicated by message id
4151
- */
4152
- async getKeystonesByTopic(agentId, sessionKey, activeTopic, currentMessages, db, maxKeystones = 3, fenceMessageId, activeContext) {
4153
- const otherTopics = db.prepare(`
4154
- SELECT id, name
4155
- FROM topics
4156
- WHERE session_key = ? AND id != ?
4157
- ORDER BY last_active_at DESC
4158
- LIMIT 5
4159
- `).all(sessionKey, activeTopic.id);
4160
- if (otherTopics.length === 0)
4161
- return [];
4162
- // Extract key terms from active topic name + last 3 user messages
4163
- const queryTerms = this.extractQueryTerms(activeTopic.name, currentMessages);
4164
- if (queryTerms.size === 0)
4165
- return [];
4166
- const nowMs = Date.now();
4167
- const maxAgeHours = 168; // 7 days, same as within-session keystones
4168
- const seenIds = new Set();
4169
- const allCandidates = [];
4170
- for (const topic of otherTopics) {
4171
- let topicMessages;
4172
- try {
4173
- const topicFenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
4174
- // Phase 3 (Turn DAG): constrain cross-topic queries to active context_id
4175
- const topicContextClause = activeContext ? 'AND m.context_id = ?' : '';
4176
- const topicParams = [sessionKey, agentId, topic.id];
4177
- if (fenceMessageId != null)
4178
- topicParams.push(fenceMessageId);
4179
- if (activeContext)
4180
- topicParams.push(activeContext.id);
4181
- topicMessages = db.prepare(`
4182
- SELECT m.id, m.message_index, m.role, m.text_content, m.created_at
4183
- FROM messages m
4184
- JOIN conversations c ON m.conversation_id = c.id
4185
- WHERE c.session_key = ?
4186
- AND c.agent_id = ?
4187
- AND m.topic_id = ?
4188
- ${topicFenceClause}
4189
- ${topicContextClause}
4190
- AND m.text_content IS NOT NULL
4191
- AND m.text_content != ''
4192
- AND m.is_heartbeat = 0
4193
- ORDER BY m.message_index DESC
4194
- LIMIT 50
4195
- `).all(...topicParams);
4196
- }
4197
- catch {
4198
- // Corrupt topic data — skip this topic, never throw
4199
- continue;
4200
- }
4201
- if (topicMessages.length === 0)
4202
- continue;
4203
- const topicCandidates = topicMessages.map((msg, idx) => {
4204
- const createdMs = new Date(msg.created_at).getTime();
4205
- const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
4206
- const ftsRank = topicMessages.length > 1
4207
- ? 1.0 - (idx / topicMessages.length) * 0.9
4208
- : 1.0;
4209
- return {
4210
- messageId: msg.id,
4211
- messageIndex: msg.message_index,
4212
- role: msg.role,
4213
- content: msg.text_content,
4214
- timestamp: msg.created_at,
4215
- episodeSignificance: null,
4216
- ftsRank,
4217
- ageHours,
4218
- };
4219
- });
4220
- const topTopicKeystones = rankKeystones(topicCandidates, maxAgeHours).slice(0, 10);
4221
- // Filter to messages with semantic overlap (≥2 matching terms)
4222
- const relevant = topTopicKeystones.filter(candidate => {
4223
- const contentLower = candidate.content.toLowerCase();
4224
- let matches = 0;
4225
- for (const term of queryTerms) {
4226
- if (contentLower.includes(term)) {
4227
- matches++;
4228
- if (matches >= 2)
4229
- return true;
4230
- }
4231
- }
4232
- return false;
4233
- });
4234
- if (relevant.length === 0)
4235
- continue;
4236
- // Re-score filtered candidates so they compete on the same final scale
4237
- for (const candidate of relevant) {
4238
- if (seenIds.has(candidate.messageId))
4239
- continue;
4240
- seenIds.add(candidate.messageId);
4241
- const score = scoreKeystone(candidate, maxAgeHours);
4242
- allCandidates.push({ ...candidate, score });
4243
- }
4244
- }
4245
- if (allCandidates.length === 0)
4246
- return [];
4247
- // Sort by score DESC and return top maxKeystones
4248
- return allCandidates
4249
- .sort((a, b) => b.score - a.score)
4250
- .slice(0, maxKeystones);
4251
- }
4252
- /**
4253
- * Extract lowercase key terms from a topic name and the last 3 user messages.
4254
- * Terms are: tokens with ≥4 characters (skip short stop words).
4255
- * Returns a Set for O(1) lookup.
4256
- */
4257
- extractQueryTerms(topicName, messages) {
4258
- const terms = new Set();
4259
- const MIN_TERM_LEN = 4;
4260
- // From topic name
4261
- const topicTokens = topicName.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
4262
- for (const t of topicTokens)
4263
- terms.add(t);
4264
- // From last 3 user messages
4265
- let userCount = 0;
4266
- for (let i = messages.length - 1; i >= 0 && userCount < 3; i--) {
4267
- const msg = messages[i];
4268
- if (msg.role === 'user' && msg.textContent) {
4269
- const tokens = msg.textContent.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
4270
- for (const t of tokens) {
4271
- if (t.length >= MIN_TERM_LEN)
4272
- terms.add(t);
4273
- }
4274
- userCount++;
4275
- }
4276
- }
4277
- return terms;
4278
- }
4279
- }
4280
- //# sourceMappingURL=compositor.js.map