@rbalchii/anchor-engine 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. package/LICENSE +609 -0
  2. package/README.md +317 -0
  3. package/anchor.bat +5 -0
  4. package/docs/API.md +314 -0
  5. package/docs/DEPLOYMENT.md +448 -0
  6. package/docs/INDEX.md +226 -0
  7. package/docs/STAR_Whitepaper_Executive.md +216 -0
  8. package/docs/TROUBLESHOOTING.md +535 -0
  9. package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
  10. package/docs/archive/adoption-guide.md +264 -0
  11. package/docs/archive/adoption-preparation.md +179 -0
  12. package/docs/archive/agent-harness-integration.md +227 -0
  13. package/docs/archive/api-reference.md +106 -0
  14. package/docs/archive/api_flows_diagram.md +118 -0
  15. package/docs/archive/architecture.md +410 -0
  16. package/docs/archive/architecture_diagram.md +174 -0
  17. package/docs/archive/broader-adoption-preparation.md +175 -0
  18. package/docs/archive/browser-paradigm-architecture.md +163 -0
  19. package/docs/archive/chat-integration.md +124 -0
  20. package/docs/archive/community-adoption-materials.md +103 -0
  21. package/docs/archive/community-adoption.md +147 -0
  22. package/docs/archive/comparison-with-siloed-solutions.md +192 -0
  23. package/docs/archive/comprehensive-docs.md +156 -0
  24. package/docs/archive/data_flow_diagram.md +251 -0
  25. package/docs/archive/enhancement-implementation-summary.md +146 -0
  26. package/docs/archive/evolution-summary.md +141 -0
  27. package/docs/archive/ingestion_pipeline_diagram.md +198 -0
  28. package/docs/archive/native-module-profiling-results.md +135 -0
  29. package/docs/archive/positioning-document.md +158 -0
  30. package/docs/archive/positioning.md +175 -0
  31. package/docs/archive/query-builder-documentation.md +218 -0
  32. package/docs/archive/quick-reference.md +40 -0
  33. package/docs/archive/quickstart.md +63 -0
  34. package/docs/archive/relationship-narrative-discovery.md +141 -0
  35. package/docs/archive/search-logic-improvement-plan.md +336 -0
  36. package/docs/archive/search_architecture_diagram.md +212 -0
  37. package/docs/archive/semantic-architecture-guide.md +97 -0
  38. package/docs/archive/sequence-diagrams.md +128 -0
  39. package/docs/archive/system_components_diagram.md +296 -0
  40. package/docs/archive/test-framework-integration.md +109 -0
  41. package/docs/archive/testing-framework-documentation.md +397 -0
  42. package/docs/archive/testing-framework-summary.md +121 -0
  43. package/docs/archive/testing-framework.md +377 -0
  44. package/docs/archive/ui-architecture.md +75 -0
  45. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
  46. package/docs/arxiv/RELATED_WORK.tex +39 -0
  47. package/docs/arxiv/compile.bat +48 -0
  48. package/docs/arxiv/joss_response.md +33 -0
  49. package/docs/arxiv/prepare-submission.bat +46 -0
  50. package/docs/arxiv/review.md +128 -0
  51. package/docs/arxiv/star-whitepaper.tex +657 -0
  52. package/docs/code-patterns.md +289 -0
  53. package/docs/whitepaper.md +445 -0
  54. package/engine/dist/agent/runtime.d.ts +41 -0
  55. package/engine/dist/agent/runtime.d.ts.map +1 -0
  56. package/engine/dist/agent/runtime.js +73 -0
  57. package/engine/dist/agent/runtime.js.map +1 -0
  58. package/engine/dist/commands/audit-tags.d.ts +14 -0
  59. package/engine/dist/commands/audit-tags.d.ts.map +1 -0
  60. package/engine/dist/commands/audit-tags.js +180 -0
  61. package/engine/dist/commands/audit-tags.js.map +1 -0
  62. package/engine/dist/commands/distill.d.ts +19 -0
  63. package/engine/dist/commands/distill.d.ts.map +1 -0
  64. package/engine/dist/commands/distill.js +114 -0
  65. package/engine/dist/commands/distill.js.map +1 -0
  66. package/engine/dist/commands/generate-synonyms.d.ts +14 -0
  67. package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
  68. package/engine/dist/commands/generate-synonyms.js +91 -0
  69. package/engine/dist/commands/generate-synonyms.js.map +1 -0
  70. package/engine/dist/config/index.d.ts +115 -0
  71. package/engine/dist/config/index.d.ts.map +1 -0
  72. package/engine/dist/config/index.js +326 -0
  73. package/engine/dist/config/index.js.map +1 -0
  74. package/engine/dist/config/max-recall-config.d.ts +102 -0
  75. package/engine/dist/config/max-recall-config.d.ts.map +1 -0
  76. package/engine/dist/config/max-recall-config.js +102 -0
  77. package/engine/dist/config/max-recall-config.js.map +1 -0
  78. package/engine/dist/config/paths.d.ts +40 -0
  79. package/engine/dist/config/paths.d.ts.map +1 -0
  80. package/engine/dist/config/paths.js +49 -0
  81. package/engine/dist/config/paths.js.map +1 -0
  82. package/engine/dist/core/batch.d.ts +19 -0
  83. package/engine/dist/core/batch.d.ts.map +1 -0
  84. package/engine/dist/core/batch.js +37 -0
  85. package/engine/dist/core/batch.js.map +1 -0
  86. package/engine/dist/core/db.d.ts +58 -0
  87. package/engine/dist/core/db.d.ts.map +1 -0
  88. package/engine/dist/core/db.js +563 -0
  89. package/engine/dist/core/db.js.map +1 -0
  90. package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
  91. package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
  92. package/engine/dist/core/inference/ChatWorker.js +28 -0
  93. package/engine/dist/core/inference/ChatWorker.js.map +1 -0
  94. package/engine/dist/core/inference/context_manager.d.ts +49 -0
  95. package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
  96. package/engine/dist/core/inference/context_manager.js +199 -0
  97. package/engine/dist/core/inference/context_manager.js.map +1 -0
  98. package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
  99. package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
  100. package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
  101. package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
  102. package/engine/dist/core/vector.d.ts +40 -0
  103. package/engine/dist/core/vector.d.ts.map +1 -0
  104. package/engine/dist/core/vector.js +167 -0
  105. package/engine/dist/core/vector.js.map +1 -0
  106. package/engine/dist/index.d.ts +4 -0
  107. package/engine/dist/index.d.ts.map +1 -0
  108. package/engine/dist/index.js +400 -0
  109. package/engine/dist/index.js.map +1 -0
  110. package/engine/dist/middleware/auth.d.ts +14 -0
  111. package/engine/dist/middleware/auth.d.ts.map +1 -0
  112. package/engine/dist/middleware/auth.js +44 -0
  113. package/engine/dist/middleware/auth.js.map +1 -0
  114. package/engine/dist/middleware/request-tracing.d.ts +29 -0
  115. package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
  116. package/engine/dist/middleware/request-tracing.js +115 -0
  117. package/engine/dist/middleware/request-tracing.js.map +1 -0
  118. package/engine/dist/middleware/validate.d.ts +30 -0
  119. package/engine/dist/middleware/validate.d.ts.map +1 -0
  120. package/engine/dist/middleware/validate.js +117 -0
  121. package/engine/dist/middleware/validate.js.map +1 -0
  122. package/engine/dist/native/index.d.ts +106 -0
  123. package/engine/dist/native/index.d.ts.map +1 -0
  124. package/engine/dist/native/index.js +230 -0
  125. package/engine/dist/native/index.js.map +1 -0
  126. package/engine/dist/native/types.d.ts +45 -0
  127. package/engine/dist/native/types.d.ts.map +1 -0
  128. package/engine/dist/native/types.js +6 -0
  129. package/engine/dist/native/types.js.map +1 -0
  130. package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
  131. package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
  132. package/engine/dist/profiling/atomization-profiling.js +108 -0
  133. package/engine/dist/profiling/atomization-profiling.js.map +1 -0
  134. package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
  135. package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
  136. package/engine/dist/profiling/bottleneck-identification.js +249 -0
  137. package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
  138. package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
  139. package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
  140. package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
  141. package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
  142. package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
  143. package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
  144. package/engine/dist/profiling/simhash-profiling.js +168 -0
  145. package/engine/dist/profiling/simhash-profiling.js.map +1 -0
  146. package/engine/dist/routes/api.d.ts +9 -0
  147. package/engine/dist/routes/api.d.ts.map +1 -0
  148. package/engine/dist/routes/api.js +37 -0
  149. package/engine/dist/routes/api.js.map +1 -0
  150. package/engine/dist/routes/enhanced-api.d.ts +9 -0
  151. package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
  152. package/engine/dist/routes/enhanced-api.js +139 -0
  153. package/engine/dist/routes/enhanced-api.js.map +1 -0
  154. package/engine/dist/routes/health.d.ts +8 -0
  155. package/engine/dist/routes/health.d.ts.map +1 -0
  156. package/engine/dist/routes/health.js +89 -0
  157. package/engine/dist/routes/health.js.map +1 -0
  158. package/engine/dist/routes/monitoring.d.ts +8 -0
  159. package/engine/dist/routes/monitoring.d.ts.map +1 -0
  160. package/engine/dist/routes/monitoring.js +509 -0
  161. package/engine/dist/routes/monitoring.js.map +1 -0
  162. package/engine/dist/routes/v1/admin.d.ts +3 -0
  163. package/engine/dist/routes/v1/admin.d.ts.map +1 -0
  164. package/engine/dist/routes/v1/admin.js +261 -0
  165. package/engine/dist/routes/v1/admin.js.map +1 -0
  166. package/engine/dist/routes/v1/atoms.d.ts +3 -0
  167. package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
  168. package/engine/dist/routes/v1/atoms.js +172 -0
  169. package/engine/dist/routes/v1/atoms.js.map +1 -0
  170. package/engine/dist/routes/v1/backup.d.ts +3 -0
  171. package/engine/dist/routes/v1/backup.d.ts.map +1 -0
  172. package/engine/dist/routes/v1/backup.js +100 -0
  173. package/engine/dist/routes/v1/backup.js.map +1 -0
  174. package/engine/dist/routes/v1/git.d.ts +3 -0
  175. package/engine/dist/routes/v1/git.d.ts.map +1 -0
  176. package/engine/dist/routes/v1/git.js +316 -0
  177. package/engine/dist/routes/v1/git.js.map +1 -0
  178. package/engine/dist/routes/v1/ingest.d.ts +3 -0
  179. package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
  180. package/engine/dist/routes/v1/ingest.js +66 -0
  181. package/engine/dist/routes/v1/ingest.js.map +1 -0
  182. package/engine/dist/routes/v1/memory.d.ts +14 -0
  183. package/engine/dist/routes/v1/memory.d.ts.map +1 -0
  184. package/engine/dist/routes/v1/memory.js +87 -0
  185. package/engine/dist/routes/v1/memory.js.map +1 -0
  186. package/engine/dist/routes/v1/research.d.ts +3 -0
  187. package/engine/dist/routes/v1/research.d.ts.map +1 -0
  188. package/engine/dist/routes/v1/research.js +109 -0
  189. package/engine/dist/routes/v1/research.js.map +1 -0
  190. package/engine/dist/routes/v1/search.d.ts +3 -0
  191. package/engine/dist/routes/v1/search.d.ts.map +1 -0
  192. package/engine/dist/routes/v1/search.js +180 -0
  193. package/engine/dist/routes/v1/search.js.map +1 -0
  194. package/engine/dist/routes/v1/settings.d.ts +8 -0
  195. package/engine/dist/routes/v1/settings.d.ts.map +1 -0
  196. package/engine/dist/routes/v1/settings.js +211 -0
  197. package/engine/dist/routes/v1/settings.js.map +1 -0
  198. package/engine/dist/routes/v1/system.d.ts +3 -0
  199. package/engine/dist/routes/v1/system.d.ts.map +1 -0
  200. package/engine/dist/routes/v1/system.js +326 -0
  201. package/engine/dist/routes/v1/system.js.map +1 -0
  202. package/engine/dist/routes/v1/tags.d.ts +3 -0
  203. package/engine/dist/routes/v1/tags.d.ts.map +1 -0
  204. package/engine/dist/routes/v1/tags.js +102 -0
  205. package/engine/dist/routes/v1/tags.js.map +1 -0
  206. package/engine/dist/server-8080.d.ts +2 -0
  207. package/engine/dist/server-8080.d.ts.map +1 -0
  208. package/engine/dist/server-8080.js +74 -0
  209. package/engine/dist/server-8080.js.map +1 -0
  210. package/engine/dist/services/backup/backup-restore.d.ts +37 -0
  211. package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
  212. package/engine/dist/services/backup/backup-restore.js +385 -0
  213. package/engine/dist/services/backup/backup-restore.js.map +1 -0
  214. package/engine/dist/services/backup/backup.d.ts +14 -0
  215. package/engine/dist/services/backup/backup.d.ts.map +1 -0
  216. package/engine/dist/services/backup/backup.js +442 -0
  217. package/engine/dist/services/backup/backup.js.map +1 -0
  218. package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
  219. package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
  220. package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
  221. package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
  222. package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
  223. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
  224. package/engine/dist/services/distillation/radial-distiller.js +394 -0
  225. package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
  226. package/engine/dist/services/health-check-enhanced.d.ts +89 -0
  227. package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
  228. package/engine/dist/services/health-check-enhanced.js +417 -0
  229. package/engine/dist/services/health-check-enhanced.js.map +1 -0
  230. package/engine/dist/services/idle-manager.d.ts +56 -0
  231. package/engine/dist/services/idle-manager.d.ts.map +1 -0
  232. package/engine/dist/services/idle-manager.js +210 -0
  233. package/engine/dist/services/idle-manager.js.map +1 -0
  234. package/engine/dist/services/inference/inference-service.d.ts +27 -0
  235. package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
  236. package/engine/dist/services/inference/inference-service.js +89 -0
  237. package/engine/dist/services/inference/inference-service.js.map +1 -0
  238. package/engine/dist/services/inference/inference.d.ts +59 -0
  239. package/engine/dist/services/inference/inference.d.ts.map +1 -0
  240. package/engine/dist/services/inference/inference.js +131 -0
  241. package/engine/dist/services/inference/inference.js.map +1 -0
  242. package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
  243. package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
  244. package/engine/dist/services/ingest/atomizer-service.js +982 -0
  245. package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
  246. package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
  247. package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
  248. package/engine/dist/services/ingest/content-cleaner.js +166 -0
  249. package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
  250. package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
  251. package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
  252. package/engine/dist/services/ingest/github-ingest-service.js +537 -0
  253. package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
  254. package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
  255. package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
  256. package/engine/dist/services/ingest/ingest-atomic.js +437 -0
  257. package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
  258. package/engine/dist/services/ingest/ingest.d.ts +50 -0
  259. package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
  260. package/engine/dist/services/ingest/ingest.js +230 -0
  261. package/engine/dist/services/ingest/ingest.js.map +1 -0
  262. package/engine/dist/services/ingest/watchdog.d.ts +31 -0
  263. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
  264. package/engine/dist/services/ingest/watchdog.js +400 -0
  265. package/engine/dist/services/ingest/watchdog.js.map +1 -0
  266. package/engine/dist/services/llm/context.d.ts +6 -0
  267. package/engine/dist/services/llm/context.d.ts.map +1 -0
  268. package/engine/dist/services/llm/context.js +80 -0
  269. package/engine/dist/services/llm/context.js.map +1 -0
  270. package/engine/dist/services/llm/provider.d.ts +23 -0
  271. package/engine/dist/services/llm/provider.d.ts.map +1 -0
  272. package/engine/dist/services/llm/provider.js +338 -0
  273. package/engine/dist/services/llm/provider.js.map +1 -0
  274. package/engine/dist/services/llm/reader.d.ts +12 -0
  275. package/engine/dist/services/llm/reader.d.ts.map +1 -0
  276. package/engine/dist/services/llm/reader.js +40 -0
  277. package/engine/dist/services/llm/reader.js.map +1 -0
  278. package/engine/dist/services/mirror/mirror.d.ts +28 -0
  279. package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
  280. package/engine/dist/services/mirror/mirror.js +208 -0
  281. package/engine/dist/services/mirror/mirror.js.map +1 -0
  282. package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
  283. package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
  284. package/engine/dist/services/nlp/nlp-service.js +151 -0
  285. package/engine/dist/services/nlp/nlp-service.js.map +1 -0
  286. package/engine/dist/services/nlp/query-parser.d.ts +9 -0
  287. package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
  288. package/engine/dist/services/nlp/query-parser.js +29 -0
  289. package/engine/dist/services/nlp/query-parser.js.map +1 -0
  290. package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
  291. package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
  292. package/engine/dist/services/query-builder/DataFrame.js +263 -0
  293. package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
  294. package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
  295. package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
  296. package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
  297. package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
  298. package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
  299. package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
  300. package/engine/dist/services/query-builder/utils/export.js +130 -0
  301. package/engine/dist/services/query-builder/utils/export.js.map +1 -0
  302. package/engine/dist/services/research/researcher.d.ts +15 -0
  303. package/engine/dist/services/research/researcher.d.ts.map +1 -0
  304. package/engine/dist/services/research/researcher.js +123 -0
  305. package/engine/dist/services/research/researcher.js.map +1 -0
  306. package/engine/dist/services/scribe/scribe.d.ts +43 -0
  307. package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
  308. package/engine/dist/services/scribe/scribe.js +135 -0
  309. package/engine/dist/services/scribe/scribe.js.map +1 -0
  310. package/engine/dist/services/search/bright-nodes.d.ts +41 -0
  311. package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
  312. package/engine/dist/services/search/bright-nodes.js +117 -0
  313. package/engine/dist/services/search/bright-nodes.js.map +1 -0
  314. package/engine/dist/services/search/context-inflator.d.ts +63 -0
  315. package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
  316. package/engine/dist/services/search/context-inflator.js +649 -0
  317. package/engine/dist/services/search/context-inflator.js.map +1 -0
  318. package/engine/dist/services/search/context-manager.d.ts +34 -0
  319. package/engine/dist/services/search/context-manager.d.ts.map +1 -0
  320. package/engine/dist/services/search/context-manager.js +124 -0
  321. package/engine/dist/services/search/context-manager.js.map +1 -0
  322. package/engine/dist/services/search/distributed-query.d.ts +38 -0
  323. package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
  324. package/engine/dist/services/search/distributed-query.js +105 -0
  325. package/engine/dist/services/search/distributed-query.js.map +1 -0
  326. package/engine/dist/services/search/explore.d.ts +73 -0
  327. package/engine/dist/services/search/explore.d.ts.map +1 -0
  328. package/engine/dist/services/search/explore.js +388 -0
  329. package/engine/dist/services/search/explore.js.map +1 -0
  330. package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
  331. package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
  332. package/engine/dist/services/search/graph-context-serializer.js +435 -0
  333. package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
  334. package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
  335. package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
  336. package/engine/dist/services/search/llm-context-formatter.js +394 -0
  337. package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
  338. package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
  339. package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
  340. package/engine/dist/services/search/physics-tag-walker.js +611 -0
  341. package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
  342. package/engine/dist/services/search/query-parser.d.ts +66 -0
  343. package/engine/dist/services/search/query-parser.d.ts.map +1 -0
  344. package/engine/dist/services/search/query-parser.js +346 -0
  345. package/engine/dist/services/search/query-parser.js.map +1 -0
  346. package/engine/dist/services/search/search-utils.d.ts +100 -0
  347. package/engine/dist/services/search/search-utils.d.ts.map +1 -0
  348. package/engine/dist/services/search/search-utils.js +473 -0
  349. package/engine/dist/services/search/search-utils.js.map +1 -0
  350. package/engine/dist/services/search/search.d.ts +116 -0
  351. package/engine/dist/services/search/search.d.ts.map +1 -0
  352. package/engine/dist/services/search/search.js +1286 -0
  353. package/engine/dist/services/search/search.js.map +1 -0
  354. package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
  355. package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
  356. package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
  357. package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
  358. package/engine/dist/services/search/streaming-search.d.ts +51 -0
  359. package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
  360. package/engine/dist/services/search/streaming-search.js +94 -0
  361. package/engine/dist/services/search/streaming-search.js.map +1 -0
  362. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
  363. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
  364. package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
  365. package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
  366. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
  367. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
  368. package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
  369. package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
  370. package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
  371. package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
  372. package/engine/dist/services/semantic/semantic-search.js +649 -0
  373. package/engine/dist/services/semantic/semantic-search.js.map +1 -0
  374. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
  375. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
  376. package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
  377. package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
  378. package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
  379. package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
  380. package/engine/dist/services/semantic/types/semantic.js +7 -0
  381. package/engine/dist/services/semantic/types/semantic.js.map +1 -0
  382. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
  383. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
  384. package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
  385. package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
  386. package/engine/dist/services/system-status.d.ts +68 -0
  387. package/engine/dist/services/system-status.d.ts.map +1 -0
  388. package/engine/dist/services/system-status.js +107 -0
  389. package/engine/dist/services/system-status.js.map +1 -0
  390. package/engine/dist/services/tags/discovery.d.ts +16 -0
  391. package/engine/dist/services/tags/discovery.d.ts.map +1 -0
  392. package/engine/dist/services/tags/discovery.js +206 -0
  393. package/engine/dist/services/tags/discovery.js.map +1 -0
  394. package/engine/dist/services/tags/gliner.d.ts +18 -0
  395. package/engine/dist/services/tags/gliner.d.ts.map +1 -0
  396. package/engine/dist/services/tags/gliner.js +119 -0
  397. package/engine/dist/services/tags/gliner.js.map +1 -0
  398. package/engine/dist/services/tags/infector.d.ts +21 -0
  399. package/engine/dist/services/tags/infector.d.ts.map +1 -0
  400. package/engine/dist/services/tags/infector.js +168 -0
  401. package/engine/dist/services/tags/infector.js.map +1 -0
  402. package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
  403. package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
  404. package/engine/dist/services/tags/tag-auditor.js +283 -0
  405. package/engine/dist/services/tags/tag-auditor.js.map +1 -0
  406. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
  407. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
  408. package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
  409. package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
  410. package/engine/dist/services/vision/vision_service.d.ts +4 -0
  411. package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
  412. package/engine/dist/services/vision/vision_service.js +197 -0
  413. package/engine/dist/services/vision/vision_service.js.map +1 -0
  414. package/engine/dist/test-framework/core.d.ts +133 -0
  415. package/engine/dist/test-framework/core.d.ts.map +1 -0
  416. package/engine/dist/test-framework/core.js +313 -0
  417. package/engine/dist/test-framework/core.js.map +1 -0
  418. package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
  419. package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
  420. package/engine/dist/test-framework/dataset-runner.js +223 -0
  421. package/engine/dist/test-framework/dataset-runner.js.map +1 -0
  422. package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
  423. package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
  424. package/engine/dist/test-framework/diagnostic-tests.js +283 -0
  425. package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
  426. package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
  427. package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
  428. package/engine/dist/test-framework/performance-regression-tests.js +331 -0
  429. package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
  430. package/engine/dist/types/api.d.ts +53 -0
  431. package/engine/dist/types/api.d.ts.map +1 -0
  432. package/engine/dist/types/api.js +2 -0
  433. package/engine/dist/types/api.js.map +1 -0
  434. package/engine/dist/types/atomic.d.ts +42 -0
  435. package/engine/dist/types/atomic.d.ts.map +1 -0
  436. package/engine/dist/types/atomic.js +10 -0
  437. package/engine/dist/types/atomic.js.map +1 -0
  438. package/engine/dist/types/context-protocol.d.ts +137 -0
  439. package/engine/dist/types/context-protocol.d.ts.map +1 -0
  440. package/engine/dist/types/context-protocol.js +28 -0
  441. package/engine/dist/types/context-protocol.js.map +1 -0
  442. package/engine/dist/types/context.d.ts +2 -0
  443. package/engine/dist/types/context.d.ts.map +1 -0
  444. package/engine/dist/types/context.js +2 -0
  445. package/engine/dist/types/context.js.map +1 -0
  446. package/engine/dist/types/index.d.ts +20 -0
  447. package/engine/dist/types/index.d.ts.map +1 -0
  448. package/engine/dist/types/index.js +18 -0
  449. package/engine/dist/types/index.js.map +1 -0
  450. package/engine/dist/types/search.d.ts +31 -0
  451. package/engine/dist/types/search.d.ts.map +1 -0
  452. package/engine/dist/types/search.js +2 -0
  453. package/engine/dist/types/search.js.map +1 -0
  454. package/engine/dist/types/taxonomy.d.ts +137 -0
  455. package/engine/dist/types/taxonomy.d.ts.map +1 -0
  456. package/engine/dist/types/taxonomy.js +138 -0
  457. package/engine/dist/types/taxonomy.js.map +1 -0
  458. package/engine/dist/types/taxonomy.simple.d.ts +131 -0
  459. package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
  460. package/engine/dist/types/taxonomy.simple.js +132 -0
  461. package/engine/dist/types/taxonomy.simple.js.map +1 -0
  462. package/engine/dist/types/tool-call.d.ts +16 -0
  463. package/engine/dist/types/tool-call.d.ts.map +1 -0
  464. package/engine/dist/types/tool-call.js +6 -0
  465. package/engine/dist/types/tool-call.js.map +1 -0
  466. package/engine/dist/types/trace.d.ts +25 -0
  467. package/engine/dist/types/trace.d.ts.map +1 -0
  468. package/engine/dist/types/trace.js +5 -0
  469. package/engine/dist/types/trace.js.map +1 -0
  470. package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
  471. package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
  472. package/engine/dist/utils/adaptive-concurrency.js +266 -0
  473. package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
  474. package/engine/dist/utils/date_extractor.d.ts +2 -0
  475. package/engine/dist/utils/date_extractor.d.ts.map +1 -0
  476. package/engine/dist/utils/date_extractor.js +32 -0
  477. package/engine/dist/utils/date_extractor.js.map +1 -0
  478. package/engine/dist/utils/native-module-manager.d.ts +48 -0
  479. package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
  480. package/engine/dist/utils/native-module-manager.js +265 -0
  481. package/engine/dist/utils/native-module-manager.js.map +1 -0
  482. package/engine/dist/utils/native-module-profiler.d.ts +66 -0
  483. package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
  484. package/engine/dist/utils/native-module-profiler.js +182 -0
  485. package/engine/dist/utils/native-module-profiler.js.map +1 -0
  486. package/engine/dist/utils/path-manager.d.ts +59 -0
  487. package/engine/dist/utils/path-manager.d.ts.map +1 -0
  488. package/engine/dist/utils/path-manager.js +154 -0
  489. package/engine/dist/utils/path-manager.js.map +1 -0
  490. package/engine/dist/utils/performance-monitor.d.ts +92 -0
  491. package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
  492. package/engine/dist/utils/performance-monitor.js +221 -0
  493. package/engine/dist/utils/performance-monitor.js.map +1 -0
  494. package/engine/dist/utils/process-manager.d.ts +18 -0
  495. package/engine/dist/utils/process-manager.d.ts.map +1 -0
  496. package/engine/dist/utils/process-manager.js +100 -0
  497. package/engine/dist/utils/process-manager.js.map +1 -0
  498. package/engine/dist/utils/request-tracer.d.ts +131 -0
  499. package/engine/dist/utils/request-tracer.d.ts.map +1 -0
  500. package/engine/dist/utils/request-tracer.js +414 -0
  501. package/engine/dist/utils/request-tracer.js.map +1 -0
  502. package/engine/dist/utils/resource-manager.d.ts +108 -0
  503. package/engine/dist/utils/resource-manager.d.ts.map +1 -0
  504. package/engine/dist/utils/resource-manager.js +235 -0
  505. package/engine/dist/utils/resource-manager.js.map +1 -0
  506. package/engine/dist/utils/safe-dns.d.ts +14 -0
  507. package/engine/dist/utils/safe-dns.d.ts.map +1 -0
  508. package/engine/dist/utils/safe-dns.js +105 -0
  509. package/engine/dist/utils/safe-dns.js.map +1 -0
  510. package/engine/dist/utils/structured-logger.d.ts +124 -0
  511. package/engine/dist/utils/structured-logger.d.ts.map +1 -0
  512. package/engine/dist/utils/structured-logger.js +332 -0
  513. package/engine/dist/utils/structured-logger.js.map +1 -0
  514. package/engine/dist/utils/tag-cleanup.d.ts +11 -0
  515. package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
  516. package/engine/dist/utils/tag-cleanup.js +111 -0
  517. package/engine/dist/utils/tag-cleanup.js.map +1 -0
  518. package/engine/dist/utils/tag-filter.d.ts +19 -0
  519. package/engine/dist/utils/tag-filter.d.ts.map +1 -0
  520. package/engine/dist/utils/tag-filter.js +147 -0
  521. package/engine/dist/utils/tag-filter.js.map +1 -0
  522. package/engine/dist/utils/tag-modulation.d.ts +80 -0
  523. package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
  524. package/engine/dist/utils/tag-modulation.js +284 -0
  525. package/engine/dist/utils/tag-modulation.js.map +1 -0
  526. package/engine/dist/utils/timer.d.ts +40 -0
  527. package/engine/dist/utils/timer.d.ts.map +1 -0
  528. package/engine/dist/utils/timer.js +76 -0
  529. package/engine/dist/utils/timer.js.map +1 -0
  530. package/engine/dist/utils/token-utils.d.ts +19 -0
  531. package/engine/dist/utils/token-utils.d.ts.map +1 -0
  532. package/engine/dist/utils/token-utils.js +71 -0
  533. package/engine/dist/utils/token-utils.js.map +1 -0
  534. package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
  535. package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
  536. package/engine/dist/utils/wasm-module-loader.js +136 -0
  537. package/engine/dist/utils/wasm-module-loader.js.map +1 -0
  538. package/engine/package.json +105 -0
  539. package/package.json +106 -0
@@ -0,0 +1,1286 @@
1
+ /**
2
+ * Search Orchestrator — "The Brain"
3
+ *
4
+ * Core search orchestration, Tag-Walker physics engine, engram lookup,
5
+ * and result merging. All NLP parsing lives in query-parser.ts ("The Ears"),
6
+ * utilities in search-utils.ts ("The Tools"), and graph reasoning in
7
+ * bright-nodes.ts ("The Illuminator").
8
+ *
9
+ * Standard 086 Compliant.
10
+ * Standard 086 = "Dual-Strategy Search" (internal specification numbering).
11
+ * See specs/standards/STANDARD_086_DUAL_STRATEGY_SEARCH.md for full spec.
12
+ * Two modes: Standard Search (70/30 budget, temporal decay) and Max-Recall
13
+ * (zero decay, 3-hop traversal). Mode auto-selects based on token budget.
14
+ */
15
+ import { db } from '../../core/db.js';
16
+ import { createHash } from 'crypto';
17
+ import { config } from '../../config/index.js';
18
+ import { ContextInflator } from './context-inflator.js';
19
+ import { systemStatus } from '../system-status.js';
20
+ import { processWithAdaptiveConcurrency } from '../../utils/adaptive-concurrency.js';
21
+ // --- Imports from extracted modules ---
22
+ import { nlp, getGlobalTags, sanitizeFtsQuery, expandCamelCase, extractTemporalContext, splitQueryIntoMolecules, parseQuery } from './query-parser.js';
23
+ import { getHammingDistance, formatResults, filterDisplayTags } from './search-utils.js';
24
+ // Re-export everything that external consumers need
25
+ export { getGlobalTags, filterDisplayTags, parseQuery, splitQueryIntoMolecules };
26
+ export { getBrightNodes, getStructuredGraph } from './bright-nodes.js';
27
/**
 * Lightweight semantic scoring for two-pass search (Standard 134).
 * Scores a candidate from data it already carries, so low-quality
 * candidates can be discarded before any expensive context inflation.
 *
 * The score is `base * 0.3 + termOverlap * 0.5 + phrase + tag + recency`,
 * capped at 1.0, where:
 *   - base        = `result.score`, or 0.5 when it is missing/zero
 *   - termOverlap = fraction of `queryTerms` found (case-insensitively) in the content
 *   - phrase      = 0.3 when the whole `query` appears verbatim in the content
 *   - tag         = 0.1 per tag that contains any query term
 *   - recency     = 0.2 for a brand-new item, decaying linearly to 0 at 30 days
 *
 * @param {object} result - Candidate with optional `content`, `score`, `tags`, `timestamp`.
 * @param {string[]} queryTerms - Individual search terms.
 * @param {string} query - The full query string, used for the exact-phrase bonus.
 * @returns {number} Score in [0, 1]; when the candidate has no content, its
 *   existing `score` (or 0) is returned unchanged.
 */
function calculateLightweightScore(result, queryTerms, query) {
    if (!result.content)
        return result.score || 0;
    const content = result.content.toLowerCase();
    // Lower-case the terms once instead of once per comparison.
    const loweredTerms = queryTerms.map((term) => term.toLowerCase());
    // Term overlap score (0-1)
    const termMatches = loweredTerms.filter((term) => content.includes(term)).length;
    const termScore = loweredTerms.length > 0 ? termMatches / loweredTerms.length : 0;
    // Exact phrase bonus
    const phraseBonus = content.includes(query.toLowerCase()) ? 0.3 : 0;
    // Tag relevance bonus. Non-array tag fields and non-string entries are
    // ignored rather than allowed to throw and fail the whole scoring pass.
    const tagBonus = Array.isArray(result.tags)
        ? result.tags.filter((tag) => typeof tag === 'string'
            && loweredTerms.some((term) => tag.toLowerCase().includes(term))).length * 0.1
        : 0;
    // Recency bonus (newer = higher score, decay over 30 days).
    // Clamped to [0, 0.2] so a future-dated timestamp (clock skew) cannot earn
    // more than the maximum, and skipped for values that are not valid numbers
    // so the score never becomes NaN.
    let recencyBonus = 0;
    const timestamp = Number(result.timestamp);
    if (result.timestamp && Number.isFinite(timestamp)) {
        const ageDays = (Date.now() - timestamp) / (1000 * 60 * 60 * 24);
        recencyBonus = Math.min(0.2, Math.max(0, 0.2 * (1 - ageDays / 30)));
    }
    // Combine scores (base score + term overlap + bonuses)
    const baseScore = result.score || 0.5;
    return Math.min(1.0, baseScore * 0.3 + termScore * 0.5 + phraseBonus + tagBonus + recencyBonus);
}
60
/**
 * Create or update an engram (lexical sidecar) for fast entity lookup.
 *
 * The key is lower-cased and trimmed, then MD5-hashed; the hash is what gets
 * stored in the `engrams.key` column. An existing row with the same hash has
 * its value overwritten.
 *
 * @param {string} key - Human-readable lookup key.
 * @param {Array} memoryIds - IDs to associate with the key; stored as JSON text.
 * @returns {Promise<void>}
 */
export async function createEngram(key, memoryIds) {
    const engramId = createHash('md5')
        .update(key.toLowerCase().trim())
        .digest('hex');
    const serializedIds = JSON.stringify(memoryIds);
    const upsertSql = `INSERT INTO engrams (key, value) VALUES ($1, $2) ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value`;
    await db.run(upsertSql, [engramId, serializedIds]);
}
69
/**
 * Look up memory IDs by engram key.
 *
 * The key is normalized (lower-cased, trimmed) and MD5-hashed the same way
 * `createEngram` does before querying the `engrams` table.
 *
 * A stored value that cannot be used (malformed JSON, null, or JSON that is
 * not an array) is treated as "no engram" instead of throwing, so one corrupt
 * row cannot abort the search that called this lookup.
 *
 * @param {string} key - Human-readable lookup key.
 * @returns {Promise<Array>} The stored ID list, or an empty array when the key
 *   is unknown or its stored value is unusable.
 */
export async function lookupByEngram(key) {
    const normalizedKey = key.toLowerCase().trim();
    const engramId = createHash('md5').update(normalizedKey).digest('hex');
    const query = `SELECT value FROM engrams WHERE key = $1`;
    const result = await db.run(query, [engramId]);
    const stored = result.rows?.[0]?.value;
    if (stored == null)
        return [];
    let ids = stored;
    // The value is written as JSON text by createEngram, but tolerate a driver
    // that already decoded it (same defensive handling the molecule-tag
    // enrichment applies to its `tags` column).
    if (typeof stored === 'string') {
        try {
            ids = JSON.parse(stored);
        }
        catch (e) {
            console.warn('[Search] Ignoring malformed engram value for key hash %s:', engramId, e);
            return [];
        }
    }
    return Array.isArray(ids) ? ids : [];
}
82
/**
 * Hydrate engram IDs into full SearchResult objects.
 *
 * Loads the matching rows from `atoms` in a single query and maps each one to
 * the result shape used by the search pipeline. Every hydrated result gets a
 * fixed score of 1.0 because it is a direct engram hit.
 *
 * @param {Array} ids - Atom IDs to load; null/empty yields an empty result.
 * @returns {Promise<Array<object>>} Hydrated results, or an empty array when
 *   there is nothing to load or the query fails (the failure is logged).
 */
export async function hydrateEngrams(ids) {
    if (!ids || ids.length === 0)
        return [];
    // Row -> SearchResult mapping, kept separate from the I/O below.
    const toSearchResult = (row) => {
        const { id, content, timestamp, compound_id, start_byte, end_byte } = row;
        return {
            id,
            content,
            source: row.source_path, // Map source_path to source
            timestamp,
            buckets: row.buckets || [],
            tags: row.tags || [],
            epochs: '',
            provenance: row.provenance || 'internal',
            score: 1.0, // High score for direct engram hits
            compound_id,
            start_byte,
            end_byte
        };
    };
    const query = `
        SELECT id, content, source_path, timestamp, buckets, tags, provenance, compound_id, start_byte, end_byte
        FROM atoms
        WHERE id = ANY($1)
    `;
    try {
        const { rows } = await db.run(query, [ids]);
        return rows.map(toSearchResult);
    }
    catch (e) {
        console.error('[Search] Failed to hydrate engrams:', e);
        return [];
    }
}
115
/**
 * Enrich anchors with the tags of the molecules in their parent compound.
 *
 * Anchors are grouped by `compound_id`, the tags of all molecules belonging to
 * those compounds are fetched in one batched query, and each anchor's `tags`
 * is replaced (in place) by the sorted, de-duplicated union of its own tags
 * and the compound's molecule tags. Anchors without a `compound_id`, or whose
 * compound has no molecule tags, are left untouched.
 *
 * Enrichment is best-effort: failures are logged and never propagated.
 *
 * @param {Array<object>} anchors - Search results to enrich; mutated in place.
 * @returns {Promise<void>}
 */
async function enrichAtomsWithMoleculeTags(anchors) {
    try {
        // Bucket the anchors per compound so one query can serve all of them.
        const grouped = new Map();
        for (const anchor of anchors) {
            const key = anchor.compound_id;
            if (!key)
                continue;
            const members = grouped.get(key);
            if (members) {
                members.push(anchor);
            }
            else {
                grouped.set(key, [anchor]);
            }
        }
        if (grouped.size === 0)
            return;
        const compoundIds = Array.from(grouped.keys());
        try {
            // Single batched fetch via ANY() instead of one query per compound.
            const molQuery = `
                SELECT compound_id, tags
                FROM molecules
                WHERE compound_id = ANY($1) AND tags IS NOT NULL
            `;
            const molResult = await db.run(molQuery, [compoundIds]);
            // Collect the distinct molecule tags seen for each compound.
            const tagsByCompound = new Map();
            if (molResult.rows && molResult.rows.length > 0) {
                for (const molRow of molResult.rows) {
                    const cId = molRow.compound_id;
                    let tagSet = tagsByCompound.get(cId);
                    if (!tagSet) {
                        tagSet = new Set();
                        tagsByCompound.set(cId, tagSet);
                    }
                    if (!molRow.tags)
                        continue;
                    let parsed = molRow.tags;
                    if (typeof parsed === 'string') {
                        try {
                            parsed = JSON.parse(parsed);
                        }
                        catch {
                            // Malformed tags JSON for this molecule; skip this row only.
                            continue;
                        }
                    }
                    if (!Array.isArray(parsed))
                        continue;
                    for (const tag of parsed) {
                        if (tag && typeof tag === 'string') {
                            tagSet.add(tag);
                        }
                    }
                }
            }
            // Fold the molecule tags into every anchor of the same compound.
            for (const [compoundId, members] of grouped) {
                const moleculeTags = tagsByCompound.get(compoundId);
                if (!moleculeTags || moleculeTags.size === 0)
                    continue;
                for (const anchor of members) {
                    const union = new Set([...(anchor.tags || []), ...moleculeTags]);
                    // The whole merged list is sorted together so output is stable.
                    anchor.tags = Array.from(union).sort();
                }
            }
        }
        catch (molErr) {
            // Non-fatal: log a small sample of the compound IDs for debugging.
            const sampleCompoundIds = compoundIds.slice(0, 5);
            console.debug('[Search] Could not fetch molecule tags for compounds (count=%d, sample=%o): %o', compoundIds.length, sampleCompoundIds, molErr);
        }
    }
    catch (e) {
        console.warn('[Search] Failed to enrich atoms with molecule tags:', e);
        // Continue without enrichment - this is not a critical failure
    }
}
197
+ import { PhysicsTagWalker } from './physics-tag-walker.js';
198
+ import { assembleAndSerialize, assembleContextPackage } from './graph-context-serializer.js';
199
// ---------------------------------------------------------------------------
// Search serialization lock: searches run strictly one at a time so that
// concurrent searches cannot double peak heap usage.
//
// `_searchLock` is the tail of a promise chain; it settles once every lock
// handed out so far has been released.
// ---------------------------------------------------------------------------
let _searchLock = Promise.resolve();
/**
 * Queue up for the search lock.
 *
 * @returns {Promise<Function>} Resolves, once all earlier holders have
 *   released, with the `release` function the caller must invoke when done.
 */
function acquireSearchLock() {
    const previous = _searchLock;
    let release;
    // `gate` stays pending until this holder calls release().
    const gate = new Promise((resolve) => {
        release = resolve;
    });
    const ticket = previous.then(() => release);
    // Later callers wait behind both the earlier holders and this one.
    _searchLock = previous.then(() => gate);
    return ticket;
}
211
// Memory thresholds (Standard 127/134/135: configurable memory management).
/**
 * Read the memory-management settings from `config.MEMORY`, substituting a
 * default for every value that is null or undefined.
 *
 * @returns {object} The thresholds (in MB) and streaming settings listed below.
 */
function getMemoryThresholds() {
    const {
        heap_pressure_mb,
        throttle_start_mb,
        throttle_max_mb,
        emergency_stop_mb,
        search_results_batch_size,
        enable_streaming_results
    } = config.MEMORY || {};
    return {
        // If V8 heapUsed exceeds this, max-recall is downgraded to standard search
        HEAP_PRESSURE_MB: heap_pressure_mb ?? 500,
        // Throttling thresholds for memory-aware search pacing
        THROTTLE_START_MB: throttle_start_mb ?? 800,
        THROTTLE_MAX_MB: throttle_max_mb ?? 1200,
        EMERGENCY_STOP_MB: emergency_stop_mb ?? 1500,
        // Streaming results configuration
        RESULTS_BATCH_SIZE: search_results_batch_size ?? 20,
        ENABLE_STREAMING: enable_streaming_results ?? false
    };
}
227
/**
 * Current V8 heap usage, rounded to the nearest whole megabyte.
 *
 * @returns {number} `process.memoryUsage().heapUsed` expressed in MB.
 */
function heapUsedMB() {
    const { heapUsed } = process.memoryUsage();
    const BYTES_PER_MB = 1024 * 1024;
    return Math.round(heapUsed / BYTES_PER_MB);
}
230
/**
 * Memory-aware throttling (Standard 127/134/135: configurable thresholds).
 *
 * Compares the current heap usage against the configured thresholds and either
 * rejects the search, delays it, or lets it through immediately:
 *   - heap >= EMERGENCY_STOP_MB  -> rejected
 *   - heap >= THROTTLE_MAX_MB    -> rejected
 *   - heap >= THROTTLE_START_MB  -> delayed by up to ~10s, proportional to how
 *                                   far the heap is between START and MAX
 *   - otherwise                  -> proceeds with no delay
 *
 * @returns {Promise<{proceed: boolean, delayMs: number, reason?: string}>}
 *   `proceed` is false when the search should be rejected; `delayMs` is the
 *   delay that was actually awaited before returning.
 */
async function throttleSearchForMemory() {
    const heapMB = heapUsedMB();
    const { EMERGENCY_STOP_MB, THROTTLE_MAX_MB, THROTTLE_START_MB } = getMemoryThresholds();
    const rejected = (reason) => ({ proceed: false, delayMs: 0, reason });
    // Emergency stop - reject search
    if (heapMB >= EMERGENCY_STOP_MB) {
        console.warn(`[Throttle] EMERGENCY: Heap at ${heapMB}MB >= ${EMERGENCY_STOP_MB}MB. Rejecting search.`);
        return rejected(`Memory too high (${heapMB}MB)`);
    }
    // Top of the throttle zone - reject temporarily
    if (heapMB >= THROTTLE_MAX_MB) {
        console.warn(`[Throttle] Heap at ${heapMB}MB >= ${THROTTLE_MAX_MB}MB. Rejecting search temporarily.`);
        return rejected(`Memory pressure (${heapMB}MB)`);
    }
    // Below the throttle zone - normal operation, no delay
    if (heapMB < THROTTLE_START_MB) {
        return { proceed: true, delayMs: 0 };
    }
    // Inside the throttle zone: START <= heap < MAX here, so the span is positive.
    const zoneSpan = THROTTLE_MAX_MB - THROTTLE_START_MB;
    const pressureRatio = (heapMB - THROTTLE_START_MB) / zoneSpan;
    const delayMs = Math.round(pressureRatio * 10000); // Up to 10 second delay
    console.log(`[Throttle] Heap at ${heapMB}MB. Delaying search by ${delayMs}ms (pressure: ${(pressureRatio * 100).toFixed(0)}%)`);
    await new Promise((resolve) => {
        setTimeout(resolve, delayMs);
    });
    return { proceed: true, delayMs, reason: `Throttled (${heapMB}MB)` };
}
259
/**
 * English stop words removed from a query before the tsquery is built.
 * The 'simple' text-search configuration does NOT filter them, so connector
 * words like "and", "the", "or" would match almost every molecule and corrupt
 * ranking.
 */
const ANCHOR_FTS_STOP_WORDS = new Set([
    'a', 'an', 'and', 'are', 'as', 'at', 'be', 'been', 'being', 'but', 'by',
    'can', 'could', 'did', 'do', 'does', 'doing', 'done', 'each', 'for',
    'from', 'had', 'has', 'have', 'having', 'he', 'her', 'him', 'his',
    'how', 'i', 'if', 'in', 'is', 'it', 'its', 'itself', 'just', 'me',
    'more', 'my', 'no', 'not', 'of', 'off', 'on', 'or', 'our', 'out',
    'own', 'same', 'she', 'should', 'so', 'some', 'such', 'than', 'that',
    'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this',
    'those', 'to', 'too', 'very', 'was', 'we', 'were', 'what', 'when',
    'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with',
    'would', 'you', 'your', 'yours'
]);
/**
 * Content shorter than this (in characters) is too small to compare reliably:
 * such anchors bypass content deduplication, and strings this short are never
 * used as the "needle" of a containment check.
 */
const ANCHOR_DEDUP_MIN_LENGTH = 20;
/**
 * Build the FTS term list for a sanitized query: drop stop words (falling back
 * to the full word list if that removes everything), then expand camelCase
 * identifiers (e.g. findAnchors -> [findanchors, find, anchors]) so FTS can
 * match partial names and prose descriptions of the same concept.
 */
function _buildAnchorFtsTerms(sanitizedQuery) {
    const queryWords = sanitizedQuery.trim().split(/\s+/).filter((t) => t.length > 0);
    const contentWords = queryWords.filter((t) => !ANCHOR_FTS_STOP_WORDS.has(t));
    const baseTerms = contentWords.length > 0 ? contentWords : queryWords;
    return expandCamelCase(baseTerms);
}
/**
 * Normalize content for duplicate comparison: unescape common JSON escape
 * sequences, lower-case, and keep only letters and digits. Uses Unicode
 * classes so non-Latin text is preserved instead of collapsing to ''.
 * Non-string input normalizes to ''.
 */
function _normalizeAnchorContent(text) {
    if (typeof text !== 'string')
        return '';
    return text
        .replace(/\\"/g, '"')
        .replace(/\\\\/g, '\\')
        .replace(/\\n/g, '\n')
        .replace(/\\r/g, '\r')
        .replace(/\\t/g, '\t')
        .toLowerCase()
        .replace(/[^\p{L}\p{N}]/gu, '');
}
/**
 * Range-merge anchors that belong to the same compound: within each compound,
 * walk the anchors in start-byte order and drop ones that duplicate, are
 * contained in, or heavily overlap (>50%) their predecessor. Anchors without a
 * compound_id cannot be range-compared and are passed through unchanged.
 */
function _mergeAnchorRanges(candidates) {
    const byCompound = new Map();
    const ungrouped = [];
    for (const anchor of candidates) {
        if (!anchor.compound_id) {
            ungrouped.push(anchor);
            continue;
        }
        const group = byCompound.get(anchor.compound_id);
        if (group) {
            group.push(anchor);
        }
        else {
            byCompound.set(anchor.compound_id, [anchor]);
        }
    }
    const merged = [];
    for (const group of byCompound.values()) {
        group.sort((a, b) => (a.start_byte || 0) - (b.start_byte || 0));
        let current = group[0];
        for (let i = 1; i < group.length; i++) {
            const next = group[i];
            const currentStart = current.start_byte || 0;
            const currentEnd = current.end_byte || 0;
            const nextStart = next.start_byte || 0;
            const nextEnd = next.end_byte || 0;
            // Neither overlapping nor adjacent (within 50 bytes): keep both.
            if (nextStart > currentEnd + 50) {
                merged.push(current);
                current = next;
                continue;
            }
            // Near-identical start/end: a true duplicate, skip next.
            if (Math.abs(nextStart - currentStart) < 5 && Math.abs(nextEnd - currentEnd) < 5)
                continue;
            // Next is contained in current: skip next.
            if (nextEnd <= currentEnd)
                continue;
            // Current is contained in next: switch to next.
            if (nextStart <= currentStart && nextEnd >= currentEnd) {
                current = next;
                continue;
            }
            // Overlap above 50% of either window: keep only the better-scored
            // one (the earlier one wins ties).
            const overlap = Math.min(currentEnd, nextEnd) - Math.max(currentStart, nextStart);
            const currentLen = currentEnd - currentStart;
            const nextLen = nextEnd - nextStart;
            if (overlap > 0 && (overlap / currentLen > 0.5 || overlap / nextLen > 0.5)) {
                if ((next.score || 0) > (current.score || 0)) {
                    current = next;
                }
                continue;
            }
            merged.push(current);
            current = next;
        }
        merged.push(current);
    }
    return merged.concat(ungrouped);
}
/**
 * Global content-similarity deduplication (O(N^2) safety net), processed in
 * descending score order so the best match of each duplicate group survives.
 * Catches cross-compound duplicates, near-exact duplicates, containment, and
 * overlapping/adjacent windows from the same compound.
 */
function _dedupeAnchorsByContent(candidates) {
    const ranked = [...candidates].sort((a, b) => (b.score || 0) - (a.score || 0));
    const kept = [];
    const keptNorms = []; // normalized content of kept[i], computed once
    const seenHashes = new Set();
    const keptRanges = new Map(); // compound_id -> [{ start, end }]
    for (const candidate of ranked) {
        // Missing or very short content cannot be compared meaningfully: keep as-is.
        if (typeof candidate.content !== 'string' || candidate.content.length < ANCHOR_DEDUP_MIN_LENGTH) {
            kept.push(candidate);
            keptNorms.push(_normalizeAnchorContent(candidate.content));
            continue;
        }
        // C. Content fingerprint: same normalized leading 500 chars => duplicate,
        // even when it comes from a different compound/file.
        const candidateNorm = _normalizeAnchorContent(candidate.content);
        const contentHash = createHash('md5').update(candidateNorm.substring(0, 500)).digest('hex');
        if (seenHashes.has(contentHash))
            continue;
        seenHashes.add(contentHash);
        // A. Geometric deduplication against kept windows of the same compound.
        const hasRange = Boolean(candidate.compound_id)
            && candidate.start_byte !== undefined
            && candidate.end_byte !== undefined;
        if (hasRange) {
            const ranges = keptRanges.get(candidate.compound_id) || [];
            const isGeometricDuplicate = ranges.some((range) => {
                const overlapStart = Math.max(candidate.start_byte, range.start);
                const overlapEnd = Math.min(candidate.end_byte, range.end);
                const overlapLen = Math.max(0, overlapEnd - overlapStart);
                const minLen = Math.min(candidate.end_byte - candidate.start_byte, range.end - range.start);
                // Overlap covering at least half of the smaller window.
                if (overlapLen > 0 && overlapLen >= minLen * 0.5)
                    return true;
                // Overlapping or adjacent windows (gap under 500 bytes, or under
                // 20% of the smaller window if that is larger).
                const gap = Math.max(0, overlapStart - overlapEnd);
                return gap < Math.max(500, minLen * 0.2);
            });
            if (isGeometricDuplicate)
                continue;
        }
        // B. Content deduplication against everything kept so far.
        const candidatePrefix = candidateNorm.substring(0, 100);
        const isContentDuplicate = kept.some((other, idx) => {
            const otherNorm = keptNorms[idx];
            // 1. Containment either way. The contained string must be non-trivial,
            //    otherwise an empty/short kept anchor would match everything.
            if (candidateNorm.length >= ANCHOR_DEDUP_MIN_LENGTH && otherNorm.includes(candidateNorm))
                return true;
            if (otherNorm.length >= ANCHOR_DEDUP_MIN_LENGTH && candidateNorm.includes(otherNorm))
                return true;
            // 2. Fuzzy prefix match: identical leading run of more than 50 chars.
            const otherPrefix = otherNorm.substring(0, 100);
            const checkLen = Math.min(candidatePrefix.length, otherPrefix.length);
            if (checkLen > 50 && candidatePrefix.substring(0, checkLen) === otherPrefix.substring(0, checkLen))
                return true;
            // 3. SimHash: Hamming distance < 5 marks near-duplicate content.
            if (candidate.molecular_signature && other.molecular_signature) {
                return getHammingDistance(candidate.molecular_signature, other.molecular_signature) < 5;
            }
            return false;
        });
        if (isContentDuplicate)
            continue;
        kept.push(candidate);
        keptNorms.push(candidateNorm);
        if (hasRange) {
            const ranges = keptRanges.get(candidate.compound_id) || [];
            ranges.push({ start: candidate.start_byte, end: candidate.end_byte });
            keptRanges.set(candidate.compound_id, ranges);
        }
    }
    return kept;
}
/**
 * Replace each anchor's DB content with the live content of its mirror file,
 * when the anchor has a source path and the mirror file can be read. Anchors
 * without a source path (e.g. chat history) keep their DB content. Reads run
 * in parallel and are best-effort.
 */
async function _overlayAnchorMirrorContent(anchors) {
    const { getMirrorPath } = await import('../mirror/mirror.js');
    const fs = await import('fs');
    await Promise.all(anchors.map(async (anchor) => {
        if (typeof anchor.source !== 'string' || anchor.source.trim() === '')
            return; // Keep DB content
        try {
            const mirrorPath = getMirrorPath(anchor.source, anchor.provenance);
            const liveContent = await fs.promises.readFile(mirrorPath, 'utf-8');
            if (liveContent && liveContent.length > 0) {
                anchor.content = liveContent;
            }
        }
        catch {
            // Deliberately ignored: a missing (ENOENT) or unreadable mirror file
            // simply means the DB content is kept.
        }
    }));
}
/**
 * Find Anchors (Direct Hits) - formerly part of tagWalkerSearch.
 *
 * Runs two strategies and merges their results:
 *   A. Atom search: radial inflation around each query term via ContextInflator,
 *      pre-scored with calculateLightweightScore and trimmed to the top N.
 *   B. Molecule search: PostgreSQL full-text search ranked with ts_rank_cd.
 * The combined set is range-merged per compound, globally de-duplicated by
 * content, overlaid with live mirror-file content, and enriched with molecule
 * tags.
 *
 * @param {string} query - Raw search query.
 * @param {string[]} [buckets=[]] - Restrict results to these buckets (empty = no restriction).
 * @param {string[]} [tags=[]] - Accepted for interface compatibility; not used by this function.
 * @param {number} [_maxChars] - Character budget; drives how many molecules are requested.
 * @param {string} [provenance='all'] - Provenance filter; 'all' excludes quarantined compounds.
 * @param {object} [filters] - Accepted for interface compatibility; not used by this function.
 * @param {boolean} [fuzzy=false] - Accepted for interface compatibility; not used by this function.
 * @returns {Promise<Array<object>>} De-duplicated anchors, or [] when the query
 *   sanitizes to nothing or an unexpected error occurs (which is logged).
 */
export async function findAnchors(query, buckets = [], tags = [], _maxChars = config.SEARCH.max_chars_default, provenance = 'all', filters, fuzzy = false) {
    try {
        const sanitizedQuery = sanitizeFtsQuery(query);
        if (!sanitizedQuery)
            return [];
        // 0. Dynamic Atom Scaling. A non-numeric budget falls back to the default
        // so the value interpolated into LIMIT below is always a finite integer.
        const budgetChars = Number.isFinite(Number(_maxChars)) ? Number(_maxChars) : config.SEARCH.max_chars_default;
        const tokenBudget = Math.floor(budgetChars / 4);
        const avgTokensPerAtom = 60; // Tuned for better density
        const targetAtomCount = Math.max(10, Math.ceil(tokenBudget / avgTokensPerAtom));
        console.log(`[Search] Dynamic Scaling: Budget=${tokenBudget}t -> Target=${targetAtomCount} atoms`);
        // Terms are OR-ed ( | ) so multi-word queries find documents containing
        // ANY of the terms; AND ( & ) is too restrictive for conversational
        // queries, which rarely have every word inside one molecule.
        const tsTerms = _buildAnchorFtsTerms(sanitizedQuery);
        const tsQueryString = tsTerms.join(' | ');
        // A. Atom Search (Radial Inflation) via ContextInflator.
        // Uses the stop-word-stripped terms so we don't inflate around "and", "the", etc.
        const terms = tsTerms.length > 0 ? tsTerms : sanitizedQuery.split(/\s+/).filter((t) => t.length > 0);
        const atomResults = [];
        if (terms.length > 0) {
            try {
                // [Standard 132] Use adaptive concurrency based on available memory
                const inflations = await processWithAdaptiveConcurrency(terms, async (term) => ContextInflator.inflateFromAtomPositions(term, 150, 20, undefined, { buckets, provenance }));
                const rawAtoms = inflations.flat();
                // [Standard 134] Two-pass scoring: score candidates cheaply first so
                // only the best ones go through the more expensive steps below.
                const scoredAtoms = rawAtoms.map((atom) => ({
                    ...atom,
                    score: calculateLightweightScore(atom, terms, sanitizedQuery)
                }));
                // Keep only the top N per term (mobile / low-memory: 5, otherwise 10).
                const isMobile = process.platform === 'android' || (await import('os')).totalmem() < 2 * 1024 * 1024 * 1024;
                const maxResultsPerTerm = isMobile ? 5 : 10;
                const topAtoms = scoredAtoms
                    .sort((a, b) => (b.score || 0) - (a.score || 0))
                    .slice(0, maxResultsPerTerm * terms.length);
                atomResults.push(...topAtoms);
                console.log(`[Search] Atom search found ${rawAtoms.length} atoms, kept top ${topAtoms.length} after scoring for terms: ${terms.join(', ')}`);
            }
            catch (e) {
                console.error(`[Search] Atom Search failed:`, e);
            }
        }
        let anchors = atomResults;
        // B. Molecule Search (Full-Text with BM25-style ranking)
        let moleculeQuery = `
            SELECT m.id, m.content, c.path as source, m.timestamp,
                   '{}'::text[] as buckets, '{}'::text[] as tags, 'epoch_placeholder' as epochs, c.provenance,
                   -- Use ts_rank_cd for cover-density ranking (closer to BM25)
                   ts_rank_cd(to_tsvector('simple', m.content), to_tsquery('simple', $1)) * 10 as score,
                   m.sequence, m.molecular_signature,
                   m.start_byte, m.end_byte, m.type, m.numeric_value, m.numeric_unit, m.compound_id
            FROM molecules m
            JOIN compounds c ON m.compound_id = c.id
            WHERE to_tsvector('simple', m.content) @@ to_tsquery('simple', $1)
        `;
        const moleculeParams = [tsQueryString];
        if (buckets.length > 0) {
            moleculeQuery += ` AND EXISTS (
                SELECT 1 FROM atoms a
                WHERE a.source_path = c.path
                AND a.buckets && $${moleculeParams.length + 1}
            )`;
            moleculeParams.push(buckets);
        }
        if (provenance !== 'all' && provenance !== 'quarantine') {
            moleculeQuery += ` AND c.provenance = $${moleculeParams.length + 1}`;
            moleculeParams.push(provenance);
        }
        else if (provenance === 'all') {
            moleculeQuery += ` AND c.provenance != 'quarantine'`;
        }
        // Result count scales with the dynamic token budget computed above.
        moleculeQuery += ` ORDER BY score DESC LIMIT ${targetAtomCount}`;
        try {
            let molResult = await db.run(moleculeQuery, moleculeParams);
            // Strategy 1.1: if an AND query found nothing, retry with OR. The query
            // built above is already OR-joined, so this only triggers if a term
            // itself carries an '&'.
            if (molResult.rows.length === 0 && tsQueryString.includes('&')) {
                console.log('[Search] Initial AND query yielded 0 results. Retrying with OR-fuzzy logic...');
                // Limit the fallback to the 8 longest distinct words (longer words
                // are more likely to be unique/important) to keep the query bounded.
                const allTerms = sanitizedQuery.split(/\s+/).filter((t) => t.length > 3);
                const uniqueTerms = Array.from(new Set(allTerms));
                uniqueTerms.sort((a, b) => b.length - a.length);
                const topTerms = uniqueTerms.slice(0, 8);
                if (topTerms.length > 0) {
                    const orQueryString = topTerms.join(' | ');
                    console.log(`[Search] OR-fuzzy fallback using terms: ${orQueryString}`);
                    // Same SQL, only the tsquery parameter ($1) changes.
                    const orParams = [orQueryString, ...moleculeParams.slice(1)];
                    molResult = await db.run(moleculeQuery, orParams);
                }
            }
            const molecules = (molResult.rows || []).map((row) => ({
                id: row.id,
                content: row.content,
                source: row.source,
                timestamp: row.timestamp,
                buckets: row.buckets,
                tags: row.tags,
                epochs: row.epochs,
                provenance: row.provenance,
                score: row.score,
                sequence: row.sequence,
                molecular_signature: row.molecular_signature,
                start_byte: row.start_byte,
                end_byte: row.end_byte,
                type: row.type,
                numeric_value: row.numeric_value,
                numeric_unit: row.numeric_unit,
                compound_id: row.compound_id
            }));
            // Merge atom and molecule results, then deduplicate in two passes.
            const rangeMerged = _mergeAnchorRanges([...atomResults, ...molecules]);
            const originalCount = rangeMerged.length;
            anchors = _dedupeAnchorsByContent(rangeMerged);
            console.log(`[Search] Final Dedup: ${originalCount} -> ${anchors.length} items. Removed ${originalCount - anchors.length} duplicates.`);
            console.log(`[Search] Anchors found: ${atomResults.length} Atoms, ${molecules.length} Molecules. Final Unique: ${anchors.length}`);
        }
        catch (e) {
            // Fall back to the atom results alone if anything above fails.
            console.error('[Search] Molecule search failed:', e);
            anchors = atomResults;
        }
        // Intercept: prefer live mirror-file content over DB content where available.
        await _overlayAnchorMirrorContent(anchors);
        // === TAG ENRICHMENT: merge parent-molecule tags into each anchor's tags ===
        await enrichAtomsWithMoleculeTags(anchors);
        return anchors;
    }
    catch (e) {
        console.error('[Search] findAnchors failed:', e);
        return [];
    }
}
632
/**
 * Execute search with Intelligent Expansion and Physics Tag-Walker Protocol (GCP).
 *
 * Public entry point: takes the global search lock so that only one search
 * runs at a time, delegates to `_executeSearchInternal`, and always releases
 * the lock afterwards. If the process exposes `global.gc`, a garbage
 * collection is requested once the search has finished.
 *
 * @param query - Search query string
 * @param buckets - Array of buckets to search
 * @param maxChars - Maximum characters to return
 * @param provenance - Provenance filter (internal/external/quarantine/all)
 * @param explicitTags - Explicit tags to filter by
 * @param filters - Additional filters
 * @param useMaxRecall - If true, requests the max-recall (comprehensive) search mode
 * @param userContext - User context for personalization
 * @returns Whatever `_executeSearchInternal` resolves with; its errors propagate.
 */
export async function executeSearch(query, buckets, maxChars = config.SEARCH.max_chars_default, provenance = 'all', explicitTags = [], filters, useMaxRecall = false, userContext) {
    console.log(`[Search] executeSearch (Physics Engine V2) called with provenance: ${provenance}`);
    // Captured before waiting on the lock, so it includes time spent queued.
    const startTime = Date.now();
    // Serialize searches: one at a time keeps peak heap usage predictable.
    const releaseLock = await acquireSearchLock();
    try {
        const outcome = await _executeSearchInternal(query, buckets, maxChars, provenance, explicitTags, filters, useMaxRecall, userContext, startTime);
        return outcome;
    }
    finally {
        releaseLock();
        const gcAvailable = typeof global.gc === 'function';
        if (gcAvailable) {
            global.gc();
        }
    }
}
659
/**
 * Core search pipeline behind executeSearch.
 *
 * Stages, in order:
 *   0. Memory throttling, max-recall downgrade under heap pressure, and a bounded
 *      (3 minute) wait while the system reports itself busy.
 *   1. Query preparation (currently a pass-through).
 *   2. Anchor discovery: engram lookup + findAnchors, plus a tag-based fallback
 *      when fewer than 5 anchors are found.
 *   3. Physics walker expansion (1 hop) seeded with a source-diverse ordering of anchor IDs.
 *   4. Graph-context serialization, result capping and provenance formatting.
 *
 * @param {string} query - Raw query text.
 * @param {string[]} [buckets] - Bucket filter; null/undefined means "no bucket filter".
 * @param {number} [maxChars] - Character budget (defaults to config.SEARCH.max_chars_default).
 * @param {string} [provenance='all'] - Provenance filter forwarded to findAnchors.
 * @param {string[]} [explicitTags=[]] - Tags forwarded to findAnchors and used as scope tags.
 * @param {*} [filters] - Forwarded untouched to findAnchors.
 * @param {boolean} [useMaxRecall=false] - Doubles the walker's per-hop limit (300 instead of 150)
 *   unless heap pressure forces a downgrade.
 * @param {{name?: string, current_state?: string}} [userContext] - Optional user info; defaults
 *   to name 'User' / state 'active'.
 * @param {number} [startTime] - Epoch milliseconds used for the elapsed-time log line.
 * @returns {Promise<{context: *, results: *, toAgentString: Function, metadata: object}>}
 *   `context` is whatever assembleAndSerialize produced; `results` and part of `metadata`
 *   come from formatResults.
 * @throws {Error} When throttleSearchForMemory refuses the search.
 */
async function _executeSearchInternal(query, buckets, maxChars = config.SEARCH.max_chars_default, provenance = 'all', explicitTags = [], filters, useMaxRecall = false, userContext, startTime = Date.now()) {
    // Memory-aware throttling: slow down or reject searches based on memory pressure
    const throttleResult = await throttleSearchForMemory();
    if (!throttleResult.proceed) {
        throw new Error(`Search rejected: ${throttleResult.reason}. Please wait and try again.`);
    }
    // Memory pressure check: if heap is already near the limit, downgrade max-recall
    // to standard search to avoid OOM. Trades result depth for stability.
    const heapMB = heapUsedMB();
    const thresholds = getMemoryThresholds();
    if (useMaxRecall && heapMB > thresholds.HEAP_PRESSURE_MB) {
        console.warn(`[Search] Memory pressure detected (${heapMB}MB heap). Downgrading max-recall → standard search.`);
        useMaxRecall = false;
        maxChars = Math.min(maxChars, config.SEARCH.max_chars_default);
    }
    // Check if system is busy with ingestion
    const status = systemStatus.getStatus();
    if (status.isBusy) {
        // Wait for ingestion to finish before running search.
        // Concurrent search+ingestion causes O(N) memory pressure that can exceed the heap limit
        // (e.g. 207K molecules sharing a compound_id → physics walker cross product crashes at 8GB).
        const maxWaitMs = 180_000; // 3 minutes
        const pollMs = 1_000;
        let waited = 0;
        console.log(`[Search] System busy (${status.state}), waiting for idle before proceeding...`);
        while (systemStatus.getStatus().isBusy && waited < maxWaitMs) {
            await new Promise(r => setTimeout(r, pollMs));
            waited += pollMs;
        }
        if (systemStatus.getStatus().isBusy) {
            console.warn(`[Search] System still busy after ${waited}ms, proceeding with risk.`);
        }
        else {
            console.log(`[Search] System became idle after ${waited}ms, proceeding with search.`);
        }
    }
    // 1. Parse & Prepare
    const cleanQuery = query; // Simplified for now, real NLP parsing happens in findAnchors/query-parser calls if needed
    const realBuckets = new Set(buckets || []); // Set drops duplicate bucket names
    if (explicitTags.length > 0)
        console.log(`[Search] Explicit tags: ${explicitTags.join(', ')}`);
    // 2. Find Anchors (Planets)
    // Combine Engram Lookup + FTS + Molecule Search
    const engramIds = await lookupByEngram(cleanQuery);
    const engramResults = await hydrateEngrams(engramIds);
    let primaryAnchors = await findAnchors(cleanQuery, Array.from(realBuckets), explicitTags, maxChars, provenance, filters);
    // Tag-Aware Fallback (if low precision/recall on initial anchors)
    if (primaryAnchors.length < 5) {
        console.log(`[Search] Low recall (${primaryAnchors.length} anchors). Attempting Tag-Aware Fallback.`);
        const words = cleanQuery.split(/[\s,]+/);
        // Very naive tag extraction: lower-cased words longer than 3 chars.
        // Usually, users type things like "graph nodes consciousness". We try each word as an exact tag.
        // De-duplicated so a repeated word does not trigger the same DB query twice.
        const fallbackTags = [...new Set(words.filter(w => w.length > 3).map(w => w.toLowerCase()))];
        if (fallbackTags.length > 0) {
            // Simple programmatic fallback to explicitly look for these terms in the DB tags
            try {
                for (const fbTag of fallbackTags) {
                    // PostgreSQL array search - check if tag exists in array
                    const tagRes = await db.run(`
                        SELECT id, content, source_path, timestamp, buckets, tags, provenance, simhash, embedding, compound_id, start_byte, end_byte
                        FROM atoms
                        WHERE $1 = ANY(tags)
                        LIMIT 20
                    `, [fbTag]);
                    if (tagRes.rows && tagRes.rows.length > 0) {
                        tagRes.rows.forEach((row) => {
                            primaryAnchors.push({
                                id: String(row.id),
                                content: row.content,
                                source: row.source_path,
                                timestamp: row.timestamp || Date.now(),
                                buckets: typeof row.buckets === 'string' ? JSON.parse(row.buckets) : (row.buckets || []),
                                tags: typeof row.tags === 'string' ? JSON.parse(row.tags) : (row.tags || []),
                                epochs: '',
                                provenance: row.provenance,
                                score: 0.8, // fallback constant score
                                compound_id: row.compound_id,
                                start_byte: row.start_byte,
                                end_byte: row.end_byte,
                                molecular_signature: String(row.simhash)
                            });
                        });
                    }
                }
                // Enrich fallback results with molecule tags
                if (primaryAnchors.length > 0) {
                    await enrichAtomsWithMoleculeTags(primaryAnchors);
                }
            }
            catch (e) {
                // Best-effort: a failed fallback must not fail the search itself.
                console.warn('[Search] Tag-aware fallback failed', e);
            }
        }
    }
    const allAnchors = [...engramResults, ...primaryAnchors];
    // Enrich engram results with molecule tags (findAnchors already does this internally)
    if (engramResults.length > 0) {
        await enrichAtomsWithMoleculeTags(engramResults);
    }
    // Deduplicate (first occurrence wins, so engram results take precedence)
    const seenIds = new Set();
    const uniqueAnchors = allAnchors.filter(r => {
        if (seenIds.has(r.id))
            return false;
        seenIds.add(r.id);
        return true;
    });
    // 3. Physics Walker (Moons) - Use TypeScript PhysicsTagWalker
    let walkerResults = [];
    try {
        // Virtual in-memory molecules created by ContextInflator carry IDs starting with
        // 'virtual' and have no row in the atoms/molecules tables. The type guard matters:
        // a non-string id (e.g. a numeric DB id) used to make `.startsWith` throw, which
        // silently skipped the whole walker stage.
        const isVirtualId = (id) => typeof id === 'string' && id.startsWith('virtual');
        // Collect unique compound_ids from virtual anchors so we can resolve them to real mol_* IDs.
        const virtualCompoundIds = [...new Set(uniqueAnchors
                .filter(a => isVirtualId(a.id) && a.compound_id)
                .map(a => a.compound_id))];
        let resolvedMolIds = [];
        if (virtualCompoundIds.length > 0) {
            try {
                const res = await db.run(`SELECT id FROM molecules WHERE compound_id = ANY($1) ORDER BY timestamp DESC LIMIT 100`, [virtualCompoundIds]);
                if (res.rows)
                    resolvedMolIds = res.rows.map((r) => String(r.id));
            }
            catch (e) {
                console.warn('[Search] Failed to resolve virtual compound IDs:', e.message);
            }
        }
        // Round-robin by compound_id so the walker sees anchors from diverse source
        // documents rather than 30 IDs all from the same file.
        const diverseAnchorIds = [];
        {
            const byCompound = new Map();
            for (const a of uniqueAnchors) {
                if (!a.id || isVirtualId(a.id))
                    continue;
                const cid = a.compound_id || '__unknown__';
                if (!byCompound.has(cid))
                    byCompound.set(cid, []);
                // Normalized to string, matching the String(...) used for resolved mol IDs.
                byCompound.get(cid).push(String(a.id));
            }
            // Resolved mol IDs (from virtual compounds) all share one synthetic bucket
            for (const molId of resolvedMolIds) {
                const cid = '__virtual__';
                if (!byCompound.has(cid))
                    byCompound.set(cid, []);
                byCompound.get(cid).push(molId);
            }
            const groups = [...byCompound.values()];
            // Leading 0 keeps the bound finite when there are no groups at all.
            const maxRound = Math.max(0, ...groups.map(g => g.length));
            for (let i = 0; i < maxRound; i++) {
                for (const group of groups) {
                    if (i < group.length)
                        diverseAnchorIds.push(group[i]);
                }
            }
        }
        const dedupedAnchorIds = [...new Set(diverseAnchorIds)];
        if (dedupedAnchorIds.length > 0) {
            // Use TypeScript PhysicsTagWalker for radial inflation
            const walker = new PhysicsTagWalker();
            walkerResults = await walker.performRadialInflation(dedupedAnchorIds, 1, // radius (1 hop)
            useMaxRecall ? 300 : 150, // maxPerHop (results returned; fetches 3x candidates)
            0.2, // temperature
            0.001 // gravityThreshold (lowered from 0.005 for sparser graphs)
            );
            console.log(`[Search] PhysicsTagWalker found ${walkerResults.length} associations`);
        }
        else {
            console.log(`[Search] No valid anchor IDs for Physics Walker`);
        }
    }
    catch (e) {
        // The walker only adds associations; on failure continue with anchors alone.
        console.log(`[Search] Physics Walker failed, skipping: ${e.message}`);
        walkerResults = [];
    }
    // 4. Graph-Context Serialization (GCP)
    const finalUserContext = {
        name: userContext?.name || 'User',
        current_state: userContext?.current_state || 'active'
    };
    // Both GCP calls take the same input; build a fresh object for each so neither
    // call can observe mutations made by the other. Splitting on any whitespace and
    // dropping empties avoids '' key terms for queries with repeated/edge spaces.
    const buildGcpInput = () => ({
        user: finalUserContext,
        query: cleanQuery,
        keyTerms: cleanQuery.split(/\s+/).filter(Boolean),
        scopeTags: explicitTags,
        anchors: uniqueAnchors,
        walkerResults: walkerResults,
        charBudget: maxChars
    });
    const contextPackage = assembleContextPackage(buildGcpInput());
    const serializedContext = assembleAndSerialize(buildGcpInput());
    console.log(`[Search] Search completed in ${Date.now() - startTime}ms`);
    // Map back to SearchResult[] for legacy API compatibility
    // Combine Anchors + Walker Results, sorted by score desc
    const combinedResults = [
        ...uniqueAnchors,
        ...walkerResults.map(w => ({
            ...w.result,
            physics: w.physics
        }))
    ];
    // Cap total results fed to formatResults to prevent OOM.
    // 100KB per snippet cap in inflateSnippetFromDisk bounds memory per snippet,
    // but 900+ snippets * 100KB = still huge. Limit by budget: budget / 200 chars minimum
    // gives a rough upper bound on useful snippets (never fewer than 200).
    const maxResultsForBudget = Math.min(combinedResults.length, Math.max(200, Math.ceil(maxChars / 200)));
    const cappedResults = combinedResults
        .sort((a, b) => (b.score || 0) - (a.score || 0))
        .slice(0, maxResultsForBudget);
    // Apply context provenance formatting with coalescing (Standard 108)
    // Enable coalescing for high-budget queries to improve coherence
    const enableCoalescing = maxChars > 16000; // Only coalesce for budgets > 16k chars
    const proximityThreshold = maxChars > 100000 ? 800 : 500; // Larger threshold for max-recall
    console.log(`[Search] Coalescing: ${enableCoalescing ? 'enabled' : 'disabled'} (threshold: ${proximityThreshold}px)`);
    const formatted = await formatResults(cappedResults, maxChars, {
        enableCoalescing,
        proximityThreshold
    });
    return {
        context: serializedContext,
        results: formatted.results,
        toAgentString: () => serializedContext,
        metadata: { ...contextPackage.graphStats, ...formatted.metadata }
    };
}
896
/**
 * Execute molecule-based search: split the query into sentence-like chunks
 * ("molecules"), run executeSearch once per chunk, and merge the unique hits.
 *
 * A failing chunk is logged and skipped so the remaining chunks still contribute.
 *
 * @param {string} query - Full query text, split by splitQueryIntoMolecules.
 * @param {*} bucket - Unused by this function; kept for signature compatibility.
 * @param {string[]} buckets - Bucket filter forwarded to executeSearch.
 * @param {number} [maxChars] - Character budget; used for every sub-search and for the final formatting.
 * @param {boolean} [deep=false] - Unused by this function; kept for signature compatibility.
 * @param {string} [provenance='all'] - Provenance filter forwarded to executeSearch.
 * @param {string[]} [explicitTags=[]] - Tags forwarded to executeSearch.
 * @param {object} [userContext] - Forwarded to executeSearch.
 * @returns {Promise<*>} Whatever formatResults returns for the merged, score-sorted hits.
 * @throws {Error} When throttleSearchForMemory refuses the search.
 */
export async function executeMoleculeSearch(query, bucket, buckets, maxChars = config.SEARCH.max_chars_default, deep = false, provenance = 'all', explicitTags = [], userContext) {
    // Memory-aware throttling
    const throttleResult = await throttleSearchForMemory();
    if (!throttleResult.proceed) {
        throw new Error(`Search rejected: ${throttleResult.reason}. Please wait and try again.`);
    }
    // Split the query into molecules (sentence-like chunks)
    const molecules = splitQueryIntoMolecules(query);
    console.log(`[MoleculeSearch] Split query into ${molecules.length} molecules:`, molecules);
    // Search each molecule separately
    const allResults = [];
    const includedIds = new Set();
    for (const [index, molecule] of molecules.entries()) {
        console.log(`[MoleculeSearch] Searching molecule ${index + 1}/${molecules.length}: "${molecule}"`);
        try {
            // Execute search for this specific molecule
            const result = await executeSearch(molecule, buckets, maxChars, provenance, explicitTags, undefined, false, userContext);
            // Add unique results to our collection (first hit for an id wins)
            for (const item of result.results) {
                if (!includedIds.has(item.id)) {
                    allResults.push(item);
                    includedIds.add(item.id);
                }
            }
        }
        catch (error) {
            console.error(`[MoleculeSearch] Error searching molecule:`, molecule, error);
            // Continue with other molecules even if one fails
        }
    }
    // Sort results by score, highest first. Missing scores count as 0; subtracting
    // undefined would yield NaN and leave the order implementation-defined.
    allResults.sort((a, b) => (b.score || 0) - (a.score || 0));
    console.log(`[MoleculeSearch] Combined results from ${molecules.length} molecules: ${allResults.length} total results`);
    return await formatResults(allResults, maxChars); // Use original maxChars to maintain token budget
}
934
/**
 * Traditional FTS fallback: ranks atoms with PostgreSQL full-text search
 * ('simple' configuration) and optionally restricts them to the given buckets.
 *
 * Best-effort: a database error is logged and reported as "no results".
 *
 * @param {string} query - Raw query; passed through sanitizeFtsQuery first.
 * @param {string[]} [buckets=[]] - Keep only atoms that share at least one bucket with
 *   this list. Omitted, null or empty means no bucket filter.
 * @returns {Promise<Array<object>>} Rows mapped to id/score/content/source/timestamp/
 *   buckets/tags/epochs/provenance, after hydrateFromMirror has been applied;
 *   [] when the sanitized query is empty or the query fails.
 */
export async function runTraditionalSearch(query, buckets = []) {
    const sanitizedQuery = sanitizeFtsQuery(query);
    if (!sanitizedQuery)
        return [];
    // Tolerate an explicit null as well; `buckets.length` used to throw for callers
    // that did not supply a bucket list.
    const bucketFilter = buckets ?? [];
    const hasBucketFilter = bucketFilter.length > 0;
    let querySql = `
        SELECT a.id,
               ts_rank(to_tsvector('simple', a.content), plainto_tsquery('simple', $1)) as score,
               a.content, a.source_path as source, a.timestamp,
               a.buckets, a.tags, 'epoch_placeholder' as epochs, a.provenance
        FROM atoms a
        WHERE to_tsvector('simple', a.content) @@ plainto_tsquery('simple', $1)
    `;
    if (hasBucketFilter) {
        querySql += ` AND EXISTS (
            SELECT 1 FROM unnest(a.buckets) as bucket WHERE bucket = ANY($2)
        )`;
    }
    querySql += ` ORDER BY score DESC`;
    try {
        // $2 is only referenced when the bucket filter is present, so only bind it then.
        const result = await db.run(querySql, hasBucketFilter ? [sanitizedQuery, bucketFilter] : [sanitizedQuery]);
        if (!result.rows)
            return [];
        const mappedResults = result.rows.map((row) => ({
            id: row.id,
            score: row.score,
            content: row.content,
            source: row.source,
            timestamp: row.timestamp,
            buckets: row.buckets,
            tags: row.tags,
            epochs: row.epochs,
            provenance: row.provenance
        }));
        await hydrateFromMirror(mappedResults);
        return mappedResults;
    }
    catch (e) {
        console.error('[Search] FTS failed', e);
        return [];
    }
}
978
/**
 * Best-effort content refresh from the mirror: for every result, read the file at
 * getMirrorPath(source, provenance) and, when it yields non-empty text, overwrite
 * `content` on that result in place.
 *
 * Every failure (module load, path resolution, missing or unreadable file) is
 * deliberately ignored; the affected results simply keep their current content.
 *
 * @param {Array<{source: *, provenance: *, content?: string}>} results - Mutated in place.
 * @returns {Promise<void>}
 */
async function hydrateFromMirror(results) {
    try {
        const { getMirrorPath } = await import('../mirror/mirror.js');
        const fs = await import('fs');
        // One independent attempt per result; a failure never affects its siblings.
        const overlayFromMirror = async (entry) => {
            try {
                const mirrorFile = getMirrorPath(entry.source, entry.provenance);
                const text = await fs.promises.readFile(mirrorFile, 'utf-8');
                if (text) {
                    entry.content = text;
                }
            }
            catch (err) {
                // ignore: file not found / unreadable / path could not be resolved
            }
        };
        await Promise.all(results.map((entry) => overlayFromMirror(entry)));
    }
    catch (err) { /* ignore */ }
}
1002
/**
 * Iterative search with back-off: retry with progressively simpler queries
 * until one of them returns results.
 *
 *   attempt 1 - the query as given (honours useMaxRecall)
 *   attempt 2 - nouns + proper nouns + temporal terms, plus scope tags
 *   attempt 3 - proper nouns + temporal terms, plus scope tags (skipped when
 *               that text is empty or identical to the attempt-2 text)
 *   attempt 4 - nothing matched; the last search result is returned as-is
 *
 * Scope tags are the `tags` argument plus every whitespace-separated query token
 * starting with '#'; they are appended to the fallback query text.
 *
 * @param {string} query
 * @param {string[]} [buckets=[]]
 * @param {number} [maxChars]
 * @param {string[]} [tags=[]]
 * @param {string} [provenance='all']
 * @param useMaxRecall - If true, uses MAX_RECALL_CONFIG for comprehensive retrieval
 *   (first attempt only; the fallbacks always run as standard searches)
 * @param {object} [userContext]
 * @returns {Promise<object>} The executeSearch result with an added `attempt` number.
 * @throws {Error} When throttleSearchForMemory refuses the search.
 */
export async function iterativeSearch(query, buckets = [], maxChars = config.SEARCH.max_chars_default, tags = [], provenance = 'all', useMaxRecall = false, userContext) {
    // Memory-aware throttling
    const gate = await throttleSearchForMemory();
    if (!gate.proceed) {
        throw new Error(`Search rejected: ${gate.reason}. Please wait and try again.`);
    }
    // 0. Scope tags: caller tags first, then hashtags typed into the query, so that
    // e.g. "#work" survives even when the fallbacks strip other words.
    const hashtagTokens = query.split(/\s+/).filter((token) => token.startsWith('#'));
    const tagsString = [...tags, ...hashtagTokens].join(' ');
    // Fallback strategies always run without max-recall.
    const runFallback = (text) => executeSearch(text, buckets, maxChars, provenance, tags, undefined, false, userContext);
    // Strategy 1: Standard Expanded Search (All Nouns, Verbs, Dates + Expansion)
    console.log(`[IterativeSearch] Strategy 1: Standard Execution`);
    let outcome = await executeSearch(query, buckets, maxChars, provenance, tags, undefined, useMaxRecall, userContext);
    if (outcome.results.length > 0) {
        return { ...outcome, attempt: 1 };
    }
    // Strategy 2: Strict "Subjects & Time" (Strip Verbs/Adjectives, keep Nouns + Dates)
    console.log(`[IterativeSearch] Strategy 2: Strict Nouns/Dates`);
    const temporalContext = extractTemporalContext(query);
    const doc = nlp.readDoc(query);
    const nounLikeTags = ['NOUN', 'PROPN'];
    const nouns = doc.tokens()
        .filter((token) => nounLikeTags.includes(token.out(nlp.its.pos)))
        .out(nlp.its.text);
    const strictTerms = [...new Set([...nouns, ...temporalContext])];
    const strictQuery = `${strictTerms.join(' ')} ${tagsString}`;
    if (strictTerms.length > 0) {
        console.log(`[IterativeSearch] Fallback Query 1: "${strictQuery.trim()}"`);
        outcome = await runFallback(strictQuery);
        if (outcome.results.length > 0) {
            return { ...outcome, attempt: 2 };
        }
    }
    // Strategy 3: "Just the Dates" / entities only. Sometimes "2025" or a proper
    // noun is the only anchor left once the keywords have failed.
    const propNouns = doc.tokens()
        .filter((token) => token.out(nlp.its.pos) === 'PROPN')
        .out(nlp.its.text);
    const entityTerms = [...new Set([...propNouns, ...temporalContext])];
    const entityQuery = `${entityTerms.join(' ')} ${tagsString}`;
    const entityText = entityQuery.trim();
    // Skip when there is nothing to search for or it would repeat strategy 2.
    if (entityText.length > 0 && entityText !== strictQuery.trim()) {
        console.log(`[IterativeSearch] Fallback Query 2: "${entityText}"`);
        outcome = await runFallback(entityQuery);
        if (outcome.results.length > 0) {
            return { ...outcome, attempt: 3 };
        }
    }
    return { ...outcome, attempt: 4 }; // Return empty result if all fail
}
1059
/**
 * Smart Chat Search (The "Markovian" Context Gatherer)
 * Logic:
 * 1. Try standard Iterative Search (skipped for long max-recall queries, which go
 *    straight to chunking).
 * 2. If recall is low (< 10 atoms, standard mode) or empty (max-recall), TRIGGER SPLIT.
 * 3. Split the query: 4-word chunks (at most 5) for long max-recall queries,
 *    otherwise the top 3 proper nouns, falling back to the top 3 nouns.
 * 4. Run one search per sub-query, sequentially (not in parallel) to bound peak memory.
 * 5. Aggregate & deduplicate by id; for max-recall, inflate the merged atoms via ContextInflator.
 *
 * @param {string} query
 * @param {string[]} [buckets=[]]
 * @param {number} [maxChars=20000] - Total budget; in standard mode it is divided evenly
 *   between the sub-queries, in max-recall mode each sub-query gets the full budget.
 * @param {string[]} [tags=[]]
 * @param {string} [provenance='all']
 * @param useMaxRecall - If true, uses MAX_RECALL_CONFIG for comprehensive retrieval
 * @param {{name?: string, current_state?: string}} [userContext]
 * @returns {Promise<object>} Search result with a `strategy` of 'standard', 'max-recall',
 *   'shallow' (nothing to split on) or 'split_merge'.
 */
export async function smartChatSearch(query, buckets = [], maxChars = 20000, tags = [], provenance = 'all', useMaxRecall = false, userContext) {
    const isLongQuery = query.length > 100;
    let initial = { results: [], context: '', toAgentString: () => '' };
    // 1. Initial Attempt (Skip if it's a massive max-recall query to force chunking)
    if (!isLongQuery || !useMaxRecall) {
        initial = await iterativeSearch(query, buckets, maxChars, tags, provenance, useMaxRecall, userContext);
        // If we have enough results, return immediately
        if (initial.results.length >= 10 && !useMaxRecall) {
            return { ...initial, strategy: 'standard' };
        }
        // Max-recall initial search already runs with full budget and 1639-atom target —
        // splitting into sub-queries would just run more full-budget searches and
        // multiply memory. Return here.
        if (useMaxRecall && initial.results.length > 0) {
            return { ...initial, strategy: 'max-recall' };
        }
    }
    console.log(`[SmartSearch] Triggering Multi-Query Split...`);
    // 2. Extract Entities for Split Search
    let splitQueries = [];
    if (isLongQuery && useMaxRecall) {
        // Chunk the query into groups of up to 4 words for massive keyword lists
        const words = query.split(/\s+/).filter(w => w.length > 2);
        for (let i = 0; i < words.length; i += 4) {
            splitQueries.push(words.slice(i, i + 4).join(' '));
        }
        // Limit to the first 5 chunks to avoid blowing up the DB
        splitQueries = splitQueries.slice(0, 5);
    }
    else {
        const doc = nlp.readDoc(query);
        // Get Proper Nouns (Entities) and regular Nouns
        // We prioritize PROPN (High Value)
        const entities = doc.tokens()
            .filter((t) => t.out(nlp.its.pos) === 'PROPN')
            .out(nlp.its.normal, nlp.as.freqTable)
            .map((e) => e[0])
            .slice(0, 3); // Top 3 Entities
        // If no entities, try Nouns
        if (entities.length === 0) {
            const nouns = doc.tokens()
                .filter((t) => t.out(nlp.its.pos) === 'NOUN')
                .out(nlp.its.normal, nlp.as.freqTable)
                .map((e) => e[0])
                .slice(0, 3);
            entities.push(...nouns);
        }
        splitQueries = entities;
    }
    if (splitQueries.length === 0) {
        // No entities to split on, return what we have
        return { ...initial, strategy: 'shallow', splitQueries: [] };
    }
    console.log(`[SmartSearch] Split Entities/Chunks: ${JSON.stringify(splitQueries)}`);
    // 3. Sequential Execution
    // Run each split sub-query one at a time to prevent concurrent heap exhaustion.
    // Parallel Promise.all with max-recall budgets multiplies memory by N sub-queries.
    const budgetPerQuery = useMaxRecall ? maxChars : Math.floor(maxChars / splitQueries.length);
    const subQueryResults = [];
    for (const entity of splitQueries) {
        subQueryResults.push(await executeSearch(entity, buckets, budgetPerQuery, provenance, tags, undefined, useMaxRecall, userContext));
    }
    // 4. Merge & Deduplicate
    const mergedMap = new Map();
    // Add initial results first
    initial.results.forEach(r => mergedMap.set(r.id, r));
    // Add split results; an id already present keeps its first-seen entry
    subQueryResults.forEach((res) => {
        res.results.forEach(r => {
            if (!mergedMap.has(r.id)) {
                mergedMap.set(r.id, r);
            }
        });
    });
    let mergedResults = Array.from(mergedMap.values());
    console.log(`[SmartSearch] Merged Total: ${mergedResults.length} atoms.`);
    // 4.5. Context Inflation — Expand each atom with surrounding context (n-1, n+1)
    // For max-recall searches, read full context from disk to fill the budget
    if (useMaxRecall && mergedResults.length > 0) {
        // Calculate per-atom budget to fill ~90% of total budget
        const budgetPerAtom = Math.floor(maxChars * 0.9 / mergedResults.length);
        console.log(`[SmartSearch] Inflating ${mergedResults.length} atoms with ${budgetPerAtom} chars each (total budget: ${maxChars})...`);
        const inflatedResults = await ContextInflator.inflate(mergedResults, maxChars, budgetPerAtom // Dynamic radius based on available budget
        );
        // Replace merged results with a copy of the inflated versions. An array-literal
        // spread has no argument-count limit, unlike push(...bigArray).
        mergedResults = [...inflatedResults];
        // Statistics are log-only: an atom without content or an empty inflation result
        // must not crash the search or print NaN.
        const totalChars = inflatedResults.reduce((sum, a) => sum + (a.content?.length ?? 0), 0);
        const avgChars = inflatedResults.length > 0 ? Math.round(totalChars / inflatedResults.length) : 0;
        console.log(`[SmartSearch] Inflation complete: ${inflatedResults.length} atoms with avg ${avgChars} chars each`);
    }
    // 5. Re-Format using GCP (Standard 086)
    const finalUserContext = {
        name: userContext?.name || 'User',
        current_state: userContext?.current_state || 'active'
    };
    const serializedContext = assembleAndSerialize({
        user: finalUserContext,
        query: query,
        keyTerms: splitQueries,
        scopeTags: tags,
        anchors: mergedResults, // Treat all merged results as anchors for now in this aggregate view
        walkerResults: [],
        charBudget: maxChars * 1.5
    });
    return {
        context: serializedContext,
        results: mergedResults,
        toAgentString: () => serializedContext,
        strategy: 'split_merge',
        splitQueries: splitQueries,
        metadata: { strategy: 'split_merge' }
    };
}
1186
/**
 * Cluster SearchResults into KnowledgeClusters for high-density JSON.
 *
 * Results are grouped by `source` ('unknown' when missing), ordered by ascending
 * timestamp inside each group, and cut into a new cluster wherever two
 * consecutive items are more than one hour apart.
 *
 * @param {Array<object>} results - Items with numeric `timestamp` and optional `source`.
 * @returns {Array<object>} Clusters built by createCluster, grouped by source in
 *   first-seen order and chronological within a source.
 */
export function clusterMolecules(results) {
    const MAX_GAP_MS = 60 * 60 * 1000; // > 1 hour between neighbours starts a new cluster
    const grouped = new Map();
    for (const item of results) {
        const key = item.source || 'unknown';
        const members = grouped.get(key);
        if (members) {
            members.push(item);
        }
        else {
            grouped.set(key, [item]);
        }
    }
    const clusters = [];
    grouped.forEach((members, key) => {
        // Sort chronologically
        members.sort((x, y) => x.timestamp - y.timestamp);
        let run = [];
        members.forEach((mol, idx) => {
            const startsNewRun = idx > 0
                && Math.abs(mol.timestamp - members[idx - 1].timestamp) > MAX_GAP_MS;
            if (startsNewRun) {
                // Close the current run before starting the next one.
                clusters.push(createCluster(run, key));
                run = [];
            }
            run.push(mol);
        });
        if (run.length > 0) {
            clusters.push(createCluster(run, key));
        }
    });
    return clusters;
}
1225
/**
 * Build one KnowledgeCluster from a non-empty, chronologically ordered group
 * of search results that share a source.
 *
 * - start/end time: ISO timestamps of the first and last molecule.
 * - topic: the three most frequent tags in the group, space-joined.
 * - each molecule's tags are bucketed into people / projects / concepts by
 *   substring heuristics (first matching category wins; tags matching none are
 *   kept in `tags` only).
 * - id: `cluster_<source basename>_<digits of the start timestamp>`.
 *
 * @param {Array<object>} mols - Non-empty list; each item needs a `timestamp` that Date accepts.
 * @param {string} source - Source path; either slash style is accepted for the basename.
 * @returns {{id: string, start_time: string, end_time: string, topic: string, molecules: Array<object>}}
 */
function createCluster(mols, source) {
    const toIso = (ts) => new Date(ts).toISOString();
    const startTs = toIso(mols[0].timestamp);
    const endTs = toIso(mols[mols.length - 1].timestamp);
    // Topic extraction based on tag frequency
    const frequency = new Map();
    for (const mol of mols) {
        (mol.tags || []).forEach((tag) => {
            frequency.set(tag, (frequency.get(tag) || 0) + 1);
        });
    }
    const topic = [...frequency.entries()]
        .sort(([, countA], [, countB]) => countB - countA)
        .slice(0, 3)
        .map(([tag]) => tag)
        .join(' ');
    // Keyword heuristics used to bucket tags into entity categories.
    const personHints = ['rob', 'coda', 'oliver'];
    const projectHints = ['agent', 'engine', 'project', 'anchor'];
    const mentionsAny = (text, hints) => hints.some((hint) => text.includes(hint));
    // Transform SearchResult to KnowledgeMolecule
    const toKnowledgeMolecule = (mol) => {
        const entities = { people: [], concepts: [], projects: [] };
        if (mol.tags) {
            mol.tags.forEach((tag) => {
                const lowered = tag.toLowerCase();
                if (mentionsAny(lowered, personHints)) {
                    entities.people.push(tag);
                    return;
                }
                if (mentionsAny(lowered, projectHints)) {
                    entities.projects.push(tag);
                    return;
                }
                if (tag.startsWith('#')) {
                    entities.concepts.push(tag);
                }
            });
        }
        return {
            id: mol.id,
            timestamp: toIso(mol.timestamp),
            speaker: mol.provenance || 'unknown',
            tags: mol.tags || [],
            entities,
            content: mol.content || '',
            byte_range: {
                start: mol.start_byte || 0,
                end: mol.end_byte || 0,
                source: mol.source || 'unknown'
            }
        };
    };
    // Cluster id: source basename plus the start timestamp reduced to its digits.
    const digitsOnly = startTs.replace(/[^0-9]/g, '');
    const pathParts = source.split(/[/\\]/);
    const basename = pathParts[pathParts.length - 1] || 'unknown';
    return {
        id: `cluster_${basename}_${digitsOnly}`,
        start_time: startTs,
        end_time: endTs,
        topic: topic,
        molecules: mols.map((mol) => toKnowledgeMolecule(mol))
    };
}
1286
+ //# sourceMappingURL=search.js.map