@rbalchii/anchor-engine 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. package/LICENSE +609 -0
  2. package/README.md +317 -0
  3. package/anchor.bat +5 -0
  4. package/docs/API.md +314 -0
  5. package/docs/DEPLOYMENT.md +448 -0
  6. package/docs/INDEX.md +226 -0
  7. package/docs/STAR_Whitepaper_Executive.md +216 -0
  8. package/docs/TROUBLESHOOTING.md +535 -0
  9. package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
  10. package/docs/archive/adoption-guide.md +264 -0
  11. package/docs/archive/adoption-preparation.md +179 -0
  12. package/docs/archive/agent-harness-integration.md +227 -0
  13. package/docs/archive/api-reference.md +106 -0
  14. package/docs/archive/api_flows_diagram.md +118 -0
  15. package/docs/archive/architecture.md +410 -0
  16. package/docs/archive/architecture_diagram.md +174 -0
  17. package/docs/archive/broader-adoption-preparation.md +175 -0
  18. package/docs/archive/browser-paradigm-architecture.md +163 -0
  19. package/docs/archive/chat-integration.md +124 -0
  20. package/docs/archive/community-adoption-materials.md +103 -0
  21. package/docs/archive/community-adoption.md +147 -0
  22. package/docs/archive/comparison-with-siloed-solutions.md +192 -0
  23. package/docs/archive/comprehensive-docs.md +156 -0
  24. package/docs/archive/data_flow_diagram.md +251 -0
  25. package/docs/archive/enhancement-implementation-summary.md +146 -0
  26. package/docs/archive/evolution-summary.md +141 -0
  27. package/docs/archive/ingestion_pipeline_diagram.md +198 -0
  28. package/docs/archive/native-module-profiling-results.md +135 -0
  29. package/docs/archive/positioning-document.md +158 -0
  30. package/docs/archive/positioning.md +175 -0
  31. package/docs/archive/query-builder-documentation.md +218 -0
  32. package/docs/archive/quick-reference.md +40 -0
  33. package/docs/archive/quickstart.md +63 -0
  34. package/docs/archive/relationship-narrative-discovery.md +141 -0
  35. package/docs/archive/search-logic-improvement-plan.md +336 -0
  36. package/docs/archive/search_architecture_diagram.md +212 -0
  37. package/docs/archive/semantic-architecture-guide.md +97 -0
  38. package/docs/archive/sequence-diagrams.md +128 -0
  39. package/docs/archive/system_components_diagram.md +296 -0
  40. package/docs/archive/test-framework-integration.md +109 -0
  41. package/docs/archive/testing-framework-documentation.md +397 -0
  42. package/docs/archive/testing-framework-summary.md +121 -0
  43. package/docs/archive/testing-framework.md +377 -0
  44. package/docs/archive/ui-architecture.md +75 -0
  45. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
  46. package/docs/arxiv/RELATED_WORK.tex +39 -0
  47. package/docs/arxiv/compile.bat +48 -0
  48. package/docs/arxiv/joss_response.md +33 -0
  49. package/docs/arxiv/prepare-submission.bat +46 -0
  50. package/docs/arxiv/review.md +128 -0
  51. package/docs/arxiv/star-whitepaper.tex +657 -0
  52. package/docs/code-patterns.md +289 -0
  53. package/docs/whitepaper.md +445 -0
  54. package/engine/dist/agent/runtime.d.ts +41 -0
  55. package/engine/dist/agent/runtime.d.ts.map +1 -0
  56. package/engine/dist/agent/runtime.js +73 -0
  57. package/engine/dist/agent/runtime.js.map +1 -0
  58. package/engine/dist/commands/audit-tags.d.ts +14 -0
  59. package/engine/dist/commands/audit-tags.d.ts.map +1 -0
  60. package/engine/dist/commands/audit-tags.js +180 -0
  61. package/engine/dist/commands/audit-tags.js.map +1 -0
  62. package/engine/dist/commands/distill.d.ts +19 -0
  63. package/engine/dist/commands/distill.d.ts.map +1 -0
  64. package/engine/dist/commands/distill.js +114 -0
  65. package/engine/dist/commands/distill.js.map +1 -0
  66. package/engine/dist/commands/generate-synonyms.d.ts +14 -0
  67. package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
  68. package/engine/dist/commands/generate-synonyms.js +91 -0
  69. package/engine/dist/commands/generate-synonyms.js.map +1 -0
  70. package/engine/dist/config/index.d.ts +115 -0
  71. package/engine/dist/config/index.d.ts.map +1 -0
  72. package/engine/dist/config/index.js +326 -0
  73. package/engine/dist/config/index.js.map +1 -0
  74. package/engine/dist/config/max-recall-config.d.ts +102 -0
  75. package/engine/dist/config/max-recall-config.d.ts.map +1 -0
  76. package/engine/dist/config/max-recall-config.js +102 -0
  77. package/engine/dist/config/max-recall-config.js.map +1 -0
  78. package/engine/dist/config/paths.d.ts +40 -0
  79. package/engine/dist/config/paths.d.ts.map +1 -0
  80. package/engine/dist/config/paths.js +49 -0
  81. package/engine/dist/config/paths.js.map +1 -0
  82. package/engine/dist/core/batch.d.ts +19 -0
  83. package/engine/dist/core/batch.d.ts.map +1 -0
  84. package/engine/dist/core/batch.js +37 -0
  85. package/engine/dist/core/batch.js.map +1 -0
  86. package/engine/dist/core/db.d.ts +58 -0
  87. package/engine/dist/core/db.d.ts.map +1 -0
  88. package/engine/dist/core/db.js +563 -0
  89. package/engine/dist/core/db.js.map +1 -0
  90. package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
  91. package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
  92. package/engine/dist/core/inference/ChatWorker.js +28 -0
  93. package/engine/dist/core/inference/ChatWorker.js.map +1 -0
  94. package/engine/dist/core/inference/context_manager.d.ts +49 -0
  95. package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
  96. package/engine/dist/core/inference/context_manager.js +199 -0
  97. package/engine/dist/core/inference/context_manager.js.map +1 -0
  98. package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
  99. package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
  100. package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
  101. package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
  102. package/engine/dist/core/vector.d.ts +40 -0
  103. package/engine/dist/core/vector.d.ts.map +1 -0
  104. package/engine/dist/core/vector.js +167 -0
  105. package/engine/dist/core/vector.js.map +1 -0
  106. package/engine/dist/index.d.ts +4 -0
  107. package/engine/dist/index.d.ts.map +1 -0
  108. package/engine/dist/index.js +400 -0
  109. package/engine/dist/index.js.map +1 -0
  110. package/engine/dist/middleware/auth.d.ts +14 -0
  111. package/engine/dist/middleware/auth.d.ts.map +1 -0
  112. package/engine/dist/middleware/auth.js +44 -0
  113. package/engine/dist/middleware/auth.js.map +1 -0
  114. package/engine/dist/middleware/request-tracing.d.ts +29 -0
  115. package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
  116. package/engine/dist/middleware/request-tracing.js +115 -0
  117. package/engine/dist/middleware/request-tracing.js.map +1 -0
  118. package/engine/dist/middleware/validate.d.ts +30 -0
  119. package/engine/dist/middleware/validate.d.ts.map +1 -0
  120. package/engine/dist/middleware/validate.js +117 -0
  121. package/engine/dist/middleware/validate.js.map +1 -0
  122. package/engine/dist/native/index.d.ts +106 -0
  123. package/engine/dist/native/index.d.ts.map +1 -0
  124. package/engine/dist/native/index.js +230 -0
  125. package/engine/dist/native/index.js.map +1 -0
  126. package/engine/dist/native/types.d.ts +45 -0
  127. package/engine/dist/native/types.d.ts.map +1 -0
  128. package/engine/dist/native/types.js +6 -0
  129. package/engine/dist/native/types.js.map +1 -0
  130. package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
  131. package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
  132. package/engine/dist/profiling/atomization-profiling.js +108 -0
  133. package/engine/dist/profiling/atomization-profiling.js.map +1 -0
  134. package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
  135. package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
  136. package/engine/dist/profiling/bottleneck-identification.js +249 -0
  137. package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
  138. package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
  139. package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
  140. package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
  141. package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
  142. package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
  143. package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
  144. package/engine/dist/profiling/simhash-profiling.js +168 -0
  145. package/engine/dist/profiling/simhash-profiling.js.map +1 -0
  146. package/engine/dist/routes/api.d.ts +9 -0
  147. package/engine/dist/routes/api.d.ts.map +1 -0
  148. package/engine/dist/routes/api.js +37 -0
  149. package/engine/dist/routes/api.js.map +1 -0
  150. package/engine/dist/routes/enhanced-api.d.ts +9 -0
  151. package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
  152. package/engine/dist/routes/enhanced-api.js +139 -0
  153. package/engine/dist/routes/enhanced-api.js.map +1 -0
  154. package/engine/dist/routes/health.d.ts +8 -0
  155. package/engine/dist/routes/health.d.ts.map +1 -0
  156. package/engine/dist/routes/health.js +89 -0
  157. package/engine/dist/routes/health.js.map +1 -0
  158. package/engine/dist/routes/monitoring.d.ts +8 -0
  159. package/engine/dist/routes/monitoring.d.ts.map +1 -0
  160. package/engine/dist/routes/monitoring.js +509 -0
  161. package/engine/dist/routes/monitoring.js.map +1 -0
  162. package/engine/dist/routes/v1/admin.d.ts +3 -0
  163. package/engine/dist/routes/v1/admin.d.ts.map +1 -0
  164. package/engine/dist/routes/v1/admin.js +261 -0
  165. package/engine/dist/routes/v1/admin.js.map +1 -0
  166. package/engine/dist/routes/v1/atoms.d.ts +3 -0
  167. package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
  168. package/engine/dist/routes/v1/atoms.js +172 -0
  169. package/engine/dist/routes/v1/atoms.js.map +1 -0
  170. package/engine/dist/routes/v1/backup.d.ts +3 -0
  171. package/engine/dist/routes/v1/backup.d.ts.map +1 -0
  172. package/engine/dist/routes/v1/backup.js +100 -0
  173. package/engine/dist/routes/v1/backup.js.map +1 -0
  174. package/engine/dist/routes/v1/git.d.ts +3 -0
  175. package/engine/dist/routes/v1/git.d.ts.map +1 -0
  176. package/engine/dist/routes/v1/git.js +316 -0
  177. package/engine/dist/routes/v1/git.js.map +1 -0
  178. package/engine/dist/routes/v1/ingest.d.ts +3 -0
  179. package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
  180. package/engine/dist/routes/v1/ingest.js +66 -0
  181. package/engine/dist/routes/v1/ingest.js.map +1 -0
  182. package/engine/dist/routes/v1/memory.d.ts +14 -0
  183. package/engine/dist/routes/v1/memory.d.ts.map +1 -0
  184. package/engine/dist/routes/v1/memory.js +87 -0
  185. package/engine/dist/routes/v1/memory.js.map +1 -0
  186. package/engine/dist/routes/v1/research.d.ts +3 -0
  187. package/engine/dist/routes/v1/research.d.ts.map +1 -0
  188. package/engine/dist/routes/v1/research.js +109 -0
  189. package/engine/dist/routes/v1/research.js.map +1 -0
  190. package/engine/dist/routes/v1/search.d.ts +3 -0
  191. package/engine/dist/routes/v1/search.d.ts.map +1 -0
  192. package/engine/dist/routes/v1/search.js +180 -0
  193. package/engine/dist/routes/v1/search.js.map +1 -0
  194. package/engine/dist/routes/v1/settings.d.ts +8 -0
  195. package/engine/dist/routes/v1/settings.d.ts.map +1 -0
  196. package/engine/dist/routes/v1/settings.js +211 -0
  197. package/engine/dist/routes/v1/settings.js.map +1 -0
  198. package/engine/dist/routes/v1/system.d.ts +3 -0
  199. package/engine/dist/routes/v1/system.d.ts.map +1 -0
  200. package/engine/dist/routes/v1/system.js +326 -0
  201. package/engine/dist/routes/v1/system.js.map +1 -0
  202. package/engine/dist/routes/v1/tags.d.ts +3 -0
  203. package/engine/dist/routes/v1/tags.d.ts.map +1 -0
  204. package/engine/dist/routes/v1/tags.js +102 -0
  205. package/engine/dist/routes/v1/tags.js.map +1 -0
  206. package/engine/dist/server-8080.d.ts +2 -0
  207. package/engine/dist/server-8080.d.ts.map +1 -0
  208. package/engine/dist/server-8080.js +74 -0
  209. package/engine/dist/server-8080.js.map +1 -0
  210. package/engine/dist/services/backup/backup-restore.d.ts +37 -0
  211. package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
  212. package/engine/dist/services/backup/backup-restore.js +385 -0
  213. package/engine/dist/services/backup/backup-restore.js.map +1 -0
  214. package/engine/dist/services/backup/backup.d.ts +14 -0
  215. package/engine/dist/services/backup/backup.d.ts.map +1 -0
  216. package/engine/dist/services/backup/backup.js +442 -0
  217. package/engine/dist/services/backup/backup.js.map +1 -0
  218. package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
  219. package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
  220. package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
  221. package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
  222. package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
  223. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
  224. package/engine/dist/services/distillation/radial-distiller.js +394 -0
  225. package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
  226. package/engine/dist/services/health-check-enhanced.d.ts +89 -0
  227. package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
  228. package/engine/dist/services/health-check-enhanced.js +417 -0
  229. package/engine/dist/services/health-check-enhanced.js.map +1 -0
  230. package/engine/dist/services/idle-manager.d.ts +56 -0
  231. package/engine/dist/services/idle-manager.d.ts.map +1 -0
  232. package/engine/dist/services/idle-manager.js +210 -0
  233. package/engine/dist/services/idle-manager.js.map +1 -0
  234. package/engine/dist/services/inference/inference-service.d.ts +27 -0
  235. package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
  236. package/engine/dist/services/inference/inference-service.js +89 -0
  237. package/engine/dist/services/inference/inference-service.js.map +1 -0
  238. package/engine/dist/services/inference/inference.d.ts +59 -0
  239. package/engine/dist/services/inference/inference.d.ts.map +1 -0
  240. package/engine/dist/services/inference/inference.js +131 -0
  241. package/engine/dist/services/inference/inference.js.map +1 -0
  242. package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
  243. package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
  244. package/engine/dist/services/ingest/atomizer-service.js +982 -0
  245. package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
  246. package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
  247. package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
  248. package/engine/dist/services/ingest/content-cleaner.js +166 -0
  249. package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
  250. package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
  251. package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
  252. package/engine/dist/services/ingest/github-ingest-service.js +537 -0
  253. package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
  254. package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
  255. package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
  256. package/engine/dist/services/ingest/ingest-atomic.js +437 -0
  257. package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
  258. package/engine/dist/services/ingest/ingest.d.ts +50 -0
  259. package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
  260. package/engine/dist/services/ingest/ingest.js +230 -0
  261. package/engine/dist/services/ingest/ingest.js.map +1 -0
  262. package/engine/dist/services/ingest/watchdog.d.ts +31 -0
  263. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
  264. package/engine/dist/services/ingest/watchdog.js +400 -0
  265. package/engine/dist/services/ingest/watchdog.js.map +1 -0
  266. package/engine/dist/services/llm/context.d.ts +6 -0
  267. package/engine/dist/services/llm/context.d.ts.map +1 -0
  268. package/engine/dist/services/llm/context.js +80 -0
  269. package/engine/dist/services/llm/context.js.map +1 -0
  270. package/engine/dist/services/llm/provider.d.ts +23 -0
  271. package/engine/dist/services/llm/provider.d.ts.map +1 -0
  272. package/engine/dist/services/llm/provider.js +338 -0
  273. package/engine/dist/services/llm/provider.js.map +1 -0
  274. package/engine/dist/services/llm/reader.d.ts +12 -0
  275. package/engine/dist/services/llm/reader.d.ts.map +1 -0
  276. package/engine/dist/services/llm/reader.js +40 -0
  277. package/engine/dist/services/llm/reader.js.map +1 -0
  278. package/engine/dist/services/mirror/mirror.d.ts +28 -0
  279. package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
  280. package/engine/dist/services/mirror/mirror.js +208 -0
  281. package/engine/dist/services/mirror/mirror.js.map +1 -0
  282. package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
  283. package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
  284. package/engine/dist/services/nlp/nlp-service.js +151 -0
  285. package/engine/dist/services/nlp/nlp-service.js.map +1 -0
  286. package/engine/dist/services/nlp/query-parser.d.ts +9 -0
  287. package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
  288. package/engine/dist/services/nlp/query-parser.js +29 -0
  289. package/engine/dist/services/nlp/query-parser.js.map +1 -0
  290. package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
  291. package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
  292. package/engine/dist/services/query-builder/DataFrame.js +263 -0
  293. package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
  294. package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
  295. package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
  296. package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
  297. package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
  298. package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
  299. package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
  300. package/engine/dist/services/query-builder/utils/export.js +130 -0
  301. package/engine/dist/services/query-builder/utils/export.js.map +1 -0
  302. package/engine/dist/services/research/researcher.d.ts +15 -0
  303. package/engine/dist/services/research/researcher.d.ts.map +1 -0
  304. package/engine/dist/services/research/researcher.js +123 -0
  305. package/engine/dist/services/research/researcher.js.map +1 -0
  306. package/engine/dist/services/scribe/scribe.d.ts +43 -0
  307. package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
  308. package/engine/dist/services/scribe/scribe.js +135 -0
  309. package/engine/dist/services/scribe/scribe.js.map +1 -0
  310. package/engine/dist/services/search/bright-nodes.d.ts +41 -0
  311. package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
  312. package/engine/dist/services/search/bright-nodes.js +117 -0
  313. package/engine/dist/services/search/bright-nodes.js.map +1 -0
  314. package/engine/dist/services/search/context-inflator.d.ts +63 -0
  315. package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
  316. package/engine/dist/services/search/context-inflator.js +649 -0
  317. package/engine/dist/services/search/context-inflator.js.map +1 -0
  318. package/engine/dist/services/search/context-manager.d.ts +34 -0
  319. package/engine/dist/services/search/context-manager.d.ts.map +1 -0
  320. package/engine/dist/services/search/context-manager.js +124 -0
  321. package/engine/dist/services/search/context-manager.js.map +1 -0
  322. package/engine/dist/services/search/distributed-query.d.ts +38 -0
  323. package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
  324. package/engine/dist/services/search/distributed-query.js +105 -0
  325. package/engine/dist/services/search/distributed-query.js.map +1 -0
  326. package/engine/dist/services/search/explore.d.ts +73 -0
  327. package/engine/dist/services/search/explore.d.ts.map +1 -0
  328. package/engine/dist/services/search/explore.js +388 -0
  329. package/engine/dist/services/search/explore.js.map +1 -0
  330. package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
  331. package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
  332. package/engine/dist/services/search/graph-context-serializer.js +435 -0
  333. package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
  334. package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
  335. package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
  336. package/engine/dist/services/search/llm-context-formatter.js +394 -0
  337. package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
  338. package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
  339. package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
  340. package/engine/dist/services/search/physics-tag-walker.js +611 -0
  341. package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
  342. package/engine/dist/services/search/query-parser.d.ts +66 -0
  343. package/engine/dist/services/search/query-parser.d.ts.map +1 -0
  344. package/engine/dist/services/search/query-parser.js +346 -0
  345. package/engine/dist/services/search/query-parser.js.map +1 -0
  346. package/engine/dist/services/search/search-utils.d.ts +100 -0
  347. package/engine/dist/services/search/search-utils.d.ts.map +1 -0
  348. package/engine/dist/services/search/search-utils.js +473 -0
  349. package/engine/dist/services/search/search-utils.js.map +1 -0
  350. package/engine/dist/services/search/search.d.ts +116 -0
  351. package/engine/dist/services/search/search.d.ts.map +1 -0
  352. package/engine/dist/services/search/search.js +1286 -0
  353. package/engine/dist/services/search/search.js.map +1 -0
  354. package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
  355. package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
  356. package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
  357. package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
  358. package/engine/dist/services/search/streaming-search.d.ts +51 -0
  359. package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
  360. package/engine/dist/services/search/streaming-search.js +94 -0
  361. package/engine/dist/services/search/streaming-search.js.map +1 -0
  362. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
  363. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
  364. package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
  365. package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
  366. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
  367. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
  368. package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
  369. package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
  370. package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
  371. package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
  372. package/engine/dist/services/semantic/semantic-search.js +649 -0
  373. package/engine/dist/services/semantic/semantic-search.js.map +1 -0
  374. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
  375. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
  376. package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
  377. package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
  378. package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
  379. package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
  380. package/engine/dist/services/semantic/types/semantic.js +7 -0
  381. package/engine/dist/services/semantic/types/semantic.js.map +1 -0
  382. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
  383. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
  384. package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
  385. package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
  386. package/engine/dist/services/system-status.d.ts +68 -0
  387. package/engine/dist/services/system-status.d.ts.map +1 -0
  388. package/engine/dist/services/system-status.js +107 -0
  389. package/engine/dist/services/system-status.js.map +1 -0
  390. package/engine/dist/services/tags/discovery.d.ts +16 -0
  391. package/engine/dist/services/tags/discovery.d.ts.map +1 -0
  392. package/engine/dist/services/tags/discovery.js +206 -0
  393. package/engine/dist/services/tags/discovery.js.map +1 -0
  394. package/engine/dist/services/tags/gliner.d.ts +18 -0
  395. package/engine/dist/services/tags/gliner.d.ts.map +1 -0
  396. package/engine/dist/services/tags/gliner.js +119 -0
  397. package/engine/dist/services/tags/gliner.js.map +1 -0
  398. package/engine/dist/services/tags/infector.d.ts +21 -0
  399. package/engine/dist/services/tags/infector.d.ts.map +1 -0
  400. package/engine/dist/services/tags/infector.js +168 -0
  401. package/engine/dist/services/tags/infector.js.map +1 -0
  402. package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
  403. package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
  404. package/engine/dist/services/tags/tag-auditor.js +283 -0
  405. package/engine/dist/services/tags/tag-auditor.js.map +1 -0
  406. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
  407. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
  408. package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
  409. package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
  410. package/engine/dist/services/vision/vision_service.d.ts +4 -0
  411. package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
  412. package/engine/dist/services/vision/vision_service.js +197 -0
  413. package/engine/dist/services/vision/vision_service.js.map +1 -0
  414. package/engine/dist/test-framework/core.d.ts +133 -0
  415. package/engine/dist/test-framework/core.d.ts.map +1 -0
  416. package/engine/dist/test-framework/core.js +313 -0
  417. package/engine/dist/test-framework/core.js.map +1 -0
  418. package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
  419. package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
  420. package/engine/dist/test-framework/dataset-runner.js +223 -0
  421. package/engine/dist/test-framework/dataset-runner.js.map +1 -0
  422. package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
  423. package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
  424. package/engine/dist/test-framework/diagnostic-tests.js +283 -0
  425. package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
  426. package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
  427. package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
  428. package/engine/dist/test-framework/performance-regression-tests.js +331 -0
  429. package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
  430. package/engine/dist/types/api.d.ts +53 -0
  431. package/engine/dist/types/api.d.ts.map +1 -0
  432. package/engine/dist/types/api.js +2 -0
  433. package/engine/dist/types/api.js.map +1 -0
  434. package/engine/dist/types/atomic.d.ts +42 -0
  435. package/engine/dist/types/atomic.d.ts.map +1 -0
  436. package/engine/dist/types/atomic.js +10 -0
  437. package/engine/dist/types/atomic.js.map +1 -0
  438. package/engine/dist/types/context-protocol.d.ts +137 -0
  439. package/engine/dist/types/context-protocol.d.ts.map +1 -0
  440. package/engine/dist/types/context-protocol.js +28 -0
  441. package/engine/dist/types/context-protocol.js.map +1 -0
  442. package/engine/dist/types/context.d.ts +2 -0
  443. package/engine/dist/types/context.d.ts.map +1 -0
  444. package/engine/dist/types/context.js +2 -0
  445. package/engine/dist/types/context.js.map +1 -0
  446. package/engine/dist/types/index.d.ts +20 -0
  447. package/engine/dist/types/index.d.ts.map +1 -0
  448. package/engine/dist/types/index.js +18 -0
  449. package/engine/dist/types/index.js.map +1 -0
  450. package/engine/dist/types/search.d.ts +31 -0
  451. package/engine/dist/types/search.d.ts.map +1 -0
  452. package/engine/dist/types/search.js +2 -0
  453. package/engine/dist/types/search.js.map +1 -0
  454. package/engine/dist/types/taxonomy.d.ts +137 -0
  455. package/engine/dist/types/taxonomy.d.ts.map +1 -0
  456. package/engine/dist/types/taxonomy.js +138 -0
  457. package/engine/dist/types/taxonomy.js.map +1 -0
  458. package/engine/dist/types/taxonomy.simple.d.ts +131 -0
  459. package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
  460. package/engine/dist/types/taxonomy.simple.js +132 -0
  461. package/engine/dist/types/taxonomy.simple.js.map +1 -0
  462. package/engine/dist/types/tool-call.d.ts +16 -0
  463. package/engine/dist/types/tool-call.d.ts.map +1 -0
  464. package/engine/dist/types/tool-call.js +6 -0
  465. package/engine/dist/types/tool-call.js.map +1 -0
  466. package/engine/dist/types/trace.d.ts +25 -0
  467. package/engine/dist/types/trace.d.ts.map +1 -0
  468. package/engine/dist/types/trace.js +5 -0
  469. package/engine/dist/types/trace.js.map +1 -0
  470. package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
  471. package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
  472. package/engine/dist/utils/adaptive-concurrency.js +266 -0
  473. package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
  474. package/engine/dist/utils/date_extractor.d.ts +2 -0
  475. package/engine/dist/utils/date_extractor.d.ts.map +1 -0
  476. package/engine/dist/utils/date_extractor.js +32 -0
  477. package/engine/dist/utils/date_extractor.js.map +1 -0
  478. package/engine/dist/utils/native-module-manager.d.ts +48 -0
  479. package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
  480. package/engine/dist/utils/native-module-manager.js +265 -0
  481. package/engine/dist/utils/native-module-manager.js.map +1 -0
  482. package/engine/dist/utils/native-module-profiler.d.ts +66 -0
  483. package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
  484. package/engine/dist/utils/native-module-profiler.js +182 -0
  485. package/engine/dist/utils/native-module-profiler.js.map +1 -0
  486. package/engine/dist/utils/path-manager.d.ts +59 -0
  487. package/engine/dist/utils/path-manager.d.ts.map +1 -0
  488. package/engine/dist/utils/path-manager.js +154 -0
  489. package/engine/dist/utils/path-manager.js.map +1 -0
  490. package/engine/dist/utils/performance-monitor.d.ts +92 -0
  491. package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
  492. package/engine/dist/utils/performance-monitor.js +221 -0
  493. package/engine/dist/utils/performance-monitor.js.map +1 -0
  494. package/engine/dist/utils/process-manager.d.ts +18 -0
  495. package/engine/dist/utils/process-manager.d.ts.map +1 -0
  496. package/engine/dist/utils/process-manager.js +100 -0
  497. package/engine/dist/utils/process-manager.js.map +1 -0
  498. package/engine/dist/utils/request-tracer.d.ts +131 -0
  499. package/engine/dist/utils/request-tracer.d.ts.map +1 -0
  500. package/engine/dist/utils/request-tracer.js +414 -0
  501. package/engine/dist/utils/request-tracer.js.map +1 -0
  502. package/engine/dist/utils/resource-manager.d.ts +108 -0
  503. package/engine/dist/utils/resource-manager.d.ts.map +1 -0
  504. package/engine/dist/utils/resource-manager.js +235 -0
  505. package/engine/dist/utils/resource-manager.js.map +1 -0
  506. package/engine/dist/utils/safe-dns.d.ts +14 -0
  507. package/engine/dist/utils/safe-dns.d.ts.map +1 -0
  508. package/engine/dist/utils/safe-dns.js +105 -0
  509. package/engine/dist/utils/safe-dns.js.map +1 -0
  510. package/engine/dist/utils/structured-logger.d.ts +124 -0
  511. package/engine/dist/utils/structured-logger.d.ts.map +1 -0
  512. package/engine/dist/utils/structured-logger.js +332 -0
  513. package/engine/dist/utils/structured-logger.js.map +1 -0
  514. package/engine/dist/utils/tag-cleanup.d.ts +11 -0
  515. package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
  516. package/engine/dist/utils/tag-cleanup.js +111 -0
  517. package/engine/dist/utils/tag-cleanup.js.map +1 -0
  518. package/engine/dist/utils/tag-filter.d.ts +19 -0
  519. package/engine/dist/utils/tag-filter.d.ts.map +1 -0
  520. package/engine/dist/utils/tag-filter.js +147 -0
  521. package/engine/dist/utils/tag-filter.js.map +1 -0
  522. package/engine/dist/utils/tag-modulation.d.ts +80 -0
  523. package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
  524. package/engine/dist/utils/tag-modulation.js +284 -0
  525. package/engine/dist/utils/tag-modulation.js.map +1 -0
  526. package/engine/dist/utils/timer.d.ts +40 -0
  527. package/engine/dist/utils/timer.d.ts.map +1 -0
  528. package/engine/dist/utils/timer.js +76 -0
  529. package/engine/dist/utils/timer.js.map +1 -0
  530. package/engine/dist/utils/token-utils.d.ts +19 -0
  531. package/engine/dist/utils/token-utils.d.ts.map +1 -0
  532. package/engine/dist/utils/token-utils.js +71 -0
  533. package/engine/dist/utils/token-utils.js.map +1 -0
  534. package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
  535. package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
  536. package/engine/dist/utils/wasm-module-loader.js +136 -0
  537. package/engine/dist/utils/wasm-module-loader.js.map +1 -0
  538. package/engine/package.json +105 -0
  539. package/package.json +106 -0
@@ -0,0 +1,649 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import { db } from '../../core/db.js';
4
+ import { getMirrorPath } from '../mirror/mirror.js';
5
+ import { NOTEBOOK_DIR } from '../../config/paths.js';
6
+ import { processWithAdaptiveConcurrency, getOptimalBatchSize } from '../../utils/adaptive-concurrency.js';
7
/**
 * Expands lightweight search hits ("atoms") into readable context windows.
 *
 * Search results carry a compound id plus byte coordinates; the text itself is
 * read from the mirrored/original file on disk, falling back to the
 * `compound_body` column when no file can be found.
 */
export class ContextInflator {
    /**
     * Inflate search results into expanded Context Windows.
     *
     * For every result, in order of preference:
     *   1. results flagged `is_inflated` are passed through untouched;
     *   2. results without `compound_id` / `start_byte` / `end_byte` are passed through untouched;
     *   3. the window is read from disk (mirror first, then original path);
     *   4. otherwise the window is cut from `compound_body` in the DB;
     *   5. otherwise the raw result is returned.
     *
     * Progressive inflation: the top 10% of results (by score) get 2x the base
     * radius, the next 40% get 1.5x, the rest 1x.
     *
     * NOTE: `results` is sorted in place by descending score (pre-existing
     * behaviour, kept so callers that rely on it keep working).
     *
     * @param {Array<object>} results - Search results to inflate.
     * @param {number} totalBudget - Overall character budget used to derive a radius when none is given.
     * @param {number} [radius=0] - Bytes to expand on each side of a hit; <= 0 means "derive from budget".
     * @returns {Promise<Array<object>>} Inflated results sorted by descending score.
     */
    static async inflate(results, totalBudget, radius = 0) {
        if (!Array.isArray(results) || results.length === 0) {
            return [];
        }
        const byScoreDesc = (a, b) => (b.score || 0) - (a.score || 0);
        // Rank first so that the progressive radius tiers below follow the score order.
        results.sort(byScoreDesc);

        // Derive a radius from the budget when the caller did not supply one:
        // spread the budget over at most 10 windows, capped to avoid massive reads.
        let baseRadius = radius;
        if (baseRadius <= 0 && totalBudget) {
            const targetWindowSize = Math.floor(totalBudget / Math.min(results.length, 10));
            baseRadius = Math.min(Math.max(200, Math.floor(targetWindowSize / 2)), 5000);
        }
        // Absolute minimum so we never produce zero-width slices.
        baseRadius = Math.max(baseRadius, 200);

        // compound_id -> Promise<{ filePath, provenance } | null>; storing the promise
        // de-duplicates concurrent lookups for the same compound.
        const compoundPathCache = new Map();
        const processedResults = [];
        let inflatedFromDisk = 0;
        let inflatedFromDb = 0;
        let skippedAlready = 0;
        let skippedNoCoords = 0;

        const topTenPercent = Math.max(1, Math.floor(results.length * 0.1));
        const nextFortyPercent = Math.floor(results.length * 0.4);

        // A zero / NaN batch size would make the loop below spin forever, so force >= 1.
        const batchSize = Math.max(1, Math.floor(Number(getOptimalBatchSize())) || 1);

        for (let i = 0; i < results.length; i += batchSize) {
            const batch = results.slice(i, i + batchSize);
            const batchResults = await processWithAdaptiveConcurrency(batch, async (res, indexInBatch) => {
                // Fall back to a lookup if the concurrency helper does not pass an index.
                const offsetInBatch = Number.isInteger(indexInBatch) ? indexInBatch : batch.indexOf(res);
                const globalIndex = i + offsetInBatch;

                let radiusMultiplier = 1.0;
                if (globalIndex < topTenPercent) {
                    radiusMultiplier = 2.0;
                }
                else if (globalIndex < topTenPercent + nextFortyPercent) {
                    radiusMultiplier = 1.5;
                }
                const effectiveRadius = Math.floor(baseRadius * radiusMultiplier);

                // Counters are plain increments: callbacks interleave only at await points.
                if (res.is_inflated) {
                    skippedAlready++;
                    return res;
                }
                if (!res.compound_id || res.start_byte === undefined || res.end_byte === undefined) {
                    skippedNoCoords++;
                    return res;
                }
                try {
                    const diskContent = await this.inflateFromDisk(res, effectiveRadius, compoundPathCache);
                    if (diskContent !== null) {
                        inflatedFromDisk++;
                        return { ...res, content: `...${diskContent}...`, is_inflated: true };
                    }
                    const dbContent = await this.inflateFromCompoundBody(res, effectiveRadius);
                    if (dbContent !== null) {
                        inflatedFromDb++;
                        return { ...res, content: `...${dbContent}...`, is_inflated: true };
                    }
                    return res;
                }
                catch (e) {
                    console.error(`[ContextInflator] Failed to inflate result for ${res.source}`, e);
                    return res;
                }
            });
            processedResults.push(...batchResults);
        }
        console.log(`[ContextInflator] inflate(): ${inflatedFromDisk} from disk, ${inflatedFromDb} from DB fallback, ${skippedAlready} already inflated, ${skippedNoCoords} no coordinates. Base Radius: ${baseRadius}`);
        // Re-sort defensively: the concurrency helper is not guaranteed to preserve order.
        processedResults.sort(byScoreDesc);
        return processedResults;
    }

    /**
     * Widen a [targetStart, targetEnd) window inside `content` to sentence boundaries.
     *
     * Start: moved back to just after the LAST sentence terminator (`.`/`!`/`?`
     * followed by whitespace, or a blank line) that precedes `targetStart`; if
     * there is none, to just after the last whitespace; otherwise 0.
     * End: moved forward to include the first terminator after `targetEnd`; if
     * there is none, to the next whitespace; otherwise the end of `content`.
     *
     * @param {string} content - Text that contains the window.
     * @param {number} targetStart - Window start, as a string index into `content`.
     * @param {number} targetEnd - Window end, as a string index into `content`.
     * @returns {{start: number, end: number, text: string}} Snapped indices and the trimmed text between them.
     */
    static snapToSentenceBoundary(content, targetStart, targetEnd) {
        const clamp = (value) => Math.min(Math.max(value, 0), content.length);
        const from = clamp(targetStart);
        const to = Math.max(from, clamp(targetEnd));

        // 1. Snap start: the end of the previous sentence is the start of the current one.
        // Scanning for the last terminator (instead of requiring that no '.', '!', '?'
        // or newline follows it) keeps "2.5" or "file.js" from defeating the snap.
        const preceding = content.substring(0, from);
        let snappedStart = 0;
        let lastTerminatorEnd = -1;
        for (const match of preceding.matchAll(/[.!?]\s|\n\s*\n/g)) {
            lastTerminatorEnd = match.index + match[0].length;
        }
        if (lastTerminatorEnd !== -1) {
            snappedStart = lastTerminatorEnd;
        }
        else {
            const lastWhitespace = preceding.search(/\s(?=\S*$)/);
            if (lastWhitespace !== -1) {
                snappedStart = lastWhitespace + 1;
            }
        }

        // 2. Snap end: first terminator after the window, keeping its first character
        // (the punctuation mark) inside the result.
        const following = content.substring(to);
        let snappedEnd = content.length;
        const terminator = /[.!?]\s|\n\s*\n/.exec(following);
        if (terminator) {
            snappedEnd = to + terminator.index + 1;
        }
        else {
            const nextWhitespace = following.search(/\s/);
            if (nextWhitespace !== -1) {
                snappedEnd = to + nextWhitespace;
            }
        }
        return {
            start: snappedStart,
            end: snappedEnd,
            text: content.substring(snappedStart, snappedEnd).trim()
        };
    }

    /**
     * Helper: pick the file to read for a compound — the mirrored copy if it
     * exists, otherwise the original path (relative paths are resolved against
     * NOTEBOOK_DIR).
     *
     * @param {string} filePath - Path stored for the compound.
     * @param {string} provenance - Provenance passed through to getMirrorPath.
     * @returns {Promise<string|null>} An existing path, or null when neither candidate exists.
     */
    static async resolveReadablePath(filePath, provenance) {
        const exists = async (candidate) => {
            try {
                await fs.promises.access(candidate, fs.constants.F_OK);
                return true;
            }
            catch {
                return false;
            }
        };
        const mirrorPath = getMirrorPath(filePath, provenance);
        if (await exists(mirrorPath)) {
            return mirrorPath;
        }
        const originalPath = path.isAbsolute(filePath)
            ? filePath
            : path.join(NOTEBOOK_DIR, filePath);
        return (await exists(originalPath)) ? originalPath : null;
    }

    /**
     * Helper: cut the "hit +/- radius" window out of a UTF-8 byte chunk and snap
     * it to sentence boundaries.
     *
     * Coordinates arrive as BYTE offsets, while snapToSentenceBoundary works on
     * string indices, so offsets are converted by decoding the bytes that precede
     * them (skipped when the chunk decodes to one character per byte).
     *
     * @param {Buffer} chunk - Bytes covering the over-read region.
     * @param {number} chunkStartByte - Absolute byte offset of chunk[0].
     * @param {number} hitStartByte - Absolute byte offset where the hit starts.
     * @param {number} hitEndByte - Absolute byte offset where the hit ends.
     * @param {number} radius - Bytes to keep on each side of the hit.
     * @returns {string|null} Snapped text, or null when nothing is left.
     */
    static extractSnappedWindow(chunk, chunkStartByte, hitStartByte, hitEndByte, radius) {
        const clampRel = (value) => Math.min(Math.max(value, 0), chunk.length);
        const targetStartRel = clampRel(clampRel(hitStartByte - chunkStartByte) - radius);
        const targetEndRel = clampRel(clampRel(hitEndByte - chunkStartByte) + radius);

        const text = chunk.toString('utf-8');
        const oneCharPerByte = text.length === chunk.length;
        const toCharIndex = (byteIndex) => (oneCharPerByte
            ? byteIndex
            : chunk.toString('utf-8', 0, byteIndex).length);

        const snapped = this.snapToSentenceBoundary(text, toCharIndex(targetStartRel), toCharIndex(targetEndRel));
        // A chunk edge can fall inside a multi-byte character, which decodes to U+FFFD.
        const cleaned = snapped.text.replace(/^\uFFFD+|\uFFFD+$/g, '').trim();
        return cleaned.length > 0 ? cleaned : null;
    }

    /**
     * Inflate a single result from the mirrored (or original) file on disk.
     *
     * Over-reads 1000 bytes beyond the radius on each side so that sentence
     * boundaries can be found, then snaps the hit +/- radius window to them.
     *
     * @param {object} res - Result with `compound_id`, `start_byte`, `end_byte`.
     * @param {number} radius - Bytes to expand on each side of the hit.
     * @param {Map<string, Promise<object|null>>} [pathCache] - Shared compound path lookup cache.
     * @returns {Promise<string|null>} Extracted text, or null if the compound/file is missing or unreadable.
     */
    static async inflateFromDisk(res, radius, pathCache = new Map()) {
        if (!res.compound_id) {
            return null;
        }
        let lookup = pathCache.get(res.compound_id);
        if (!lookup) {
            lookup = (async () => {
                try {
                    const result = await db.run(`SELECT path, provenance FROM compounds WHERE id = $1`, [res.compound_id]);
                    const row = result.rows?.[0];
                    return row ? { filePath: row.path, provenance: row.provenance } : null;
                }
                catch (err) {
                    console.warn(`[ContextInflator] Compound path lookup failed for ${res.compound_id}:`, err);
                    return null;
                }
            })();
            pathCache.set(res.compound_id, lookup);
        }
        const pathInfo = await lookup;
        if (!pathInfo) {
            return null;
        }
        const absolutePath = await this.resolveReadablePath(pathInfo.filePath, pathInfo.provenance);
        if (!absolutePath) {
            return null;
        }

        let fd = null;
        try {
            fd = await fs.promises.open(absolutePath, 'r');
            // Stat through the open handle so size and content refer to the same file.
            const { size: fileSize } = await fd.stat();
            const lookahead = 1000;
            const hitStart = res.start_byte ?? 0;
            const hitEnd = res.end_byte ?? fileSize;
            const rawStart = Math.max(0, hitStart - radius - lookahead);
            const rawEnd = Math.min(fileSize, hitEnd + radius + lookahead);
            const chunkLength = rawEnd - rawStart;
            if (chunkLength <= 0) {
                return null;
            }
            const buffer = Buffer.alloc(chunkLength);
            const { bytesRead } = await fd.read(buffer, 0, chunkLength, rawStart);
            if (bytesRead <= 0) {
                return null;
            }
            // Only decode what was actually read; the rest of the buffer is zero padding.
            return this.extractSnappedWindow(buffer.subarray(0, bytesRead), rawStart, hitStart, hitEnd, radius);
        }
        catch (err) {
            console.warn(`[ContextInflator] Failed to inflate from ${absolutePath}:`, err);
            return null;
        }
        finally {
            if (fd) {
                await fd.close();
            }
        }
    }

    /**
     * Fallback: inflate from compound_body stored in the DB, used when no file
     * could be read from disk.
     *
     * `start_byte` / `end_byte` are applied to the UTF-8 encoding of the body,
     * the same way inflateFromDisk applies them to file positions.
     * NOTE(review): assumes compound_body holds the same text as the file — confirm.
     *
     * @param {object} res - Result with `compound_id`, `start_byte`, `end_byte`.
     * @param {number} radius - Bytes to expand on each side of the hit.
     * @returns {Promise<string|null>} Extracted text, or null when unavailable.
     */
    static async inflateFromCompoundBody(res, radius) {
        if (!res.compound_id) {
            return null;
        }
        try {
            const result = await db.run(`SELECT compound_body FROM compounds WHERE id = $1`, [res.compound_id]);
            const compoundBody = result.rows?.[0]?.compound_body;
            if (!compoundBody) {
                return null;
            }
            const bodyBytes = Buffer.isBuffer(compoundBody)
                ? compoundBody
                : Buffer.from(String(compoundBody), 'utf-8');
            const lookahead = 1000;
            const hitStart = res.start_byte ?? 0;
            const hitEnd = res.end_byte ?? bodyBytes.length;
            const rawStart = Math.max(0, hitStart - radius - lookahead);
            const rawEnd = Math.min(bodyBytes.length, hitEnd + radius + lookahead);
            if (rawEnd <= rawStart) {
                return null;
            }
            return this.extractSnappedWindow(bodyBytes.subarray(rawStart, rawEnd), rawStart, hitStart, hitEnd, radius);
        }
        catch (err) {
            console.warn(`[ContextInflator] compound_body fallback failed for ${res.compound_id}:`, err);
            return null;
        }
    }

    /**
     * Helper: append the optional provenance / bucket filters shared by the
     * atom_positions queries. Expects the query to alias `compounds` as `c`.
     *
     * @param {string} query - SQL built so far (ending inside the WHERE clause).
     * @param {Array} params - Bind parameters; filter values are pushed onto this array.
     * @param {{provenance?: string, buckets?: string[]}} [options] - Filters to apply.
     * @returns {string} The query with the filter clauses appended.
     */
    static appendCompoundFilters(query, params, options) {
        let sql = query;
        if (options?.provenance && options.provenance !== 'all') {
            params.push(options.provenance);
            sql += ` AND c.provenance = $${params.length}`;
        }
        // Keep compounds that contain ANY atom carrying one of the requested buckets.
        if (options?.buckets && options.buckets.length > 0) {
            params.push(options.buckets);
            sql += ` AND EXISTS (
                SELECT 1 FROM atoms a
                WHERE a.compound_id = c.id
                AND EXISTS (
                    SELECT 1 FROM unnest(a.buckets) as b WHERE b = ANY($${params.length})
                )
            )`;
        }
        return sql;
    }

    /**
     * Get atom locations for Elastic Context sizing.
     * Returns raw positions so density/hits can be calculated BEFORE inflating.
     *
     * Matches labels equal to the term (with or without the `#` prefix,
     * case-insensitively) or starting with the un-prefixed term. LIKE wildcards
     * in the term are escaped so they match literally.
     *
     * @param {string} term - Atom label / keyword.
     * @param {number} [limit=100] - Maximum number of rows.
     * @param {{provenance?: string, buckets?: string[]}} [options] - Optional filters.
     * @returns {Promise<Array<{compoundId, byteOffset, filePath, timestamp, provenance}>>} Newest first; [] on error.
     */
    static async getAtomLocations(term, limit = 100, options = {}) {
        // Atoms are stored with # prefix, but we might search without
        const termWithHash = term.startsWith('#') ? term : `#${term}`;
        const termWithoutHash = term.startsWith('#') ? term.slice(1) : term;
        // Backslash is the default LIKE escape character in PostgreSQL.
        const prefixPattern = `${termWithoutHash.replace(/[\\%_]/g, '\\$&')}%`;

        const params = [termWithHash, termWithoutHash, prefixPattern];
        let query = ContextInflator.appendCompoundFilters(`
            SELECT ap.compound_id, ap.byte_offset, c.path, c.timestamp, c.provenance
            FROM atom_positions ap
            JOIN compounds c ON ap.compound_id = c.id
            WHERE
            (LOWER(ap.atom_label) = LOWER($1)
            OR LOWER(ap.atom_label) = LOWER($2)
            OR ap.atom_label ILIKE $3)
        `, params, options);
        params.push(limit);
        query += ` ORDER BY c.timestamp DESC LIMIT $${params.length}`;

        try {
            const result = await db.run(query, params);
            if (!result.rows) {
                return [];
            }
            return result.rows.map((row) => ({
                compoundId: row.compound_id,
                byteOffset: row.byte_offset,
                filePath: row.path,
                timestamp: row.timestamp,
                provenance: row.provenance
            }));
        }
        catch (e) {
            console.error(`[ContextInflator] Check locations failed for ${term}`, e);
            return [];
        }
    }

    /**
     * Helper: turn hit offsets into read windows of +/- radius (clamped to the
     * file), merging windows that overlap or lie within 500 bytes of each other
     * as long as the merged window does not exceed `maxWindowSize`.
     *
     * @param {number[]} positions - Byte offsets of the hits.
     * @param {number} radius - Bytes to expand on each side of a hit.
     * @param {number} fileSize - Size of the file in bytes.
     * @param {number} maxWindowSize - Largest allowed merged window, in bytes.
     * @returns {Array<{start: number, end: number, offsets: number[]}>} Windows ordered by start.
     */
    static mergeWindows(positions, radius, fileSize, maxWindowSize) {
        const MERGE_GAP_THRESHOLD = 500;
        const rawWindows = positions
            .map((byteOffset) => ({
                start: Math.max(0, byteOffset - radius),
                end: Math.min(fileSize, byteOffset + radius),
                offset: byteOffset
            }))
            .sort((a, b) => a.start - b.start);

        const merged = [];
        for (const win of rawWindows) {
            const last = merged[merged.length - 1];
            // A negative gap means overlap, so one comparison covers both cases.
            const closeEnough = last !== undefined && (win.start - last.end) < MERGE_GAP_THRESHOLD;
            const combinedEnd = closeEnough ? Math.max(last.end, win.end) : win.end;
            if (closeEnough && (combinedEnd - last.start) <= maxWindowSize) {
                last.end = combinedEnd;
                last.offsets.push(win.offset);
            }
            else {
                merged.push({ start: win.start, end: win.end, offsets: [win.offset] });
            }
        }
        return merged;
    }

    /**
     * Radial Inflation from Atom Positions (Lazy Molecule Architecture).
     * Looks up where an atom occurs, then reads a window around each occurrence
     * from the mirrored/original file on disk, merging nearby windows.
     *
     * @param searchTerm - The atom/keyword to search for (with or without `#`)
     * @param radius - How many bytes to expand in each direction (default 500)
     * @param maxResults - Maximum results to return
     * @param maxWindowSize - Cap on a merged window, in bytes (default radius * 3)
     * @param options - Optional `provenance` / `buckets` filters
     * @returns Virtual results flagged `is_inflated`; [] when nothing is found or on error
     */
    static async inflateFromAtomPositions(searchTerm, radius = 500, maxResults = 20, maxWindowSize = radius * 3, options = {}) {
        try {
            // Atoms are stored with # prefix (e.g. "#Rob") but search terms come without,
            // so both spellings are matched.
            const termWithHash = searchTerm.startsWith('#') ? searchTerm : `#${searchTerm}`;
            const termWithoutHash = searchTerm.startsWith('#') ? searchTerm.slice(1) : searchTerm;
            const params = [termWithHash, termWithoutHash];
            let query = ContextInflator.appendCompoundFilters(`
                SELECT ap.compound_id, ap.byte_offset, c.path, c.timestamp, c.provenance
                FROM atom_positions ap
                JOIN compounds c ON ap.compound_id = c.id
                WHERE (LOWER(ap.atom_label) = LOWER($1) OR LOWER(ap.atom_label) = LOWER($2))
            `, params, options);
            // Over-fetch: several positions can collapse into one merged window.
            params.push(maxResults * 2);
            query += ` ORDER BY c.timestamp DESC LIMIT $${params.length}`;

            const positionsResult = await db.run(query, params);
            if (!positionsResult.rows || positionsResult.rows.length === 0) {
                return [];
            }

            // Group by compound so each file is opened once.
            const compoundPositions = new Map();
            for (const row of positionsResult.rows) {
                let entry = compoundPositions.get(row.compound_id);
                if (!entry) {
                    entry = {
                        positions: [],
                        filePath: row.path,
                        timestamp: row.timestamp,
                        provenance: row.provenance
                    };
                    compoundPositions.set(row.compound_id, entry);
                }
                entry.positions.push(row.byte_offset);
            }

            const resultsArrays = await processWithAdaptiveConcurrency(Array.from(compoundPositions.entries()), async ([compoundId, data]) => {
                const absolutePath = await ContextInflator.resolveReadablePath(data.filePath, data.provenance);
                if (!absolutePath) {
                    return [];
                }
                const compoundResults = [];
                let fd = null;
                try {
                    fd = await fs.promises.open(absolutePath, 'r');
                    const { size: fileSize } = await fd.stat();
                    const mergedWindows = ContextInflator.mergeWindows(data.positions, radius, fileSize, maxWindowSize);
                    for (const win of mergedWindows) {
                        const chunkLength = win.end - win.start;
                        if (chunkLength <= 0) {
                            continue;
                        }
                        const buffer = Buffer.alloc(chunkLength);
                        const { bytesRead } = await fd.read(buffer, 0, chunkLength, win.start);
                        let inflatedContent = buffer.toString('utf-8', 0, bytesRead);
                        // Clean up partial characters/words at boundaries that cut into the file.
                        if (win.start > 0) {
                            inflatedContent = inflatedContent.replace(/^\uFFFD+/, '');
                            const firstSpace = inflatedContent.indexOf(' ');
                            if (firstSpace !== -1 && firstSpace < 50) {
                                inflatedContent = inflatedContent.substring(firstSpace + 1);
                            }
                        }
                        if (win.end < fileSize) {
                            inflatedContent = inflatedContent.replace(/\uFFFD+$/, '');
                            const lastSpace = inflatedContent.lastIndexOf(' ');
                            // Without the -1 guard a short, space-free window would be
                            // cut to substring(0, -1) === '' and the hit dropped.
                            if (lastSpace !== -1 && lastSpace > inflatedContent.length - 50) {
                                inflatedContent = inflatedContent.substring(0, lastSpace);
                            }
                        }
                        if (inflatedContent.trim().length === 0) {
                            continue;
                        }
                        compoundResults.push({
                            id: `virtual_${compoundId}_${win.start}_${win.end}`,
                            content: `...${inflatedContent}...`,
                            source: data.filePath,
                            timestamp: data.timestamp,
                            buckets: ['core'],
                            tags: [searchTerm],
                            epochs: '',
                            provenance: data.provenance,
                            score: 500,
                            compound_id: compoundId,
                            start_byte: win.start,
                            end_byte: win.end,
                            is_inflated: true
                        });
                    }
                }
                catch (err) {
                    console.warn(`[ContextInflator] Error reading file ${absolutePath}:`, err);
                }
                finally {
                    if (fd) {
                        await fd.close();
                    }
                }
                return compoundResults;
            });

            // Every virtual result carries the same score, so this sort only matters
            // if scoring becomes smarter; it is stable and keeps the current order.
            const results = resultsArrays.flat();
            results.sort((a, b) => (b.score || 0) - (a.score || 0));
            const limited = results.slice(0, Math.max(0, maxResults));
            console.log(`[ContextInflator] Radially inflated ${limited.length} merged virtual molecules for "${searchTerm}"`);
            return limited;
        }
        catch (e) {
            console.error(`[ContextInflator] Failed to inflate from atom positions: `, e);
            return [];
        }
    }

    /**
     * Fetch additional context to fill the remaining budget with atoms that
     * share tags or buckets with the top 5 base results (or simply the newest
     * atoms when the base results carry neither).
     *
     * @param {Array<object>} baseResults - Results already selected; their ids are excluded.
     * @param {number} remainingBudget - Characters still available; below 1000 nothing is fetched.
     * @returns {Promise<Array<object>>} Up to 10 results, the last one truncated to fit; [] on error.
     */
    static async fetchAdditionalContext(baseResults, remainingBudget) {
        if (remainingBudget < 1000) {
            return [];
        }
        const seeds = Array.isArray(baseResults) ? baseResults : [];

        // Only the TOP results seed the search, to avoid topic drift.
        const allTags = new Set();
        const allBuckets = new Set();
        for (const seed of seeds.slice(0, 5)) {
            if (Array.isArray(seed.tags)) {
                seed.tags.forEach((tag) => allTags.add(tag));
            }
            if (Array.isArray(seed.buckets)) {
                seed.buckets.forEach((bucket) => allBuckets.add(bucket));
            }
        }

        const params = [];
        const conditions = [];
        if (allTags.size > 0) {
            params.push(Array.from(allTags));
            conditions.push(`EXISTS(
                SELECT 1 FROM unnest(tags) as tag WHERE tag = ANY($${params.length})
            )`);
        }
        if (allBuckets.size > 0) {
            params.push(Array.from(allBuckets));
            conditions.push(`EXISTS(
                SELECT 1 FROM unnest(buckets) as bucket WHERE bucket = ANY($${params.length})
            )`);
        }

        // Tags OR buckets; with neither, fall back to "any atom".
        const queryConditions = conditions.length > 0 ? `(${conditions.join(' OR ')})` : 'TRUE';
        let fullQuery = `
            SELECT id, content, source_path as source, timestamp,
            buckets, tags, epochs, provenance, simhash as molecular_signature,
            100 as score --Lower score for less directly connected content
            FROM atoms
            WHERE ${queryConditions}`;

        const originalIds = seeds.map((r) => r.id);
        if (originalIds.length > 0) {
            params.push(originalIds);
            fullQuery += ` AND id != ALL($${params.length})`;
        }
        fullQuery += ` ORDER BY timestamp DESC LIMIT 10`;

        try {
            const result = await db.run(fullQuery, params);
            if (!result.rows) {
                return [];
            }
            const additionalResults = result.rows.map((row) => ({
                id: row.id,
                content: row.content,
                source: row.source,
                timestamp: row.timestamp,
                buckets: row.buckets,
                tags: row.tags,
                epochs: row.epochs,
                provenance: row.provenance,
                // The query aliases simhash to molecular_signature, so read the alias.
                molecular_signature: row.molecular_signature ?? row.simhash,
                score: row.score || 100,
                is_inflated: true
            }));

            // Take results until the character budget is used up, truncating the last one.
            let totalChars = 0;
            const filteredResults = [];
            for (const candidate of additionalResults) {
                if (!candidate.content) {
                    continue;
                }
                const availableSpace = remainingBudget - totalChars;
                if (availableSpace <= 0) {
                    break;
                }
                if (candidate.content.length <= availableSpace) {
                    filteredResults.push(candidate);
                    totalChars += candidate.content.length;
                    continue;
                }
                const truncatedContent = candidate.content.substring(0, availableSpace);
                filteredResults.push({ ...candidate, content: truncatedContent });
                totalChars += truncatedContent.length;
                break;
            }
            console.log(`[ContextInflator] Fetched ${filteredResults.length} additional results to fill budget`);
            return filteredResults;
        }
        catch (e) {
            console.error(`[ContextInflator] Failed to fetch additional context: `, e);
            return [];
        }
    }
}
649
+ //# sourceMappingURL=context-inflator.js.map