@rbalchii/anchor-engine 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. package/LICENSE +609 -0
  2. package/README.md +317 -0
  3. package/anchor.bat +5 -0
  4. package/docs/API.md +314 -0
  5. package/docs/DEPLOYMENT.md +448 -0
  6. package/docs/INDEX.md +226 -0
  7. package/docs/STAR_Whitepaper_Executive.md +216 -0
  8. package/docs/TROUBLESHOOTING.md +535 -0
  9. package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
  10. package/docs/archive/adoption-guide.md +264 -0
  11. package/docs/archive/adoption-preparation.md +179 -0
  12. package/docs/archive/agent-harness-integration.md +227 -0
  13. package/docs/archive/api-reference.md +106 -0
  14. package/docs/archive/api_flows_diagram.md +118 -0
  15. package/docs/archive/architecture.md +410 -0
  16. package/docs/archive/architecture_diagram.md +174 -0
  17. package/docs/archive/broader-adoption-preparation.md +175 -0
  18. package/docs/archive/browser-paradigm-architecture.md +163 -0
  19. package/docs/archive/chat-integration.md +124 -0
  20. package/docs/archive/community-adoption-materials.md +103 -0
  21. package/docs/archive/community-adoption.md +147 -0
  22. package/docs/archive/comparison-with-siloed-solutions.md +192 -0
  23. package/docs/archive/comprehensive-docs.md +156 -0
  24. package/docs/archive/data_flow_diagram.md +251 -0
  25. package/docs/archive/enhancement-implementation-summary.md +146 -0
  26. package/docs/archive/evolution-summary.md +141 -0
  27. package/docs/archive/ingestion_pipeline_diagram.md +198 -0
  28. package/docs/archive/native-module-profiling-results.md +135 -0
  29. package/docs/archive/positioning-document.md +158 -0
  30. package/docs/archive/positioning.md +175 -0
  31. package/docs/archive/query-builder-documentation.md +218 -0
  32. package/docs/archive/quick-reference.md +40 -0
  33. package/docs/archive/quickstart.md +63 -0
  34. package/docs/archive/relationship-narrative-discovery.md +141 -0
  35. package/docs/archive/search-logic-improvement-plan.md +336 -0
  36. package/docs/archive/search_architecture_diagram.md +212 -0
  37. package/docs/archive/semantic-architecture-guide.md +97 -0
  38. package/docs/archive/sequence-diagrams.md +128 -0
  39. package/docs/archive/system_components_diagram.md +296 -0
  40. package/docs/archive/test-framework-integration.md +109 -0
  41. package/docs/archive/testing-framework-documentation.md +397 -0
  42. package/docs/archive/testing-framework-summary.md +121 -0
  43. package/docs/archive/testing-framework.md +377 -0
  44. package/docs/archive/ui-architecture.md +75 -0
  45. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
  46. package/docs/arxiv/RELATED_WORK.tex +39 -0
  47. package/docs/arxiv/compile.bat +48 -0
  48. package/docs/arxiv/joss_response.md +33 -0
  49. package/docs/arxiv/prepare-submission.bat +46 -0
  50. package/docs/arxiv/review.md +128 -0
  51. package/docs/arxiv/star-whitepaper.tex +657 -0
  52. package/docs/code-patterns.md +289 -0
  53. package/docs/whitepaper.md +445 -0
  54. package/engine/dist/agent/runtime.d.ts +41 -0
  55. package/engine/dist/agent/runtime.d.ts.map +1 -0
  56. package/engine/dist/agent/runtime.js +73 -0
  57. package/engine/dist/agent/runtime.js.map +1 -0
  58. package/engine/dist/commands/audit-tags.d.ts +14 -0
  59. package/engine/dist/commands/audit-tags.d.ts.map +1 -0
  60. package/engine/dist/commands/audit-tags.js +180 -0
  61. package/engine/dist/commands/audit-tags.js.map +1 -0
  62. package/engine/dist/commands/distill.d.ts +19 -0
  63. package/engine/dist/commands/distill.d.ts.map +1 -0
  64. package/engine/dist/commands/distill.js +114 -0
  65. package/engine/dist/commands/distill.js.map +1 -0
  66. package/engine/dist/commands/generate-synonyms.d.ts +14 -0
  67. package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
  68. package/engine/dist/commands/generate-synonyms.js +91 -0
  69. package/engine/dist/commands/generate-synonyms.js.map +1 -0
  70. package/engine/dist/config/index.d.ts +115 -0
  71. package/engine/dist/config/index.d.ts.map +1 -0
  72. package/engine/dist/config/index.js +326 -0
  73. package/engine/dist/config/index.js.map +1 -0
  74. package/engine/dist/config/max-recall-config.d.ts +102 -0
  75. package/engine/dist/config/max-recall-config.d.ts.map +1 -0
  76. package/engine/dist/config/max-recall-config.js +102 -0
  77. package/engine/dist/config/max-recall-config.js.map +1 -0
  78. package/engine/dist/config/paths.d.ts +40 -0
  79. package/engine/dist/config/paths.d.ts.map +1 -0
  80. package/engine/dist/config/paths.js +49 -0
  81. package/engine/dist/config/paths.js.map +1 -0
  82. package/engine/dist/core/batch.d.ts +19 -0
  83. package/engine/dist/core/batch.d.ts.map +1 -0
  84. package/engine/dist/core/batch.js +37 -0
  85. package/engine/dist/core/batch.js.map +1 -0
  86. package/engine/dist/core/db.d.ts +58 -0
  87. package/engine/dist/core/db.d.ts.map +1 -0
  88. package/engine/dist/core/db.js +563 -0
  89. package/engine/dist/core/db.js.map +1 -0
  90. package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
  91. package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
  92. package/engine/dist/core/inference/ChatWorker.js +28 -0
  93. package/engine/dist/core/inference/ChatWorker.js.map +1 -0
  94. package/engine/dist/core/inference/context_manager.d.ts +49 -0
  95. package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
  96. package/engine/dist/core/inference/context_manager.js +199 -0
  97. package/engine/dist/core/inference/context_manager.js.map +1 -0
  98. package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
  99. package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
  100. package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
  101. package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
  102. package/engine/dist/core/vector.d.ts +40 -0
  103. package/engine/dist/core/vector.d.ts.map +1 -0
  104. package/engine/dist/core/vector.js +167 -0
  105. package/engine/dist/core/vector.js.map +1 -0
  106. package/engine/dist/index.d.ts +4 -0
  107. package/engine/dist/index.d.ts.map +1 -0
  108. package/engine/dist/index.js +400 -0
  109. package/engine/dist/index.js.map +1 -0
  110. package/engine/dist/middleware/auth.d.ts +14 -0
  111. package/engine/dist/middleware/auth.d.ts.map +1 -0
  112. package/engine/dist/middleware/auth.js +44 -0
  113. package/engine/dist/middleware/auth.js.map +1 -0
  114. package/engine/dist/middleware/request-tracing.d.ts +29 -0
  115. package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
  116. package/engine/dist/middleware/request-tracing.js +115 -0
  117. package/engine/dist/middleware/request-tracing.js.map +1 -0
  118. package/engine/dist/middleware/validate.d.ts +30 -0
  119. package/engine/dist/middleware/validate.d.ts.map +1 -0
  120. package/engine/dist/middleware/validate.js +117 -0
  121. package/engine/dist/middleware/validate.js.map +1 -0
  122. package/engine/dist/native/index.d.ts +106 -0
  123. package/engine/dist/native/index.d.ts.map +1 -0
  124. package/engine/dist/native/index.js +230 -0
  125. package/engine/dist/native/index.js.map +1 -0
  126. package/engine/dist/native/types.d.ts +45 -0
  127. package/engine/dist/native/types.d.ts.map +1 -0
  128. package/engine/dist/native/types.js +6 -0
  129. package/engine/dist/native/types.js.map +1 -0
  130. package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
  131. package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
  132. package/engine/dist/profiling/atomization-profiling.js +108 -0
  133. package/engine/dist/profiling/atomization-profiling.js.map +1 -0
  134. package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
  135. package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
  136. package/engine/dist/profiling/bottleneck-identification.js +249 -0
  137. package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
  138. package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
  139. package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
  140. package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
  141. package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
  142. package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
  143. package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
  144. package/engine/dist/profiling/simhash-profiling.js +168 -0
  145. package/engine/dist/profiling/simhash-profiling.js.map +1 -0
  146. package/engine/dist/routes/api.d.ts +9 -0
  147. package/engine/dist/routes/api.d.ts.map +1 -0
  148. package/engine/dist/routes/api.js +37 -0
  149. package/engine/dist/routes/api.js.map +1 -0
  150. package/engine/dist/routes/enhanced-api.d.ts +9 -0
  151. package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
  152. package/engine/dist/routes/enhanced-api.js +139 -0
  153. package/engine/dist/routes/enhanced-api.js.map +1 -0
  154. package/engine/dist/routes/health.d.ts +8 -0
  155. package/engine/dist/routes/health.d.ts.map +1 -0
  156. package/engine/dist/routes/health.js +89 -0
  157. package/engine/dist/routes/health.js.map +1 -0
  158. package/engine/dist/routes/monitoring.d.ts +8 -0
  159. package/engine/dist/routes/monitoring.d.ts.map +1 -0
  160. package/engine/dist/routes/monitoring.js +509 -0
  161. package/engine/dist/routes/monitoring.js.map +1 -0
  162. package/engine/dist/routes/v1/admin.d.ts +3 -0
  163. package/engine/dist/routes/v1/admin.d.ts.map +1 -0
  164. package/engine/dist/routes/v1/admin.js +261 -0
  165. package/engine/dist/routes/v1/admin.js.map +1 -0
  166. package/engine/dist/routes/v1/atoms.d.ts +3 -0
  167. package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
  168. package/engine/dist/routes/v1/atoms.js +172 -0
  169. package/engine/dist/routes/v1/atoms.js.map +1 -0
  170. package/engine/dist/routes/v1/backup.d.ts +3 -0
  171. package/engine/dist/routes/v1/backup.d.ts.map +1 -0
  172. package/engine/dist/routes/v1/backup.js +100 -0
  173. package/engine/dist/routes/v1/backup.js.map +1 -0
  174. package/engine/dist/routes/v1/git.d.ts +3 -0
  175. package/engine/dist/routes/v1/git.d.ts.map +1 -0
  176. package/engine/dist/routes/v1/git.js +316 -0
  177. package/engine/dist/routes/v1/git.js.map +1 -0
  178. package/engine/dist/routes/v1/ingest.d.ts +3 -0
  179. package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
  180. package/engine/dist/routes/v1/ingest.js +66 -0
  181. package/engine/dist/routes/v1/ingest.js.map +1 -0
  182. package/engine/dist/routes/v1/memory.d.ts +14 -0
  183. package/engine/dist/routes/v1/memory.d.ts.map +1 -0
  184. package/engine/dist/routes/v1/memory.js +87 -0
  185. package/engine/dist/routes/v1/memory.js.map +1 -0
  186. package/engine/dist/routes/v1/research.d.ts +3 -0
  187. package/engine/dist/routes/v1/research.d.ts.map +1 -0
  188. package/engine/dist/routes/v1/research.js +109 -0
  189. package/engine/dist/routes/v1/research.js.map +1 -0
  190. package/engine/dist/routes/v1/search.d.ts +3 -0
  191. package/engine/dist/routes/v1/search.d.ts.map +1 -0
  192. package/engine/dist/routes/v1/search.js +180 -0
  193. package/engine/dist/routes/v1/search.js.map +1 -0
  194. package/engine/dist/routes/v1/settings.d.ts +8 -0
  195. package/engine/dist/routes/v1/settings.d.ts.map +1 -0
  196. package/engine/dist/routes/v1/settings.js +211 -0
  197. package/engine/dist/routes/v1/settings.js.map +1 -0
  198. package/engine/dist/routes/v1/system.d.ts +3 -0
  199. package/engine/dist/routes/v1/system.d.ts.map +1 -0
  200. package/engine/dist/routes/v1/system.js +326 -0
  201. package/engine/dist/routes/v1/system.js.map +1 -0
  202. package/engine/dist/routes/v1/tags.d.ts +3 -0
  203. package/engine/dist/routes/v1/tags.d.ts.map +1 -0
  204. package/engine/dist/routes/v1/tags.js +102 -0
  205. package/engine/dist/routes/v1/tags.js.map +1 -0
  206. package/engine/dist/server-8080.d.ts +2 -0
  207. package/engine/dist/server-8080.d.ts.map +1 -0
  208. package/engine/dist/server-8080.js +74 -0
  209. package/engine/dist/server-8080.js.map +1 -0
  210. package/engine/dist/services/backup/backup-restore.d.ts +37 -0
  211. package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
  212. package/engine/dist/services/backup/backup-restore.js +385 -0
  213. package/engine/dist/services/backup/backup-restore.js.map +1 -0
  214. package/engine/dist/services/backup/backup.d.ts +14 -0
  215. package/engine/dist/services/backup/backup.d.ts.map +1 -0
  216. package/engine/dist/services/backup/backup.js +442 -0
  217. package/engine/dist/services/backup/backup.js.map +1 -0
  218. package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
  219. package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
  220. package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
  221. package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
  222. package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
  223. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
  224. package/engine/dist/services/distillation/radial-distiller.js +394 -0
  225. package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
  226. package/engine/dist/services/health-check-enhanced.d.ts +89 -0
  227. package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
  228. package/engine/dist/services/health-check-enhanced.js +417 -0
  229. package/engine/dist/services/health-check-enhanced.js.map +1 -0
  230. package/engine/dist/services/idle-manager.d.ts +56 -0
  231. package/engine/dist/services/idle-manager.d.ts.map +1 -0
  232. package/engine/dist/services/idle-manager.js +210 -0
  233. package/engine/dist/services/idle-manager.js.map +1 -0
  234. package/engine/dist/services/inference/inference-service.d.ts +27 -0
  235. package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
  236. package/engine/dist/services/inference/inference-service.js +89 -0
  237. package/engine/dist/services/inference/inference-service.js.map +1 -0
  238. package/engine/dist/services/inference/inference.d.ts +59 -0
  239. package/engine/dist/services/inference/inference.d.ts.map +1 -0
  240. package/engine/dist/services/inference/inference.js +131 -0
  241. package/engine/dist/services/inference/inference.js.map +1 -0
  242. package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
  243. package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
  244. package/engine/dist/services/ingest/atomizer-service.js +982 -0
  245. package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
  246. package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
  247. package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
  248. package/engine/dist/services/ingest/content-cleaner.js +166 -0
  249. package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
  250. package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
  251. package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
  252. package/engine/dist/services/ingest/github-ingest-service.js +537 -0
  253. package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
  254. package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
  255. package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
  256. package/engine/dist/services/ingest/ingest-atomic.js +437 -0
  257. package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
  258. package/engine/dist/services/ingest/ingest.d.ts +50 -0
  259. package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
  260. package/engine/dist/services/ingest/ingest.js +230 -0
  261. package/engine/dist/services/ingest/ingest.js.map +1 -0
  262. package/engine/dist/services/ingest/watchdog.d.ts +31 -0
  263. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
  264. package/engine/dist/services/ingest/watchdog.js +400 -0
  265. package/engine/dist/services/ingest/watchdog.js.map +1 -0
  266. package/engine/dist/services/llm/context.d.ts +6 -0
  267. package/engine/dist/services/llm/context.d.ts.map +1 -0
  268. package/engine/dist/services/llm/context.js +80 -0
  269. package/engine/dist/services/llm/context.js.map +1 -0
  270. package/engine/dist/services/llm/provider.d.ts +23 -0
  271. package/engine/dist/services/llm/provider.d.ts.map +1 -0
  272. package/engine/dist/services/llm/provider.js +338 -0
  273. package/engine/dist/services/llm/provider.js.map +1 -0
  274. package/engine/dist/services/llm/reader.d.ts +12 -0
  275. package/engine/dist/services/llm/reader.d.ts.map +1 -0
  276. package/engine/dist/services/llm/reader.js +40 -0
  277. package/engine/dist/services/llm/reader.js.map +1 -0
  278. package/engine/dist/services/mirror/mirror.d.ts +28 -0
  279. package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
  280. package/engine/dist/services/mirror/mirror.js +208 -0
  281. package/engine/dist/services/mirror/mirror.js.map +1 -0
  282. package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
  283. package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
  284. package/engine/dist/services/nlp/nlp-service.js +151 -0
  285. package/engine/dist/services/nlp/nlp-service.js.map +1 -0
  286. package/engine/dist/services/nlp/query-parser.d.ts +9 -0
  287. package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
  288. package/engine/dist/services/nlp/query-parser.js +29 -0
  289. package/engine/dist/services/nlp/query-parser.js.map +1 -0
  290. package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
  291. package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
  292. package/engine/dist/services/query-builder/DataFrame.js +263 -0
  293. package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
  294. package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
  295. package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
  296. package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
  297. package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
  298. package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
  299. package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
  300. package/engine/dist/services/query-builder/utils/export.js +130 -0
  301. package/engine/dist/services/query-builder/utils/export.js.map +1 -0
  302. package/engine/dist/services/research/researcher.d.ts +15 -0
  303. package/engine/dist/services/research/researcher.d.ts.map +1 -0
  304. package/engine/dist/services/research/researcher.js +123 -0
  305. package/engine/dist/services/research/researcher.js.map +1 -0
  306. package/engine/dist/services/scribe/scribe.d.ts +43 -0
  307. package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
  308. package/engine/dist/services/scribe/scribe.js +135 -0
  309. package/engine/dist/services/scribe/scribe.js.map +1 -0
  310. package/engine/dist/services/search/bright-nodes.d.ts +41 -0
  311. package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
  312. package/engine/dist/services/search/bright-nodes.js +117 -0
  313. package/engine/dist/services/search/bright-nodes.js.map +1 -0
  314. package/engine/dist/services/search/context-inflator.d.ts +63 -0
  315. package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
  316. package/engine/dist/services/search/context-inflator.js +649 -0
  317. package/engine/dist/services/search/context-inflator.js.map +1 -0
  318. package/engine/dist/services/search/context-manager.d.ts +34 -0
  319. package/engine/dist/services/search/context-manager.d.ts.map +1 -0
  320. package/engine/dist/services/search/context-manager.js +124 -0
  321. package/engine/dist/services/search/context-manager.js.map +1 -0
  322. package/engine/dist/services/search/distributed-query.d.ts +38 -0
  323. package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
  324. package/engine/dist/services/search/distributed-query.js +105 -0
  325. package/engine/dist/services/search/distributed-query.js.map +1 -0
  326. package/engine/dist/services/search/explore.d.ts +73 -0
  327. package/engine/dist/services/search/explore.d.ts.map +1 -0
  328. package/engine/dist/services/search/explore.js +388 -0
  329. package/engine/dist/services/search/explore.js.map +1 -0
  330. package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
  331. package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
  332. package/engine/dist/services/search/graph-context-serializer.js +435 -0
  333. package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
  334. package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
  335. package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
  336. package/engine/dist/services/search/llm-context-formatter.js +394 -0
  337. package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
  338. package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
  339. package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
  340. package/engine/dist/services/search/physics-tag-walker.js +611 -0
  341. package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
  342. package/engine/dist/services/search/query-parser.d.ts +66 -0
  343. package/engine/dist/services/search/query-parser.d.ts.map +1 -0
  344. package/engine/dist/services/search/query-parser.js +346 -0
  345. package/engine/dist/services/search/query-parser.js.map +1 -0
  346. package/engine/dist/services/search/search-utils.d.ts +100 -0
  347. package/engine/dist/services/search/search-utils.d.ts.map +1 -0
  348. package/engine/dist/services/search/search-utils.js +473 -0
  349. package/engine/dist/services/search/search-utils.js.map +1 -0
  350. package/engine/dist/services/search/search.d.ts +116 -0
  351. package/engine/dist/services/search/search.d.ts.map +1 -0
  352. package/engine/dist/services/search/search.js +1286 -0
  353. package/engine/dist/services/search/search.js.map +1 -0
  354. package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
  355. package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
  356. package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
  357. package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
  358. package/engine/dist/services/search/streaming-search.d.ts +51 -0
  359. package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
  360. package/engine/dist/services/search/streaming-search.js +94 -0
  361. package/engine/dist/services/search/streaming-search.js.map +1 -0
  362. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
  363. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
  364. package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
  365. package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
  366. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
  367. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
  368. package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
  369. package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
  370. package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
  371. package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
  372. package/engine/dist/services/semantic/semantic-search.js +649 -0
  373. package/engine/dist/services/semantic/semantic-search.js.map +1 -0
  374. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
  375. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
  376. package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
  377. package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
  378. package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
  379. package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
  380. package/engine/dist/services/semantic/types/semantic.js +7 -0
  381. package/engine/dist/services/semantic/types/semantic.js.map +1 -0
  382. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
  383. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
  384. package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
  385. package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
  386. package/engine/dist/services/system-status.d.ts +68 -0
  387. package/engine/dist/services/system-status.d.ts.map +1 -0
  388. package/engine/dist/services/system-status.js +107 -0
  389. package/engine/dist/services/system-status.js.map +1 -0
  390. package/engine/dist/services/tags/discovery.d.ts +16 -0
  391. package/engine/dist/services/tags/discovery.d.ts.map +1 -0
  392. package/engine/dist/services/tags/discovery.js +206 -0
  393. package/engine/dist/services/tags/discovery.js.map +1 -0
  394. package/engine/dist/services/tags/gliner.d.ts +18 -0
  395. package/engine/dist/services/tags/gliner.d.ts.map +1 -0
  396. package/engine/dist/services/tags/gliner.js +119 -0
  397. package/engine/dist/services/tags/gliner.js.map +1 -0
  398. package/engine/dist/services/tags/infector.d.ts +21 -0
  399. package/engine/dist/services/tags/infector.d.ts.map +1 -0
  400. package/engine/dist/services/tags/infector.js +168 -0
  401. package/engine/dist/services/tags/infector.js.map +1 -0
  402. package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
  403. package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
  404. package/engine/dist/services/tags/tag-auditor.js +283 -0
  405. package/engine/dist/services/tags/tag-auditor.js.map +1 -0
  406. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
  407. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
  408. package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
  409. package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
  410. package/engine/dist/services/vision/vision_service.d.ts +4 -0
  411. package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
  412. package/engine/dist/services/vision/vision_service.js +197 -0
  413. package/engine/dist/services/vision/vision_service.js.map +1 -0
  414. package/engine/dist/test-framework/core.d.ts +133 -0
  415. package/engine/dist/test-framework/core.d.ts.map +1 -0
  416. package/engine/dist/test-framework/core.js +313 -0
  417. package/engine/dist/test-framework/core.js.map +1 -0
  418. package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
  419. package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
  420. package/engine/dist/test-framework/dataset-runner.js +223 -0
  421. package/engine/dist/test-framework/dataset-runner.js.map +1 -0
  422. package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
  423. package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
  424. package/engine/dist/test-framework/diagnostic-tests.js +283 -0
  425. package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
  426. package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
  427. package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
  428. package/engine/dist/test-framework/performance-regression-tests.js +331 -0
  429. package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
  430. package/engine/dist/types/api.d.ts +53 -0
  431. package/engine/dist/types/api.d.ts.map +1 -0
  432. package/engine/dist/types/api.js +2 -0
  433. package/engine/dist/types/api.js.map +1 -0
  434. package/engine/dist/types/atomic.d.ts +42 -0
  435. package/engine/dist/types/atomic.d.ts.map +1 -0
  436. package/engine/dist/types/atomic.js +10 -0
  437. package/engine/dist/types/atomic.js.map +1 -0
  438. package/engine/dist/types/context-protocol.d.ts +137 -0
  439. package/engine/dist/types/context-protocol.d.ts.map +1 -0
  440. package/engine/dist/types/context-protocol.js +28 -0
  441. package/engine/dist/types/context-protocol.js.map +1 -0
  442. package/engine/dist/types/context.d.ts +2 -0
  443. package/engine/dist/types/context.d.ts.map +1 -0
  444. package/engine/dist/types/context.js +2 -0
  445. package/engine/dist/types/context.js.map +1 -0
  446. package/engine/dist/types/index.d.ts +20 -0
  447. package/engine/dist/types/index.d.ts.map +1 -0
  448. package/engine/dist/types/index.js +18 -0
  449. package/engine/dist/types/index.js.map +1 -0
  450. package/engine/dist/types/search.d.ts +31 -0
  451. package/engine/dist/types/search.d.ts.map +1 -0
  452. package/engine/dist/types/search.js +2 -0
  453. package/engine/dist/types/search.js.map +1 -0
  454. package/engine/dist/types/taxonomy.d.ts +137 -0
  455. package/engine/dist/types/taxonomy.d.ts.map +1 -0
  456. package/engine/dist/types/taxonomy.js +138 -0
  457. package/engine/dist/types/taxonomy.js.map +1 -0
  458. package/engine/dist/types/taxonomy.simple.d.ts +131 -0
  459. package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
  460. package/engine/dist/types/taxonomy.simple.js +132 -0
  461. package/engine/dist/types/taxonomy.simple.js.map +1 -0
  462. package/engine/dist/types/tool-call.d.ts +16 -0
  463. package/engine/dist/types/tool-call.d.ts.map +1 -0
  464. package/engine/dist/types/tool-call.js +6 -0
  465. package/engine/dist/types/tool-call.js.map +1 -0
  466. package/engine/dist/types/trace.d.ts +25 -0
  467. package/engine/dist/types/trace.d.ts.map +1 -0
  468. package/engine/dist/types/trace.js +5 -0
  469. package/engine/dist/types/trace.js.map +1 -0
  470. package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
  471. package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
  472. package/engine/dist/utils/adaptive-concurrency.js +266 -0
  473. package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
  474. package/engine/dist/utils/date_extractor.d.ts +2 -0
  475. package/engine/dist/utils/date_extractor.d.ts.map +1 -0
  476. package/engine/dist/utils/date_extractor.js +32 -0
  477. package/engine/dist/utils/date_extractor.js.map +1 -0
  478. package/engine/dist/utils/native-module-manager.d.ts +48 -0
  479. package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
  480. package/engine/dist/utils/native-module-manager.js +265 -0
  481. package/engine/dist/utils/native-module-manager.js.map +1 -0
  482. package/engine/dist/utils/native-module-profiler.d.ts +66 -0
  483. package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
  484. package/engine/dist/utils/native-module-profiler.js +182 -0
  485. package/engine/dist/utils/native-module-profiler.js.map +1 -0
  486. package/engine/dist/utils/path-manager.d.ts +59 -0
  487. package/engine/dist/utils/path-manager.d.ts.map +1 -0
  488. package/engine/dist/utils/path-manager.js +154 -0
  489. package/engine/dist/utils/path-manager.js.map +1 -0
  490. package/engine/dist/utils/performance-monitor.d.ts +92 -0
  491. package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
  492. package/engine/dist/utils/performance-monitor.js +221 -0
  493. package/engine/dist/utils/performance-monitor.js.map +1 -0
  494. package/engine/dist/utils/process-manager.d.ts +18 -0
  495. package/engine/dist/utils/process-manager.d.ts.map +1 -0
  496. package/engine/dist/utils/process-manager.js +100 -0
  497. package/engine/dist/utils/process-manager.js.map +1 -0
  498. package/engine/dist/utils/request-tracer.d.ts +131 -0
  499. package/engine/dist/utils/request-tracer.d.ts.map +1 -0
  500. package/engine/dist/utils/request-tracer.js +414 -0
  501. package/engine/dist/utils/request-tracer.js.map +1 -0
  502. package/engine/dist/utils/resource-manager.d.ts +108 -0
  503. package/engine/dist/utils/resource-manager.d.ts.map +1 -0
  504. package/engine/dist/utils/resource-manager.js +235 -0
  505. package/engine/dist/utils/resource-manager.js.map +1 -0
  506. package/engine/dist/utils/safe-dns.d.ts +14 -0
  507. package/engine/dist/utils/safe-dns.d.ts.map +1 -0
  508. package/engine/dist/utils/safe-dns.js +105 -0
  509. package/engine/dist/utils/safe-dns.js.map +1 -0
  510. package/engine/dist/utils/structured-logger.d.ts +124 -0
  511. package/engine/dist/utils/structured-logger.d.ts.map +1 -0
  512. package/engine/dist/utils/structured-logger.js +332 -0
  513. package/engine/dist/utils/structured-logger.js.map +1 -0
  514. package/engine/dist/utils/tag-cleanup.d.ts +11 -0
  515. package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
  516. package/engine/dist/utils/tag-cleanup.js +111 -0
  517. package/engine/dist/utils/tag-cleanup.js.map +1 -0
  518. package/engine/dist/utils/tag-filter.d.ts +19 -0
  519. package/engine/dist/utils/tag-filter.d.ts.map +1 -0
  520. package/engine/dist/utils/tag-filter.js +147 -0
  521. package/engine/dist/utils/tag-filter.js.map +1 -0
  522. package/engine/dist/utils/tag-modulation.d.ts +80 -0
  523. package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
  524. package/engine/dist/utils/tag-modulation.js +284 -0
  525. package/engine/dist/utils/tag-modulation.js.map +1 -0
  526. package/engine/dist/utils/timer.d.ts +40 -0
  527. package/engine/dist/utils/timer.d.ts.map +1 -0
  528. package/engine/dist/utils/timer.js +76 -0
  529. package/engine/dist/utils/timer.js.map +1 -0
  530. package/engine/dist/utils/token-utils.d.ts +19 -0
  531. package/engine/dist/utils/token-utils.d.ts.map +1 -0
  532. package/engine/dist/utils/token-utils.js +71 -0
  533. package/engine/dist/utils/token-utils.js.map +1 -0
  534. package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
  535. package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
  536. package/engine/dist/utils/wasm-module-loader.js +136 -0
  537. package/engine/dist/utils/wasm-module-loader.js.map +1 -0
  538. package/engine/package.json +105 -0
  539. package/package.json +106 -0
@@ -0,0 +1,625 @@
1
+ /**
2
+ * Semantic Ingestion Service for ECE (Semantic Shift Refactor)
3
+ *
4
+ * Replaces the old atomizer with semantic molecule processing
5
+ * that creates high-level semantic tags and atomic entities.
6
+ */
7
+ import { SemanticMoleculeProcessor } from './semantic-molecule-processor.js';
8
+ import { db } from '../../core/db.js';
9
+ import * as crypto from 'crypto';
10
+ import { Timer } from '../../utils/timer.js';
11
/** Content longer than this (in UTF-16 code units) is routed to ingestLargeContent. */
const MAX_CONTENT_LENGTH = 500 * 1024;
/** Target upper bound for the length of one semantic molecule (text chunk). */
const MAX_MOLECULE_LENGTH = 500;
/** Chunks must be strictly longer than this to be kept. */
const MIN_MOLECULE_LENGTH = 10;
/** Tags longer than this are skipped when indexing (index size limit). */
const MAX_TAG_LENGTH = 255;
/** Rows per multi-row INSERT statement. */
const ATOM_BATCH_SIZE = 100;
const TAG_BATCH_SIZE = 500;
/**
 * Placeholder embedding stored with every atom written by this service.
 * Historically called the "zero vector", although each of the 768 components is 0.1.
 * Serialised once and shared by all rows.
 */
const PLACEHOLDER_EMBEDDING = JSON.stringify(new Array(768).fill(0.1));
/** Columns written to the `atoms` table, in parameter order. */
const ATOM_COLUMNS = [
    'id', 'timestamp', 'content', 'source_path', 'source_id', 'sequence', 'type',
    'hash', 'buckets', 'tags', 'epochs', 'provenance', 'simhash', 'embedding'
];
/** `col = EXCLUDED.col` for every atom column except the conflict key. */
const ATOM_UPSERT_ASSIGNMENTS = ATOM_COLUMNS
    .filter((column) => column !== 'id')
    .map((column) => `${column} = EXCLUDED.${column}`)
    .join(',\n');

/** Hex-encoded SHA-256 digest of `text`. */
function sha256Hex(text) {
    return crypto.createHash('sha256').update(text).digest('hex');
}

/** Best-effort message extraction for values caught in a `catch` clause. */
function errorMessage(e) {
    return e instanceof Error ? e.message : String(e);
}

/** Total number of contained entities across `molecules`. */
function countEntities(molecules) {
    return molecules.reduce((sum, molecule) => sum + molecule.containedEntities.length, 0);
}

/**
 * Combine the legacy single `bucket` parameter with the `buckets` list.
 * Order is preserved and duplicates are removed, so passing the same bucket
 * through both parameters no longer stores it twice.
 */
function mergeBuckets(bucket, buckets) {
    return [...new Set(bucket ? [...buckets, bucket] : buckets)];
}

/** Build `($1, $2, ...), ($k, ...)` for `rowCount` rows of `columnCount` parameters. */
function placeholderRows(rowCount, columnCount) {
    const rows = [];
    for (let row = 0; row < rowCount; row++) {
        const base = row * columnCount;
        const cells = Array.from({ length: columnCount }, (_, col) => `$${base + col + 1}`);
        rows.push(`(${cells.join(', ')})`);
    }
    return rows.join(', ');
}

/** Attach per-chunk source name, offset timestamp and provenance to raw text chunks. */
function withChunkMetadata(textChunks, source) {
    const now = Date.now();
    return textChunks.map((content, index) => ({
        content,
        source: `${source}_chunk_${index}`,
        timestamp: now + index, // Slightly offset timestamps
        provenance: 'external'
    }));
}

/**
 * Turn processed molecules into `atoms` rows: one row per molecule followed by
 * one row per entity the molecule contains.
 */
function buildAtoms(molecules, source, type, buckets, tags) {
    const atoms = [];
    const entityBuckets = [...new Set([...buckets, 'entities'])];
    for (const molecule of molecules) {
        // The processor's id is the source of truth; only invent one if it is missing.
        const id = molecule.id || `mol_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
        const timestamp = molecule.timestamp;
        const semanticTags = molecule.semanticTags.map((tag) => tag.replace('#', ''));
        atoms.push({
            id,
            timestamp,
            content: molecule.content,
            source_path: source,
            source_id: source,
            sequence: 0,
            type: type || 'semantic_molecule',
            hash: sha256Hex(molecule.content),
            buckets,
            tags: [...tags, ...semanticTags],
            epochs: [],
            provenance: molecule.provenance,
            simhash: "0",
            embedding: PLACEHOLDER_EMBEDDING
        });
        for (const entity of molecule.containedEntities) {
            const entityHash = sha256Hex(entity);
            // The entity text is hashed into the id to stay within the index size limit.
            atoms.push({
                id: `atom_${id}_${entityHash.substring(0, 16)}`,
                timestamp,
                content: entity,
                source_path: `${source}_entities`,
                source_id: id,
                sequence: 0,
                type: 'atomic_entity',
                hash: entityHash,
                buckets: entityBuckets,
                tags: [`entity:${entity.toLowerCase()}`.substring(0, MAX_TAG_LENGTH), ...semanticTags],
                epochs: [],
                provenance: 'internal',
                simhash: "0",
                embedding: PLACEHOLDER_EMBEDDING
            });
        }
    }
    return atoms;
}

/**
 * Expand atoms into unique `{ atomId, tag, bucket }` rows for the `tags` table.
 * Empty and over-long tags are skipped. The dedupe key is a JSON-encoded tuple
 * because tags and buckets may themselves contain '-' or ':' (e.g. `type:text`),
 * which made delimiter-joined keys collide and drop valid rows.
 */
function buildTagEntries(atoms) {
    const seen = new Set();
    const entries = [];
    for (const atom of atoms) {
        for (const bucket of atom.buckets) {
            for (const tag of atom.tags) {
                if (!tag || tag.length > MAX_TAG_LENGTH) {
                    continue;
                }
                const key = JSON.stringify([atom.id, tag, bucket]);
                if (seen.has(key)) {
                    continue;
                }
                seen.add(key);
                entries.push({ atomId: atom.id, tag, bucket });
            }
        }
    }
    return entries;
}

/** Upsert atoms in batches. `atoms` must already be unique by id. */
async function upsertAtoms(atoms) {
    for (let i = 0; i < atoms.length; i += ATOM_BATCH_SIZE) {
        const batch = atoms.slice(i, i + ATOM_BATCH_SIZE);
        const atomQuery = `
INSERT INTO atoms (${ATOM_COLUMNS.join(', ')})
VALUES ${placeholderRows(batch.length, ATOM_COLUMNS.length)}
ON CONFLICT (id) DO UPDATE SET
${ATOM_UPSERT_ASSIGNMENTS}
`;
        // flatMap flattens one level only, so array-valued columns stay single parameters.
        const atomValues = batch.flatMap((atom) => ATOM_COLUMNS.map((column) => atom[column]));
        await db.run(atomQuery, atomValues);
    }
}

/** Insert tag rows in batches, ignoring rows that already exist. */
async function insertTagEntries(entries) {
    for (let i = 0; i < entries.length; i += TAG_BATCH_SIZE) {
        const batch = entries.slice(i, i + TAG_BATCH_SIZE);
        const tagQuery = `
INSERT INTO tags (atom_id, tag, bucket)
VALUES ${placeholderRows(batch.length, 3)}
ON CONFLICT (atom_id, tag, bucket) DO NOTHING
`;
        const tagValues = batch.flatMap((entry) => [entry.atomId, entry.tag, entry.bucket]);
        await db.run(tagQuery, tagValues);
    }
}

/**
 * Cut `content` into pieces of roughly `chunkSize` characters. Each cut is moved
 * forward to the last paragraph break, sentence end or newline found within the
 * next `lookahead` characters (in that order of preference). Every chunk after
 * the first is prefixed with the final `overlapSize` characters of the text
 * before it.
 */
function splitIntoLargeChunks(content, chunkSize, overlapSize, lookahead = 5000) {
    const chunks = [];
    let start = 0;
    while (start < content.length) {
        let end = Math.min(start + chunkSize, content.length);
        if (end < content.length) {
            const lookaheadText = content.substring(end, Math.min(end + lookahead, content.length));
            const paragraphBreak = lookaheadText.lastIndexOf('\n\n');
            const sentenceBreak = lookaheadText.lastIndexOf('. ');
            const newlineBreak = lookaheadText.lastIndexOf('\n');
            if (paragraphBreak !== -1) {
                end += paragraphBreak + 2; // +2 for \n\n
            }
            else if (sentenceBreak !== -1) {
                end += sentenceBreak + 2; // +2 for '. '
            }
            else if (newlineBreak !== -1) {
                end += newlineBreak + 1; // +1 for '\n'
            }
            // Otherwise no good break point was found: cut exactly at chunkSize.
        }
        const overlapStart = Math.max(0, start - overlapSize);
        chunks.push(content.substring(overlapStart, end));
        start = end;
    }
    return chunks;
}

/**
 * Ingests text by splitting it into "molecules" (text chunks), running each
 * through the SemanticMoleculeProcessor, and persisting the resulting molecules
 * and their contained entities as rows in the `atoms` and `tags` tables.
 */
export class SemanticIngestionService {
    moleculeProcessor;
    constructor() {
        this.moleculeProcessor = new SemanticMoleculeProcessor();
    }
    /**
     * Ingest content using the semantic architecture.
     * Creates molecules with high-level semantic tags and atomic entities.
     *
     * @param {string} content - Raw text to ingest.
     * @param {string} source - Source identifier; also recorded as a `source:` tag.
     * @param {string} [type='text'] - Atom type; also recorded as a `type:` tag.
     * @param {string} [bucket='default'] - Legacy single bucket, merged into `buckets`.
     * @param {string[]} [buckets=[]] - Buckets to file the atoms under.
     * @param {string[]} [tags=[]] - High-level semantic categories.
     * @returns {Promise<{status: string, id: string, message: string}>} Never rejects
     *   for ingestion failures; they are reported as `status: 'error'`.
     */
    async ingestContent(content, source, type = 'text', bucket = 'default', buckets = [], tags = []) {
        const timer = new Timer('IngestionService');
        try {
            console.log(`[IngestionService] Starting ingestion for source: ${source}, type: ${type}, length: ${content.length} chars`);
            // Handle legacy single-bucket param
            const allBuckets = mergeBuckets(bucket, buckets);
            console.log(`[IngestionService] Processing with buckets: [${allBuckets.join(', ')}], tags: [${tags.join(', ')}]`);
            // Always add explicit metadata tags so the tag list is never empty and
            // buckets are indexed even when the processor yields no semantic tags.
            const metadataTags = [`source:${source}`, `type:${type}`];
            const effectiveTags = [...new Set([...tags, ...metadataTags])];
            console.log(`[IngestionService] Effective tags after adding metadata: [${effectiveTags.join(', ')}]`);
            if (content.length > MAX_CONTENT_LENGTH) {
                console.warn(`[SemanticIngestionService] Content exceeds maximum length (${content.length} chars), performing automatic chunking...`);
                timer.log('Starting large content ingestion');
                // Pass the unmerged bucket parameters: ingestLargeContent merges them itself.
                const result = await this.ingestLargeContent(content, source, type, bucket, buckets, effectiveTags);
                timer.logTotalAndReset(`Completed large content ingestion for ${source}`);
                return result;
            }
            timer.log('Starting content splitting');
            const textChunks = this.splitIntoMolecules(content);
            console.log(`[IngestionService] Content split into ${textChunks.length} chunks`);
            timer.logLap(`Split content into ${textChunks.length} chunks`);
            timer.log('Starting molecule processing');
            const chunksWithMetadata = withChunkMetadata(textChunks, source);
            console.log(`[IngestionService] Processing ${chunksWithMetadata.length} chunks through molecule processor...`);
            // Process chunks in parallel to reduce serial processing time
            const semanticMolecules = await Promise.all(chunksWithMetadata.map((chunk) => this.moleculeProcessor.processTextChunk(chunk.content, chunk.source, chunk.timestamp, chunk.provenance)));
            console.log(`[IngestionService] Processed ${semanticMolecules.length} semantic molecules with a total of ${countEntities(semanticMolecules)} atomic entities`);
            timer.logLap(`Processed ${semanticMolecules.length} semantic molecules`);
            const result = await this.saveMoleculesBatched([semanticMolecules], source, type, allBuckets, effectiveTags);
            return {
                status: result.status,
                id: semanticMolecules[0]?.id || 'unknown',
                message: result.message
            };
        }
        catch (e) {
            console.error('[SemanticIngestionService] Ingest Error:', e);
            return { status: 'error', id: 'unknown', message: errorMessage(e) };
        }
    }
    /**
     * Persist molecules and their entities inside a single database transaction.
     *
     * @param {Array<Array<object>>} moleculeBatches - Batches of processed molecules; each
     *   molecule provides `id`, `timestamp`, `content`, `provenance`, `semanticTags`
     *   and `containedEntities`.
     * @param {string} source - Stored as `source_path`/`source_id` of molecule rows.
     * @param {string} type - Atom type for molecule rows (falls back to 'semantic_molecule').
     * @param {string[]} buckets - Buckets for the molecule rows; entity rows also get 'entities'.
     * @param {string[]} tags - Tags added to every molecule row.
     * @returns {Promise<{status: string, message: string}>}
     * @throws Re-throws the database error after attempting a ROLLBACK.
     */
    async saveMoleculesBatched(moleculeBatches, source, type, buckets, tags) {
        const timer = new Timer('SaveMoleculesBatched');
        const molecules = moleculeBatches.flat();
        if (molecules.length === 0) {
            return { status: 'success', message: 'No molecules to save' };
        }
        const allAtoms = buildAtoms(molecules, source, type, buckets, tags);
        // Deduplicate by id (first occurrence wins): one multi-row upsert must not
        // target the same id twice.
        const uniqueAtoms = new Map();
        for (const atom of allAtoms) {
            if (!uniqueAtoms.has(atom.id)) {
                uniqueAtoms.set(atom.id, atom);
            }
        }
        const tagEntries = buildTagEntries(allAtoms);
        await db.run('BEGIN');
        try {
            await upsertAtoms([...uniqueAtoms.values()]);
            await insertTagEntries(tagEntries);
            await db.run('COMMIT');
        }
        catch (error) {
            console.error('[IngestionService] Database transaction error:', error);
            try {
                await db.run('ROLLBACK');
            }
            catch (rollbackError) {
                // Report it, but let the original failure reach the caller.
                console.error('[IngestionService] Rollback failed:', rollbackError);
            }
            throw error;
        }
        timer.logTotalAndReset(`Saved batch of ${molecules.length} molecules`);
        return {
            status: 'success',
            message: `Saved ${molecules.length} molecules with ${countEntities(molecules)} entities`
        };
    }
    /**
     * Split content into semantic molecules (text chunks).
     * Paragraphs (separated by blank lines) of at most 500 characters become one
     * chunk each; longer paragraphs are packed sentence by sentence into chunks of
     * at most 500 characters. A single sentence longer than that is kept whole.
     * Chunks of 10 characters or fewer are discarded.
     *
     * @param {string} content
     * @returns {string[]}
     */
    splitIntoMolecules(content) {
        const paragraphs = content.split(/\n\s*\n/).filter((p) => p.trim().length > 0);
        const chunks = [];
        for (const paragraph of paragraphs) {
            if (paragraph.length <= MAX_MOLECULE_LENGTH) {
                chunks.push(paragraph.trim());
                continue;
            }
            let currentChunk = '';
            for (const sentence of this.splitIntoSentences(paragraph)) {
                if ((currentChunk + ' ' + sentence).length > MAX_MOLECULE_LENGTH) {
                    if (currentChunk) {
                        chunks.push(currentChunk.trim());
                    }
                    currentChunk = sentence;
                }
                else {
                    currentChunk += (currentChunk ? ' ' : '') + sentence;
                }
            }
            if (currentChunk) {
                chunks.push(currentChunk.trim());
            }
        }
        return chunks.filter((chunk) => chunk.length > MIN_MOLECULE_LENGTH);
    }
    /**
     * Split text into sentences at whitespace that follows '.', '?' or '!',
     * with lookbehinds that avoid splitting after patterns such as "e.g." or "Mr.".
     *
     * @param {string} text
     * @returns {string[]} Trimmed, non-empty sentences.
     */
    splitIntoSentences(text) {
        return text
            .split(/(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s+/g)
            .map((s) => s.trim())
            .filter((s) => s.length > 0);
    }
    /**
     * Process a single text chunk into a semantic molecule.
     *
     * @param {string} content
     * @param {string} source
     * @param {number} [timestamp=Date.now()]
     * @returns {Promise<object>} Whatever the molecule processor returns for the chunk.
     */
    async processSingleChunk(content, source, timestamp = Date.now()) {
        return await this.moleculeProcessor.processTextChunk(content, source, timestamp);
    }
    /**
     * Ingest large content by cutting it into ~100KB chunks and streaming them:
     * each chunk is processed and saved before the next one is touched, which
     * keeps memory usage low. Errors from processing or saving propagate to the
     * caller (ingestContent converts them into an error result).
     *
     * @param {string} content
     * @param {string} source
     * @param {string} [type='text']
     * @param {string} [bucket='default'] - Legacy single bucket, merged into `buckets`.
     * @param {string[]} [buckets=[]]
     * @param {string[]} [tags=[]]
     * @returns {Promise<{status: string, id: string, message: string}>}
     */
    async ingestLargeContent(content, source, type = 'text', bucket = 'default', buckets = [], tags = []) {
        const allBuckets = mergeBuckets(bucket, buckets);
        const chunkSize = 100 * 1024; // 100KB to prevent memory issues with PGlite while maintaining reasonable performance
        const overlapSize = 1 * 1024; // 1KB overlap to minimize redundancy
        const chunks = splitIntoLargeChunks(content, chunkSize, overlapSize);
        console.log(`[IngestionService] Split large content (${content.length} chars) into ${chunks.length} chunks of ~${Math.round(chunkSize / 1024)}KB each`);
        // NLP is CPU-bound, so processing several large chunks at once does not help
        // throughput and only costs memory. One chunk at a time is the stable choice.
        const BATCH_SIZE = 1;
        let totalMolecules = 0;
        let totalEntities = 0;
        console.log(`[IngestionService] Split large content (${content.length} chars) into ${chunks.length} chunks. Processing in batches of ${BATCH_SIZE}...`);
        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
            const batchChunks = chunks.slice(i, i + BATCH_SIZE);
            console.log(`[IngestionService] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(chunks.length / BATCH_SIZE)} (${batchChunks.length} chunks)...`);
            // 1. Process text chunks into molecules (parallel within the batch)
            const batchResults = await Promise.all(batchChunks.map(async (chunk, batchIndex) => {
                const globalIndex = i + batchIndex;
                const chunkSource = `${source}_chunk_${globalIndex + 1}_of_${chunks.length}`;
                const baseTimestamp = Date.now() + globalIndex * 1000;
                const chunksWithMetadata = this.splitIntoMolecules(chunk).map((textChunk, idx) => ({
                    content: textChunk,
                    source: `${chunkSource}_molecule_${idx}`,
                    timestamp: baseTimestamp + idx,
                    provenance: 'external'
                }));
                return await this.moleculeProcessor.processTextChunks(chunksWithMetadata);
            }));
            const batchMolecules = batchResults.flat();
            if (batchMolecules.length > 0) {
                // 2. Save this batch immediately so its memory can be released
                await this.saveMoleculesBatched([batchMolecules], source, type, allBuckets, tags);
                totalMolecules += batchMolecules.length;
                totalEntities += countEntities(batchMolecules);
            }
        }
        return {
            status: 'success',
            id: `multi_chunk_${Date.now()}`,
            message: `Processed large content in ${chunks.length} chunks (streaming), ingested ${totalMolecules} semantic molecules with ${totalEntities} atomic entities`
        };
    }
    /**
     * Ingest a single chunk without the length validation done by ingestContent
     * (so it can never recurse into the large-content path).
     *
     * Persistence is delegated to saveMoleculesBatched, so molecules are stored
     * under the processor-assigned id, which is also the id reported in the result.
     *
     * @param {string} content
     * @param {string} source
     * @param {string} [type='text']
     * @param {string} [bucket='default'] - Legacy single bucket, merged into `buckets`.
     * @param {string[]} [buckets=[]]
     * @param {string[]} [tags=[]]
     * @returns {Promise<{status: string, id: string, message: string}>} Failures are
     *   reported as `status: 'error'` rather than thrown.
     */
    async ingestSingleChunk(content, source, type = 'text', bucket = 'default', buckets = [], tags = []) {
        try {
            const allBuckets = mergeBuckets(bucket, buckets);
            const chunksWithMetadata = withChunkMetadata(this.splitIntoMolecules(content), source);
            // Process chunks in parallel to reduce serial processing time
            const semanticMolecules = await Promise.all(chunksWithMetadata.map((chunk) => this.moleculeProcessor.processTextChunk(chunk.content, chunk.source, chunk.timestamp, chunk.provenance)));
            await this.saveMoleculesBatched([semanticMolecules], source, type, allBuckets, tags);
            return {
                status: 'success',
                id: semanticMolecules[0]?.id || 'unknown',
                message: `Ingested ${semanticMolecules.length} semantic molecules with ${countEntities(semanticMolecules)} atomic entities`
            };
        }
        catch (e) {
            console.error('[SemanticIngestionService] Single Chunk Ingest Error:', e);
            return { status: 'error', id: 'unknown', message: errorMessage(e) };
        }
    }
    /**
     * Index tags in the separate tags table for efficient retrieval/filtering.
     * One row is written per unique (tag, bucket) pair; empty tags and tags longer
     * than 255 characters are skipped. Database failures are logged and swallowed
     * so that tag indexing never fails an ingestion.
     *
     * @param {string} atomId
     * @param {string[]} tags
     * @param {string[]} buckets
     * @returns {Promise<void>}
     */
    async indexTags(atomId, tags, buckets) {
        if (!tags.length || !buckets.length) {
            return;
        }
        const entries = buildTagEntries([{ id: atomId, tags, buckets }]);
        if (entries.length === 0) {
            return;
        }
        const values = entries.flatMap((entry) => [entry.atomId, entry.tag, entry.bucket]);
        try {
            await db.run(`INSERT INTO tags (atom_id, tag, bucket) VALUES ${placeholderRows(entries.length, 3)}
ON CONFLICT (atom_id, tag, bucket) DO NOTHING`, values);
        }
        catch (e) {
            // Warn but don't fail ingestion
            console.warn(`[SemanticIngestionService] Failed to index tags`, e);
        }
    }
}
625
+ //# sourceMappingURL=semantic-ingestion-service.js.map