@rbalchii/anchor-engine 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539) hide show
  1. package/LICENSE +609 -0
  2. package/README.md +317 -0
  3. package/anchor.bat +5 -0
  4. package/docs/API.md +314 -0
  5. package/docs/DEPLOYMENT.md +448 -0
  6. package/docs/INDEX.md +226 -0
  7. package/docs/STAR_Whitepaper_Executive.md +216 -0
  8. package/docs/TROUBLESHOOTING.md +535 -0
  9. package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
  10. package/docs/archive/adoption-guide.md +264 -0
  11. package/docs/archive/adoption-preparation.md +179 -0
  12. package/docs/archive/agent-harness-integration.md +227 -0
  13. package/docs/archive/api-reference.md +106 -0
  14. package/docs/archive/api_flows_diagram.md +118 -0
  15. package/docs/archive/architecture.md +410 -0
  16. package/docs/archive/architecture_diagram.md +174 -0
  17. package/docs/archive/broader-adoption-preparation.md +175 -0
  18. package/docs/archive/browser-paradigm-architecture.md +163 -0
  19. package/docs/archive/chat-integration.md +124 -0
  20. package/docs/archive/community-adoption-materials.md +103 -0
  21. package/docs/archive/community-adoption.md +147 -0
  22. package/docs/archive/comparison-with-siloed-solutions.md +192 -0
  23. package/docs/archive/comprehensive-docs.md +156 -0
  24. package/docs/archive/data_flow_diagram.md +251 -0
  25. package/docs/archive/enhancement-implementation-summary.md +146 -0
  26. package/docs/archive/evolution-summary.md +141 -0
  27. package/docs/archive/ingestion_pipeline_diagram.md +198 -0
  28. package/docs/archive/native-module-profiling-results.md +135 -0
  29. package/docs/archive/positioning-document.md +158 -0
  30. package/docs/archive/positioning.md +175 -0
  31. package/docs/archive/query-builder-documentation.md +218 -0
  32. package/docs/archive/quick-reference.md +40 -0
  33. package/docs/archive/quickstart.md +63 -0
  34. package/docs/archive/relationship-narrative-discovery.md +141 -0
  35. package/docs/archive/search-logic-improvement-plan.md +336 -0
  36. package/docs/archive/search_architecture_diagram.md +212 -0
  37. package/docs/archive/semantic-architecture-guide.md +97 -0
  38. package/docs/archive/sequence-diagrams.md +128 -0
  39. package/docs/archive/system_components_diagram.md +296 -0
  40. package/docs/archive/test-framework-integration.md +109 -0
  41. package/docs/archive/testing-framework-documentation.md +397 -0
  42. package/docs/archive/testing-framework-summary.md +121 -0
  43. package/docs/archive/testing-framework.md +377 -0
  44. package/docs/archive/ui-architecture.md +75 -0
  45. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
  46. package/docs/arxiv/RELATED_WORK.tex +39 -0
  47. package/docs/arxiv/compile.bat +48 -0
  48. package/docs/arxiv/joss_response.md +33 -0
  49. package/docs/arxiv/prepare-submission.bat +46 -0
  50. package/docs/arxiv/review.md +128 -0
  51. package/docs/arxiv/star-whitepaper.tex +657 -0
  52. package/docs/code-patterns.md +289 -0
  53. package/docs/whitepaper.md +445 -0
  54. package/engine/dist/agent/runtime.d.ts +41 -0
  55. package/engine/dist/agent/runtime.d.ts.map +1 -0
  56. package/engine/dist/agent/runtime.js +73 -0
  57. package/engine/dist/agent/runtime.js.map +1 -0
  58. package/engine/dist/commands/audit-tags.d.ts +14 -0
  59. package/engine/dist/commands/audit-tags.d.ts.map +1 -0
  60. package/engine/dist/commands/audit-tags.js +180 -0
  61. package/engine/dist/commands/audit-tags.js.map +1 -0
  62. package/engine/dist/commands/distill.d.ts +19 -0
  63. package/engine/dist/commands/distill.d.ts.map +1 -0
  64. package/engine/dist/commands/distill.js +114 -0
  65. package/engine/dist/commands/distill.js.map +1 -0
  66. package/engine/dist/commands/generate-synonyms.d.ts +14 -0
  67. package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
  68. package/engine/dist/commands/generate-synonyms.js +91 -0
  69. package/engine/dist/commands/generate-synonyms.js.map +1 -0
  70. package/engine/dist/config/index.d.ts +115 -0
  71. package/engine/dist/config/index.d.ts.map +1 -0
  72. package/engine/dist/config/index.js +326 -0
  73. package/engine/dist/config/index.js.map +1 -0
  74. package/engine/dist/config/max-recall-config.d.ts +102 -0
  75. package/engine/dist/config/max-recall-config.d.ts.map +1 -0
  76. package/engine/dist/config/max-recall-config.js +102 -0
  77. package/engine/dist/config/max-recall-config.js.map +1 -0
  78. package/engine/dist/config/paths.d.ts +40 -0
  79. package/engine/dist/config/paths.d.ts.map +1 -0
  80. package/engine/dist/config/paths.js +49 -0
  81. package/engine/dist/config/paths.js.map +1 -0
  82. package/engine/dist/core/batch.d.ts +19 -0
  83. package/engine/dist/core/batch.d.ts.map +1 -0
  84. package/engine/dist/core/batch.js +37 -0
  85. package/engine/dist/core/batch.js.map +1 -0
  86. package/engine/dist/core/db.d.ts +58 -0
  87. package/engine/dist/core/db.d.ts.map +1 -0
  88. package/engine/dist/core/db.js +563 -0
  89. package/engine/dist/core/db.js.map +1 -0
  90. package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
  91. package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
  92. package/engine/dist/core/inference/ChatWorker.js +28 -0
  93. package/engine/dist/core/inference/ChatWorker.js.map +1 -0
  94. package/engine/dist/core/inference/context_manager.d.ts +49 -0
  95. package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
  96. package/engine/dist/core/inference/context_manager.js +199 -0
  97. package/engine/dist/core/inference/context_manager.js.map +1 -0
  98. package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
  99. package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
  100. package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
  101. package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
  102. package/engine/dist/core/vector.d.ts +40 -0
  103. package/engine/dist/core/vector.d.ts.map +1 -0
  104. package/engine/dist/core/vector.js +167 -0
  105. package/engine/dist/core/vector.js.map +1 -0
  106. package/engine/dist/index.d.ts +4 -0
  107. package/engine/dist/index.d.ts.map +1 -0
  108. package/engine/dist/index.js +400 -0
  109. package/engine/dist/index.js.map +1 -0
  110. package/engine/dist/middleware/auth.d.ts +14 -0
  111. package/engine/dist/middleware/auth.d.ts.map +1 -0
  112. package/engine/dist/middleware/auth.js +44 -0
  113. package/engine/dist/middleware/auth.js.map +1 -0
  114. package/engine/dist/middleware/request-tracing.d.ts +29 -0
  115. package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
  116. package/engine/dist/middleware/request-tracing.js +115 -0
  117. package/engine/dist/middleware/request-tracing.js.map +1 -0
  118. package/engine/dist/middleware/validate.d.ts +30 -0
  119. package/engine/dist/middleware/validate.d.ts.map +1 -0
  120. package/engine/dist/middleware/validate.js +117 -0
  121. package/engine/dist/middleware/validate.js.map +1 -0
  122. package/engine/dist/native/index.d.ts +106 -0
  123. package/engine/dist/native/index.d.ts.map +1 -0
  124. package/engine/dist/native/index.js +230 -0
  125. package/engine/dist/native/index.js.map +1 -0
  126. package/engine/dist/native/types.d.ts +45 -0
  127. package/engine/dist/native/types.d.ts.map +1 -0
  128. package/engine/dist/native/types.js +6 -0
  129. package/engine/dist/native/types.js.map +1 -0
  130. package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
  131. package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
  132. package/engine/dist/profiling/atomization-profiling.js +108 -0
  133. package/engine/dist/profiling/atomization-profiling.js.map +1 -0
  134. package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
  135. package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
  136. package/engine/dist/profiling/bottleneck-identification.js +249 -0
  137. package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
  138. package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
  139. package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
  140. package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
  141. package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
  142. package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
  143. package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
  144. package/engine/dist/profiling/simhash-profiling.js +168 -0
  145. package/engine/dist/profiling/simhash-profiling.js.map +1 -0
  146. package/engine/dist/routes/api.d.ts +9 -0
  147. package/engine/dist/routes/api.d.ts.map +1 -0
  148. package/engine/dist/routes/api.js +37 -0
  149. package/engine/dist/routes/api.js.map +1 -0
  150. package/engine/dist/routes/enhanced-api.d.ts +9 -0
  151. package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
  152. package/engine/dist/routes/enhanced-api.js +139 -0
  153. package/engine/dist/routes/enhanced-api.js.map +1 -0
  154. package/engine/dist/routes/health.d.ts +8 -0
  155. package/engine/dist/routes/health.d.ts.map +1 -0
  156. package/engine/dist/routes/health.js +89 -0
  157. package/engine/dist/routes/health.js.map +1 -0
  158. package/engine/dist/routes/monitoring.d.ts +8 -0
  159. package/engine/dist/routes/monitoring.d.ts.map +1 -0
  160. package/engine/dist/routes/monitoring.js +509 -0
  161. package/engine/dist/routes/monitoring.js.map +1 -0
  162. package/engine/dist/routes/v1/admin.d.ts +3 -0
  163. package/engine/dist/routes/v1/admin.d.ts.map +1 -0
  164. package/engine/dist/routes/v1/admin.js +261 -0
  165. package/engine/dist/routes/v1/admin.js.map +1 -0
  166. package/engine/dist/routes/v1/atoms.d.ts +3 -0
  167. package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
  168. package/engine/dist/routes/v1/atoms.js +172 -0
  169. package/engine/dist/routes/v1/atoms.js.map +1 -0
  170. package/engine/dist/routes/v1/backup.d.ts +3 -0
  171. package/engine/dist/routes/v1/backup.d.ts.map +1 -0
  172. package/engine/dist/routes/v1/backup.js +100 -0
  173. package/engine/dist/routes/v1/backup.js.map +1 -0
  174. package/engine/dist/routes/v1/git.d.ts +3 -0
  175. package/engine/dist/routes/v1/git.d.ts.map +1 -0
  176. package/engine/dist/routes/v1/git.js +316 -0
  177. package/engine/dist/routes/v1/git.js.map +1 -0
  178. package/engine/dist/routes/v1/ingest.d.ts +3 -0
  179. package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
  180. package/engine/dist/routes/v1/ingest.js +66 -0
  181. package/engine/dist/routes/v1/ingest.js.map +1 -0
  182. package/engine/dist/routes/v1/memory.d.ts +14 -0
  183. package/engine/dist/routes/v1/memory.d.ts.map +1 -0
  184. package/engine/dist/routes/v1/memory.js +87 -0
  185. package/engine/dist/routes/v1/memory.js.map +1 -0
  186. package/engine/dist/routes/v1/research.d.ts +3 -0
  187. package/engine/dist/routes/v1/research.d.ts.map +1 -0
  188. package/engine/dist/routes/v1/research.js +109 -0
  189. package/engine/dist/routes/v1/research.js.map +1 -0
  190. package/engine/dist/routes/v1/search.d.ts +3 -0
  191. package/engine/dist/routes/v1/search.d.ts.map +1 -0
  192. package/engine/dist/routes/v1/search.js +180 -0
  193. package/engine/dist/routes/v1/search.js.map +1 -0
  194. package/engine/dist/routes/v1/settings.d.ts +8 -0
  195. package/engine/dist/routes/v1/settings.d.ts.map +1 -0
  196. package/engine/dist/routes/v1/settings.js +211 -0
  197. package/engine/dist/routes/v1/settings.js.map +1 -0
  198. package/engine/dist/routes/v1/system.d.ts +3 -0
  199. package/engine/dist/routes/v1/system.d.ts.map +1 -0
  200. package/engine/dist/routes/v1/system.js +326 -0
  201. package/engine/dist/routes/v1/system.js.map +1 -0
  202. package/engine/dist/routes/v1/tags.d.ts +3 -0
  203. package/engine/dist/routes/v1/tags.d.ts.map +1 -0
  204. package/engine/dist/routes/v1/tags.js +102 -0
  205. package/engine/dist/routes/v1/tags.js.map +1 -0
  206. package/engine/dist/server-8080.d.ts +2 -0
  207. package/engine/dist/server-8080.d.ts.map +1 -0
  208. package/engine/dist/server-8080.js +74 -0
  209. package/engine/dist/server-8080.js.map +1 -0
  210. package/engine/dist/services/backup/backup-restore.d.ts +37 -0
  211. package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
  212. package/engine/dist/services/backup/backup-restore.js +385 -0
  213. package/engine/dist/services/backup/backup-restore.js.map +1 -0
  214. package/engine/dist/services/backup/backup.d.ts +14 -0
  215. package/engine/dist/services/backup/backup.d.ts.map +1 -0
  216. package/engine/dist/services/backup/backup.js +442 -0
  217. package/engine/dist/services/backup/backup.js.map +1 -0
  218. package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
  219. package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
  220. package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
  221. package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
  222. package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
  223. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
  224. package/engine/dist/services/distillation/radial-distiller.js +394 -0
  225. package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
  226. package/engine/dist/services/health-check-enhanced.d.ts +89 -0
  227. package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
  228. package/engine/dist/services/health-check-enhanced.js +417 -0
  229. package/engine/dist/services/health-check-enhanced.js.map +1 -0
  230. package/engine/dist/services/idle-manager.d.ts +56 -0
  231. package/engine/dist/services/idle-manager.d.ts.map +1 -0
  232. package/engine/dist/services/idle-manager.js +210 -0
  233. package/engine/dist/services/idle-manager.js.map +1 -0
  234. package/engine/dist/services/inference/inference-service.d.ts +27 -0
  235. package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
  236. package/engine/dist/services/inference/inference-service.js +89 -0
  237. package/engine/dist/services/inference/inference-service.js.map +1 -0
  238. package/engine/dist/services/inference/inference.d.ts +59 -0
  239. package/engine/dist/services/inference/inference.d.ts.map +1 -0
  240. package/engine/dist/services/inference/inference.js +131 -0
  241. package/engine/dist/services/inference/inference.js.map +1 -0
  242. package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
  243. package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
  244. package/engine/dist/services/ingest/atomizer-service.js +982 -0
  245. package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
  246. package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
  247. package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
  248. package/engine/dist/services/ingest/content-cleaner.js +166 -0
  249. package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
  250. package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
  251. package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
  252. package/engine/dist/services/ingest/github-ingest-service.js +537 -0
  253. package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
  254. package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
  255. package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
  256. package/engine/dist/services/ingest/ingest-atomic.js +437 -0
  257. package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
  258. package/engine/dist/services/ingest/ingest.d.ts +50 -0
  259. package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
  260. package/engine/dist/services/ingest/ingest.js +230 -0
  261. package/engine/dist/services/ingest/ingest.js.map +1 -0
  262. package/engine/dist/services/ingest/watchdog.d.ts +31 -0
  263. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
  264. package/engine/dist/services/ingest/watchdog.js +400 -0
  265. package/engine/dist/services/ingest/watchdog.js.map +1 -0
  266. package/engine/dist/services/llm/context.d.ts +6 -0
  267. package/engine/dist/services/llm/context.d.ts.map +1 -0
  268. package/engine/dist/services/llm/context.js +80 -0
  269. package/engine/dist/services/llm/context.js.map +1 -0
  270. package/engine/dist/services/llm/provider.d.ts +23 -0
  271. package/engine/dist/services/llm/provider.d.ts.map +1 -0
  272. package/engine/dist/services/llm/provider.js +338 -0
  273. package/engine/dist/services/llm/provider.js.map +1 -0
  274. package/engine/dist/services/llm/reader.d.ts +12 -0
  275. package/engine/dist/services/llm/reader.d.ts.map +1 -0
  276. package/engine/dist/services/llm/reader.js +40 -0
  277. package/engine/dist/services/llm/reader.js.map +1 -0
  278. package/engine/dist/services/mirror/mirror.d.ts +28 -0
  279. package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
  280. package/engine/dist/services/mirror/mirror.js +208 -0
  281. package/engine/dist/services/mirror/mirror.js.map +1 -0
  282. package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
  283. package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
  284. package/engine/dist/services/nlp/nlp-service.js +151 -0
  285. package/engine/dist/services/nlp/nlp-service.js.map +1 -0
  286. package/engine/dist/services/nlp/query-parser.d.ts +9 -0
  287. package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
  288. package/engine/dist/services/nlp/query-parser.js +29 -0
  289. package/engine/dist/services/nlp/query-parser.js.map +1 -0
  290. package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
  291. package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
  292. package/engine/dist/services/query-builder/DataFrame.js +263 -0
  293. package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
  294. package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
  295. package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
  296. package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
  297. package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
  298. package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
  299. package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
  300. package/engine/dist/services/query-builder/utils/export.js +130 -0
  301. package/engine/dist/services/query-builder/utils/export.js.map +1 -0
  302. package/engine/dist/services/research/researcher.d.ts +15 -0
  303. package/engine/dist/services/research/researcher.d.ts.map +1 -0
  304. package/engine/dist/services/research/researcher.js +123 -0
  305. package/engine/dist/services/research/researcher.js.map +1 -0
  306. package/engine/dist/services/scribe/scribe.d.ts +43 -0
  307. package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
  308. package/engine/dist/services/scribe/scribe.js +135 -0
  309. package/engine/dist/services/scribe/scribe.js.map +1 -0
  310. package/engine/dist/services/search/bright-nodes.d.ts +41 -0
  311. package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
  312. package/engine/dist/services/search/bright-nodes.js +117 -0
  313. package/engine/dist/services/search/bright-nodes.js.map +1 -0
  314. package/engine/dist/services/search/context-inflator.d.ts +63 -0
  315. package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
  316. package/engine/dist/services/search/context-inflator.js +649 -0
  317. package/engine/dist/services/search/context-inflator.js.map +1 -0
  318. package/engine/dist/services/search/context-manager.d.ts +34 -0
  319. package/engine/dist/services/search/context-manager.d.ts.map +1 -0
  320. package/engine/dist/services/search/context-manager.js +124 -0
  321. package/engine/dist/services/search/context-manager.js.map +1 -0
  322. package/engine/dist/services/search/distributed-query.d.ts +38 -0
  323. package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
  324. package/engine/dist/services/search/distributed-query.js +105 -0
  325. package/engine/dist/services/search/distributed-query.js.map +1 -0
  326. package/engine/dist/services/search/explore.d.ts +73 -0
  327. package/engine/dist/services/search/explore.d.ts.map +1 -0
  328. package/engine/dist/services/search/explore.js +388 -0
  329. package/engine/dist/services/search/explore.js.map +1 -0
  330. package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
  331. package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
  332. package/engine/dist/services/search/graph-context-serializer.js +435 -0
  333. package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
  334. package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
  335. package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
  336. package/engine/dist/services/search/llm-context-formatter.js +394 -0
  337. package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
  338. package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
  339. package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
  340. package/engine/dist/services/search/physics-tag-walker.js +611 -0
  341. package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
  342. package/engine/dist/services/search/query-parser.d.ts +66 -0
  343. package/engine/dist/services/search/query-parser.d.ts.map +1 -0
  344. package/engine/dist/services/search/query-parser.js +346 -0
  345. package/engine/dist/services/search/query-parser.js.map +1 -0
  346. package/engine/dist/services/search/search-utils.d.ts +100 -0
  347. package/engine/dist/services/search/search-utils.d.ts.map +1 -0
  348. package/engine/dist/services/search/search-utils.js +473 -0
  349. package/engine/dist/services/search/search-utils.js.map +1 -0
  350. package/engine/dist/services/search/search.d.ts +116 -0
  351. package/engine/dist/services/search/search.d.ts.map +1 -0
  352. package/engine/dist/services/search/search.js +1286 -0
  353. package/engine/dist/services/search/search.js.map +1 -0
  354. package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
  355. package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
  356. package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
  357. package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
  358. package/engine/dist/services/search/streaming-search.d.ts +51 -0
  359. package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
  360. package/engine/dist/services/search/streaming-search.js +94 -0
  361. package/engine/dist/services/search/streaming-search.js.map +1 -0
  362. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
  363. package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
  364. package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
  365. package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
  366. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
  367. package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
  368. package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
  369. package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
  370. package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
  371. package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
  372. package/engine/dist/services/semantic/semantic-search.js +649 -0
  373. package/engine/dist/services/semantic/semantic-search.js.map +1 -0
  374. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
  375. package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
  376. package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
  377. package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
  378. package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
  379. package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
  380. package/engine/dist/services/semantic/types/semantic.js +7 -0
  381. package/engine/dist/services/semantic/types/semantic.js.map +1 -0
  382. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
  383. package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
  384. package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
  385. package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
  386. package/engine/dist/services/system-status.d.ts +68 -0
  387. package/engine/dist/services/system-status.d.ts.map +1 -0
  388. package/engine/dist/services/system-status.js +107 -0
  389. package/engine/dist/services/system-status.js.map +1 -0
  390. package/engine/dist/services/tags/discovery.d.ts +16 -0
  391. package/engine/dist/services/tags/discovery.d.ts.map +1 -0
  392. package/engine/dist/services/tags/discovery.js +206 -0
  393. package/engine/dist/services/tags/discovery.js.map +1 -0
  394. package/engine/dist/services/tags/gliner.d.ts +18 -0
  395. package/engine/dist/services/tags/gliner.d.ts.map +1 -0
  396. package/engine/dist/services/tags/gliner.js +119 -0
  397. package/engine/dist/services/tags/gliner.js.map +1 -0
  398. package/engine/dist/services/tags/infector.d.ts +21 -0
  399. package/engine/dist/services/tags/infector.d.ts.map +1 -0
  400. package/engine/dist/services/tags/infector.js +168 -0
  401. package/engine/dist/services/tags/infector.js.map +1 -0
  402. package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
  403. package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
  404. package/engine/dist/services/tags/tag-auditor.js +283 -0
  405. package/engine/dist/services/tags/tag-auditor.js.map +1 -0
  406. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
  407. package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
  408. package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
  409. package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
  410. package/engine/dist/services/vision/vision_service.d.ts +4 -0
  411. package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
  412. package/engine/dist/services/vision/vision_service.js +197 -0
  413. package/engine/dist/services/vision/vision_service.js.map +1 -0
  414. package/engine/dist/test-framework/core.d.ts +133 -0
  415. package/engine/dist/test-framework/core.d.ts.map +1 -0
  416. package/engine/dist/test-framework/core.js +313 -0
  417. package/engine/dist/test-framework/core.js.map +1 -0
  418. package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
  419. package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
  420. package/engine/dist/test-framework/dataset-runner.js +223 -0
  421. package/engine/dist/test-framework/dataset-runner.js.map +1 -0
  422. package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
  423. package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
  424. package/engine/dist/test-framework/diagnostic-tests.js +283 -0
  425. package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
  426. package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
  427. package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
  428. package/engine/dist/test-framework/performance-regression-tests.js +331 -0
  429. package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
  430. package/engine/dist/types/api.d.ts +53 -0
  431. package/engine/dist/types/api.d.ts.map +1 -0
  432. package/engine/dist/types/api.js +2 -0
  433. package/engine/dist/types/api.js.map +1 -0
  434. package/engine/dist/types/atomic.d.ts +42 -0
  435. package/engine/dist/types/atomic.d.ts.map +1 -0
  436. package/engine/dist/types/atomic.js +10 -0
  437. package/engine/dist/types/atomic.js.map +1 -0
  438. package/engine/dist/types/context-protocol.d.ts +137 -0
  439. package/engine/dist/types/context-protocol.d.ts.map +1 -0
  440. package/engine/dist/types/context-protocol.js +28 -0
  441. package/engine/dist/types/context-protocol.js.map +1 -0
  442. package/engine/dist/types/context.d.ts +2 -0
  443. package/engine/dist/types/context.d.ts.map +1 -0
  444. package/engine/dist/types/context.js +2 -0
  445. package/engine/dist/types/context.js.map +1 -0
  446. package/engine/dist/types/index.d.ts +20 -0
  447. package/engine/dist/types/index.d.ts.map +1 -0
  448. package/engine/dist/types/index.js +18 -0
  449. package/engine/dist/types/index.js.map +1 -0
  450. package/engine/dist/types/search.d.ts +31 -0
  451. package/engine/dist/types/search.d.ts.map +1 -0
  452. package/engine/dist/types/search.js +2 -0
  453. package/engine/dist/types/search.js.map +1 -0
  454. package/engine/dist/types/taxonomy.d.ts +137 -0
  455. package/engine/dist/types/taxonomy.d.ts.map +1 -0
  456. package/engine/dist/types/taxonomy.js +138 -0
  457. package/engine/dist/types/taxonomy.js.map +1 -0
  458. package/engine/dist/types/taxonomy.simple.d.ts +131 -0
  459. package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
  460. package/engine/dist/types/taxonomy.simple.js +132 -0
  461. package/engine/dist/types/taxonomy.simple.js.map +1 -0
  462. package/engine/dist/types/tool-call.d.ts +16 -0
  463. package/engine/dist/types/tool-call.d.ts.map +1 -0
  464. package/engine/dist/types/tool-call.js +6 -0
  465. package/engine/dist/types/tool-call.js.map +1 -0
  466. package/engine/dist/types/trace.d.ts +25 -0
  467. package/engine/dist/types/trace.d.ts.map +1 -0
  468. package/engine/dist/types/trace.js +5 -0
  469. package/engine/dist/types/trace.js.map +1 -0
  470. package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
  471. package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
  472. package/engine/dist/utils/adaptive-concurrency.js +266 -0
  473. package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
  474. package/engine/dist/utils/date_extractor.d.ts +2 -0
  475. package/engine/dist/utils/date_extractor.d.ts.map +1 -0
  476. package/engine/dist/utils/date_extractor.js +32 -0
  477. package/engine/dist/utils/date_extractor.js.map +1 -0
  478. package/engine/dist/utils/native-module-manager.d.ts +48 -0
  479. package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
  480. package/engine/dist/utils/native-module-manager.js +265 -0
  481. package/engine/dist/utils/native-module-manager.js.map +1 -0
  482. package/engine/dist/utils/native-module-profiler.d.ts +66 -0
  483. package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
  484. package/engine/dist/utils/native-module-profiler.js +182 -0
  485. package/engine/dist/utils/native-module-profiler.js.map +1 -0
  486. package/engine/dist/utils/path-manager.d.ts +59 -0
  487. package/engine/dist/utils/path-manager.d.ts.map +1 -0
  488. package/engine/dist/utils/path-manager.js +154 -0
  489. package/engine/dist/utils/path-manager.js.map +1 -0
  490. package/engine/dist/utils/performance-monitor.d.ts +92 -0
  491. package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
  492. package/engine/dist/utils/performance-monitor.js +221 -0
  493. package/engine/dist/utils/performance-monitor.js.map +1 -0
  494. package/engine/dist/utils/process-manager.d.ts +18 -0
  495. package/engine/dist/utils/process-manager.d.ts.map +1 -0
  496. package/engine/dist/utils/process-manager.js +100 -0
  497. package/engine/dist/utils/process-manager.js.map +1 -0
  498. package/engine/dist/utils/request-tracer.d.ts +131 -0
  499. package/engine/dist/utils/request-tracer.d.ts.map +1 -0
  500. package/engine/dist/utils/request-tracer.js +414 -0
  501. package/engine/dist/utils/request-tracer.js.map +1 -0
  502. package/engine/dist/utils/resource-manager.d.ts +108 -0
  503. package/engine/dist/utils/resource-manager.d.ts.map +1 -0
  504. package/engine/dist/utils/resource-manager.js +235 -0
  505. package/engine/dist/utils/resource-manager.js.map +1 -0
  506. package/engine/dist/utils/safe-dns.d.ts +14 -0
  507. package/engine/dist/utils/safe-dns.d.ts.map +1 -0
  508. package/engine/dist/utils/safe-dns.js +105 -0
  509. package/engine/dist/utils/safe-dns.js.map +1 -0
  510. package/engine/dist/utils/structured-logger.d.ts +124 -0
  511. package/engine/dist/utils/structured-logger.d.ts.map +1 -0
  512. package/engine/dist/utils/structured-logger.js +332 -0
  513. package/engine/dist/utils/structured-logger.js.map +1 -0
  514. package/engine/dist/utils/tag-cleanup.d.ts +11 -0
  515. package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
  516. package/engine/dist/utils/tag-cleanup.js +111 -0
  517. package/engine/dist/utils/tag-cleanup.js.map +1 -0
  518. package/engine/dist/utils/tag-filter.d.ts +19 -0
  519. package/engine/dist/utils/tag-filter.d.ts.map +1 -0
  520. package/engine/dist/utils/tag-filter.js +147 -0
  521. package/engine/dist/utils/tag-filter.js.map +1 -0
  522. package/engine/dist/utils/tag-modulation.d.ts +80 -0
  523. package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
  524. package/engine/dist/utils/tag-modulation.js +284 -0
  525. package/engine/dist/utils/tag-modulation.js.map +1 -0
  526. package/engine/dist/utils/timer.d.ts +40 -0
  527. package/engine/dist/utils/timer.d.ts.map +1 -0
  528. package/engine/dist/utils/timer.js +76 -0
  529. package/engine/dist/utils/timer.js.map +1 -0
  530. package/engine/dist/utils/token-utils.d.ts +19 -0
  531. package/engine/dist/utils/token-utils.d.ts.map +1 -0
  532. package/engine/dist/utils/token-utils.js +71 -0
  533. package/engine/dist/utils/token-utils.js.map +1 -0
  534. package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
  535. package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
  536. package/engine/dist/utils/wasm-module-loader.js +136 -0
  537. package/engine/dist/utils/wasm-module-loader.js.map +1 -0
  538. package/engine/package.json +105 -0
  539. package/package.json +106 -0
@@ -0,0 +1,657 @@
1
+ \documentclass[11pt]{article}
2
+
3
+ % Basic packages
4
+ \usepackage[utf8]{inputenc}
5
+ \usepackage[T1]{fontenc}
6
+ \usepackage{amsmath,amssymb,amsfonts}
7
+ \usepackage{graphicx}
8
+ \usepackage{hyperref}
9
+ \usepackage{booktabs}
10
+ \usepackage{geometry}
11
+ \usepackage{listings}
12
+ \usepackage{xcolor}
13
+ \usepackage{caption}
14
+ \usepackage{subcaption}
15
+ \usepackage{multirow}
16
+ \usepackage{array}
17
+ \usepackage[numbers]{natbib} % For citations
18
+
19
+ % Page layout
20
+ \geometry{letterpaper, margin=1in}
21
+
22
+ % Code listing style
23
+ \definecolor{codegreen}{rgb}{0,0.6,0}
24
+ \definecolor{codegray}{rgb}{0.5,0.5,0.5}
25
+ \definecolor{codepurple}{rgb}{0.58,0,0.82}
26
+ \definecolor{backcolour}{rgb}{0.95,0.95,0.92}
27
+
28
+ \lstdefinestyle{mystyle}{
29
+ backgroundcolor=\color{backcolour},
30
+ commentstyle=\color{codegreen},
31
+ keywordstyle=\color{magenta},
32
+ numberstyle=\tiny\color{codegray},
33
+ stringstyle=\color{codepurple},
34
+ basicstyle=\ttfamily\footnotesize,
35
+ breakatwhitespace=false,
36
+ breaklines=true,
37
+ captionpos=b,
38
+ keepspaces=true,
39
+ numbers=left,
40
+ numbersep=5pt,
41
+ showspaces=false,
42
+ showstringspaces=false,
43
+ showtabs=false,
44
+ tabsize=2
45
+ }
46
+
47
+ \lstset{style=mystyle}
48
+
49
+ % Hyperref setup
50
+ \hypersetup{
51
+ colorlinks=true,
52
+ linkcolor=blue,
53
+ filecolor=magenta,
54
+ urlcolor=cyan,
55
+ pdftitle={STAR: Semantic Temporal Associative Retrieval},
56
+ pdfauthor={R.S. Balch II},
57
+ pdfsubject={Information Retrieval, Graph-Based Search, Local-First AI},
58
+ pdfkeywords={Information Retrieval, Graph-Based Search, SimHash, Local-First AI, Explainable AI, PGlite}
59
+ }
60
+
61
+ % Title information
62
+ \title{STAR: Semantic Temporal Associative Retrieval\\
63
+ \large The Browser Paradigm for AI Memory}
64
+
65
+ \author{
66
+ R.S. Balch II\\
67
+ \texttt{rsbalchii@gmail.com}\\
68
+ \url{https://github.com/RSBalchII/anchor-engine-node}
69
+ }
70
+
71
+ \date{\today}
72
+
73
+ \begin{document}
74
+
75
+ \maketitle
76
+
77
+ \begin{abstract}
78
+ AI memory is broken. To achieve serious context retrieval, practitioners need server racks, GPU budgets, and cloud subscriptions. Intelligence is locked in black boxes---massive vector indices consuming gigabytes of RAM and tying users to proprietary systems.
79
+
80
+ This paper presents \textbf{STAR} (Semantic Temporal Associative Retrieval), a novel retrieval algorithm implementing the ``Browser Paradigm'' for AI memory. Like a browser rendering websites by loading only necessary shards, STAR enables any device from \$200 laptops to supercomputers to navigate massive context by retrieving only atoms required for the current query.
81
+
82
+ We present the mathematical foundation, implementation details, and production benchmarks from real workloads: 91MB chat history ingested in under 3 minutes, 280,000 molecules indexed, zero data loss. STAR achieves $O(k \cdot \bar{d})$ retrieval complexity, where $k$ is the number of query tags and $\bar{d}$ is the average tag degree, compared to $O(\log n)$ query time on top of $O(n \log n)$ index construction for dense vector ANN.
83
+
84
+ The future of AI memory isn't bigger silos---it's universal, sharded utility running on hardware you already own.
85
+ \end{abstract}
86
+
87
+ \section{Introduction}
88
+ \label{sec:introduction}
89
+
90
+ Web browsers are universal. The same website renders identically on a \$300 Chromebook and a \$5000 MacBook Pro because browsers download only necessary shards (HTML, CSS, JS) for the current view---not the entire internet.
91
+
92
+ AI memory should operate similarly. Current Retrieval-Augmented Generation (RAG) systems require loading complete HNSW indices into RAM---gigabytes of vector data---before searching. This restricts deployment to high-spec servers, creating artificial scarcity.
93
+
94
+ \subsection{Contributions}
95
+
96
+ This paper makes the following contributions:
97
+
98
+ \begin{enumerate}
99
+ \item \textbf{STAR Algorithm}: Physics-based graph traversal with temporal decay and SimHash fingerprinting
100
+ \item \textbf{Browser Paradigm}: Sharded atomization enabling 4GB RAM laptops to navigate 10TB+ datasets
101
+ \item \textbf{Production Benchmarks}: Real-world performance on 100MB dataset (280K molecules, 151K atoms)
102
+ \item \textbf{SQL-Native Implementation}: Unified Field Equation executed in PGlite in $\sim$10ms
103
+ \end{enumerate}
104
+
105
+ \section{Mathematical Foundation}
106
+ \label{sec:math}
107
+
108
+ \subsection{Bipartite Graph Formalization}
109
+
110
+ Let $G = (A, T, E)$ be a bipartite graph where:
111
+
112
+ \begin{itemize}
113
+ \item \textbf{$A = \{a_1, a_2, \ldots, a_n\}$}: Set of \textit{Atoms} (text/code/data chunks with byte-offset pointers)
114
+ \item \textbf{$T = \{t_1, t_2, \ldots, t_m\}$}: Set of \textit{Tags} (extracted semantic entities/concepts)
115
+ \item \textbf{$E \subseteq A \times T$}: Sparse edges where $|E| \ll |A| \times |T|$
116
+ \end{itemize}
117
+
118
+ Our bipartite structure draws inspiration from PageRank's graph model \cite{brin1998anatomy}, adapted for personal knowledge graphs with explicit tag-based provenance.
119
+
120
+ Each atom $a_i \in A$ has:
121
+ \begin{itemize}
122
+ \item \textbf{Content pointer}: $(source_i, start_i, end_i)$ --- file path and byte offsets
123
+ \item \textbf{Tag set}: $T(a_i) = \{t \in T : (a_i, t) \in E\}$
124
+ \item \textbf{Timestamp}: $\tau_i \in \mathbb{R}^+$ (Unix epoch)
125
+ \item \textbf{SimHash fingerprint}: $h_i \in \{0,1\}^{64}$
126
+ \end{itemize}
127
+
128
+ \subsection{The Unified Field Equation}
129
+
130
+ For query $q$ with tag set $T(q)$ and candidate atom $a$, the \textbf{gravity score} is:
131
+
132
+ \begin{equation}
133
+ \label{eq:unified_field}
134
+ W(q, a) = \underbrace{\left(\sum_{t \in T(q) \cap T(a)} 1\right) \cdot \gamma^{d(q,a)}}_{\text{Semantic Gravity}} \times \underbrace{e^{-\lambda \Delta t}}_{\text{Temporal Decay}} \times \underbrace{\left(1 - \frac{H(h_q, h_a)}{64}\right)}_{\text{Structural Gravity}}
135
+ \end{equation}
136
+
137
+ Where:
138
+
139
+ \begin{table}[h]
140
+ \centering
141
+ \caption{Unified Field Equation Parameters}
142
+ \label{tab:parameters}
143
+ \begin{tabular}{@{}lll@{}}
144
+ \toprule
145
+ \textbf{Symbol} & \textbf{Meaning} & \textbf{Default} \\ \midrule
146
+ $\gamma$ & Damping factor (controls walk viscosity) & 0.85 \\
147
+ $\lambda$ & Decay constant (half-life $\approx$ 7.9 years) & 0.00001 h$^{-1}$ \\
148
+ $d(q,a)$ & Graph hop distance (0 = direct, 1 = 1-hop) & $\in \{0,1,2,3\}$ \\
149
+ $\Delta t$ & Time difference $|\tau_q - \tau_a|$ in hours & --- \\
150
+ $H(\cdot,\cdot)$ & Hamming distance on 64-bit SimHash & 0--64 \\
151
+ $h_q, h_a$ & SimHash fingerprints of query and atom & $\{0,1\}^{64}$ \\ \bottomrule
152
+ \end{tabular}
153
+ \end{table}
154
+
155
+ \paragraph{Component Breakdown:}
156
+
157
+ \begin{enumerate}
158
+ \item \textbf{Semantic Gravity}: $|T(q) \cap T(a)| \cdot \gamma^{d(q,a)}$
159
+ \begin{itemize}
160
+ \item Shared tag count weighted by graph distance
161
+ \item Exponential decay with hop distance (damping)
162
+ \item Graph distance $d(q,a)$ is the minimum number of hops from any anchor atom (direct match to the query) to candidate $a$.
163
+ \end{itemize}
164
+
165
+ \item \textbf{Temporal Decay}: $e^{-\lambda \Delta t}$
166
+ \begin{itemize}
167
+ \item Recent memories exert stronger gravitational pull
168
+ \item Half-life: $t_{1/2} = \ln(2)/\lambda \approx 69{,}315$ hours $\approx$ 7.9 years
169
+ \item The small decay constant ($\lambda = 0.00001$ h$^{-1}$) ensures memories remain accessible for years, suitable for personal knowledge graphs
170
+ \end{itemize}
171
+
172
+ \item \textbf{Structural Gravity}: $1 - \frac{H(h_q, h_a)}{64}$
173
+ \begin{itemize}
174
+ \item SimHash proximity (1 = identical, 0.5 = uncorrelated, 0 = completely different)
175
+ \item Hamming distance normalized to [0,1]; lower distance = higher similarity
176
+ \item Enables O(1) deduplication via 64-bit fingerprinting \cite{charikar2002similar}
177
+ \end{itemize}
178
+ \end{enumerate}
179
+
180
+ \subsection{SQL-Native Implementation}
181
+
182
+ The Unified Field Equation executes as a single SQL operation in PGlite:
183
+
184
+ \begin{lstlisting}[language=SQL, caption={SQL Implementation of Unified Field Equation}]
185
+ WITH anchor_stats AS (
186
+ SELECT id, timestamp, simhash
187
+ FROM atoms WHERE id IN ($1::text[])
188
+ ),
189
+ candidates AS (
190
+ SELECT t.atom_id, a.timestamp, a.simhash,
191
+ COUNT(DISTINCT t.tag) as shared_tags
192
+ FROM tags t
193
+ JOIN atoms a ON t.atom_id = a.id
194
+ WHERE t.tag IN (SELECT DISTINCT tag FROM tags
195
+ WHERE atom_id IN (SELECT id FROM anchor_stats))
196
+ GROUP BY t.atom_id, a.timestamp, a.simhash
197
+ )
198
+ SELECT atom_id,
199
+ MAX(
200
+ GREATEST(0.0, LEAST(1.0,
201
+ ((shared_tags / 10.0) * 0.85) *
202
+ EXP(-0.00001 * ABS(timestamp - anchor_ts) / 3600000.0) *
203
+ (1.0 - (bit_count(('x' || LPAD(simhash, 16, '0'))::bit(64)
204
+ # ('x' || LPAD(anchor_sh, 16, '0'))::bit(64)) / 64.0))
205
+ ))
206
+ ) as gravity_score
207
+ FROM candidates
208
+ CROSS JOIN anchor_stats
209
+ GROUP BY atom_id
210
+ HAVING gravity_score > 0.1
211
+ ORDER BY gravity_score DESC
212
+ LIMIT 200;
213
+ \end{lstlisting}
214
+
215
+ \paragraph{Implementation Notes:}
216
+ \begin{itemize}
217
+ \item \textbf{Normalization}: The \texttt{shared\_tags / 10.0} term normalizes tag counts, assuming a maximum of $\sim$10 shared tags for typical queries; the final gravity score is clamped to $[0,1]$ via \texttt{GREATEST}/\texttt{LEAST}.
218
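+ \item \textbf{Damping}: The \texttt{POWER(0.85, hop)} factor in the recursive form (Appendix~\ref{app:sql}) applies per-hop decay; the single-hop listing above hard-codes the hop-1 value $0.85$. Multi-hop results decay exponentially (hop 1: 85\%, hop 2: 72\%, hop 3: 61\%).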
219
+ \item \textbf{Recursive Tag-Walker}: The recursive CTE (Appendix~\ref{app:sql}) computes semantic overlap between successive atoms along the walk (not directly with the query) because the anchor atoms' tags already capture the query intent; this allows discovery of indirect associations while preserving semantic coherence.
220
+ \item \textbf{Hop Tracking}: Recursive CTE tracks graph distance from anchors for proper damping application
221
+ \item \textbf{Physical Bonus}: Production implementations may add proximity-based bonuses for co-located atoms
222
+ \item \textbf{Bitwise Operations}: SimHash distance uses XOR (\texttt{\#}) and \texttt{bit\_count} for O(1) computation
223
+ \end{itemize}
224
+
225
+ \paragraph{Performance Characteristics:}
226
+ \begin{itemize}
227
+ \item Sparse matrix multiplication via \texttt{JOIN} operations
228
+ \item Bitwise XOR and \texttt{bit\_count} for SimHash distance
229
+ \item Zero transport overhead (only weighted results returned)
230
+ \item \textbf{Latency}: $\sim$10ms for 1M+ atoms on consumer hardware
231
+ \end{itemize}
232
+
233
+ \section{Related Work}
234
+ \label{sec:related}
235
+
236
+ \subsection{Vector-Based Retrieval-Augmented Generation}
237
+
238
+ Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM---often gigabytes for modest corpora---restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is ``close'' to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
239
+
240
+ \subsection{Graph-Based Memory Systems}
241
+
242
+ Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay---a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
243
+
244
+ Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
245
+
246
+ \subsection{Personal AI Memory Systems}
247
+
248
+ The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
249
+
250
+ Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
251
+
252
+ \subsection{Temporal Information Retrieval}
253
+
254
+ Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
255
+
256
+ \subsection{Local-First and Edge Computing}
257
+
258
+ The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation---users maintain complete control over their data without cloud dependencies.
259
+
260
+ \subsection{Summary of Contributions}
261
+
262
+ STAR distinguishes itself from prior work through:
263
+ \begin{enumerate}
264
+ \item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
265
+ \item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
266
+ \item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
267
+ \item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
268
+ \end{enumerate}
269
+
270
+ \section{System Architecture}
271
+ \label{sec:architecture}
272
+
273
+ \subsection{Data Hierarchy}
274
+
275
+ \begin{table}[h]
276
+ \centering
277
+ \caption{Data Hierarchy (Three Content Tiers Plus Tags)}
278
+ \label{tab:hierarchy}
279
+ \begin{tabular}{@{}llll@{}}
280
+ \toprule
281
+ \textbf{Level} & \textbf{Role} & \textbf{Content Stored} & \textbf{Example} \\ \midrule
282
+ \textbf{Compound} & Document reference & File path + metadata & \texttt{ChatSessions.yaml} (91.88MB) \\
283
+ \textbf{Molecule} & Semantic chunk & Chunk text + byte offsets & Bytes 1024--2048 \\
284
+ \textbf{Atom} & Content unit & Byte-offset pointer + tags & Text chunk with \texttt{\#auth} tag \\
285
+ \textbf{Tag} & Concept/label & Semantic label only & \texttt{\#authentication}, \texttt{\#session} \\ \bottomrule
286
+ \end{tabular}
287
+ \end{table}
288
+
289
+ \textbf{Key Design Decision:} Content lives in \texttt{mirrored\_brain/} filesystem. Database stores pointers only (byte offsets + tags), creating a \textbf{disposable, rebuildable index}.
290
+
291
+ \subsection{The Browser Paradigm}
292
+
293
+ \begin{table}[h]
294
+ \centering
295
+ \caption{Browser Paradigm Mapping}
296
+ \label{tab:browser_paradigm}
297
+ \begin{tabular}{@{}p{0.3\linewidth}p{0.3\linewidth}p{0.3\linewidth}@{}}
298
+ \toprule
299
+ \textbf{Component} & \textbf{Browser Equivalent} & \textbf{Anchor Engine Implementation} \\ \midrule
300
+ HTML/CSS/JS shards & Web page components & Atoms (tags + byte offsets) \\
301
+ DOM tree & Document structure & Tag graph $G = (A, T, E)$ \\
302
+ Lazy loading & On-demand resource fetch & Radial inflation from disk \\
303
+ Cache & Browser cache & Ephemeral PGlite index \\ \bottomrule
304
+ \end{tabular}
305
+ \end{table}
306
+
307
+ \textbf{Universality Principle:} Just as browsers render any website on any machine by loading necessary shards, Anchor Engine navigates any dataset by loading only relevant atoms.
308
+
309
+ \subsection{Complexity Analysis}
310
+
311
+ \begin{table}[h]
312
+ \centering
313
+ \caption{Retrieval Complexity Comparison}
314
+ \label{tab:complexity}
315
+ \begin{tabular}{@{}lcccc@{}}
316
+ \toprule
317
+ \textbf{Method} & \textbf{Time} & \textbf{Space} & \textbf{Explainability} & \textbf{Hardware} \\ \midrule
318
+ \textbf{Dense Vector ANN (HNSW)} & $O(\log n)$ (query)\textsuperscript{$\ddagger$} & $O(n \cdot d)$ & Opaque & GPU preferred \\
319
+ \textbf{STAR (Sparse Graph)} & $\mathbf{O(k \cdot \bar{d})}$ & $\mathbf{O(|E|)}$ & \textbf{Native (tag paths)} & \textbf{CPU-only} \\ \bottomrule
320
+ \end{tabular}
321
+ \end{table}
322
+
323
+ \textsuperscript{$\ddagger$} $O(n \log n)$ is index construction complexity; query complexity is $O(\log n)$.
324
+
325
+ Where:
326
+ \begin{itemize}
327
+ \item $n$ = total atoms
328
+ \item $k$ = query tags (typically 5--20)
329
+ \item $\bar{d}$ = average tag degree (typically 10--100)
330
+ \item $d$ = vector dimension (typically 768--1536)
331
+ \item $|E|$ = sparse edges (typically $10 \cdot n$)
332
+ \end{itemize}
333
+
334
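+ \textbf{Key Insight:} For personal knowledge graphs, $k \cdot \bar{d} \ll n$, so STAR's $O(k\bar{d})$ traversal touches only a small fraction of the corpus. Because each of HNSW's $O(\log n)$ search steps also requires an $O(d)$ distance computation over $d$-dimensional vectors, STAR's query time is potentially faster than HNSW's in practice.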
335
+
336
+ \section{Retrieval Protocol: Planets and Moons}
337
+ \label{sec:retrieval}
338
+
339
+ \subsection{Phase 1 --- Anchor Discovery (Planets)}
340
+
341
+ \textbf{Goal:} High-precision seed set via direct matching.
342
+
343
+ \textbf{Strategies:}
344
+ \begin{enumerate}
345
+ \item \textbf{Full-Text Search (BM25-style)}: \texttt{to\_tsvector() @@ to\_tsquery()} in PGlite
346
+ \item \textbf{Radial Inflation}: Query \texttt{atom\_positions} table for keyword occurrences
347
+ \item \textbf{Engram Lookup:} O(1) cache for frequent entities
348
+ \end{enumerate}
349
+
350
+ \textbf{Output:} 20--200 anchor atoms with $d(q,a) = 0$
351
+
352
+ \subsection{Phase 2 --- Radial Inflation (Moons)}
353
+
354
+ \textbf{Goal:} High-recall expansion via tag-walker graph traversal.
355
+
356
+ \begin{lstlisting}[language=Python, caption={Radial Inflation Algorithm}]
357
+ def radial_inflation(anchors, radius=1, max_per_hop=50):
358
+ current_hop = anchors
359
+ all_results = set(anchors)
360
+
361
+ for hop in range(radius):
362
+ candidates = get_connected_nodes(current_hop)
363
+ weighted = apply_unified_field_equation(candidates, anchors)
364
+ top_k = select_by_gravity(weighted, max_per_hop)
365
+
366
+ all_results.update(top_k)
367
+ current_hop = top_k
368
+
369
+ return all_results
370
+ \end{lstlisting}
371
+
372
+ \paragraph{PhysicsMetadata Schema:}
373
+ \begin{lstlisting}[caption={PhysicsMetadata JSON Schema}]
374
+ {
375
+ "atom_id": "a7f3c2e1-4b5d-6789-abcd-ef0123456789",
376
+ "gravity_score": 0.82,
377
+ "decomposition": {
378
+ "semantic_overlap": 3,
379
+ "temporal_multiplier": 0.94,
380
+ "structural_similarity": 1.0,
381
+ "hop_distance": 1
382
+ },
383
+ "link_reason": "3 shared tags: #authentication, #session, #token",
384
+ "time_drift": "2h 14m ago",
385
+ "source_byte_range": [45210, 46890]
386
+ }
387
+ \end{lstlisting}
388
+
389
+ \subsection{Phase 3 --- Elastic Context Assembly}
390
+
391
+ \textbf{Goal:} Token-budget compliance with maximal coherence.
392
+
393
+ \paragraph{Snippets Coalescing:}
394
+ \begin{itemize}
395
+ \item Merge atoms within 500-byte proximity from same source
396
+ \item Snap to sentence boundaries for narrative flow
397
+ \item \textbf{Result:} 40--100 atoms $\to$ 8--12 coherent paragraphs (500--1000 chars each)
398
+ \end{itemize}
399
+
400
+ \paragraph{Progressive Inflation:}
401
+ \begin{itemize}
402
+ \item Top 10\% results: 2$\times$ inflation radius (1000 bytes)
403
+ \item Next 40\%: 1.5$\times$ radius (750 bytes)
404
+ \item Remaining 50\%: 1$\times$ radius (500 bytes)
405
+ \end{itemize}
406
+
407
+ \paragraph{Metadata Headers:}
408
+ \begin{verbatim}
409
+ [GROUP:1] [File:2025-07-16_to_2025-07-30.json] [Range: 0x4A20-0x4F80]
410
+ [Time: 2025-07-22T07:15:00Z] [Atoms: 5] [Chars: 847]
411
+ <atom id="abc12345" relevance="0.875" timestamp="..." persona="#work">
412
+ Full coherent paragraph content...
413
+ </atom>
414
+ \end{verbatim}
415
+
416
+ \section{Production Performance Benchmarks}
417
+ \label{sec:benchmarks}
418
+
419
+ \subsection{Dataset Characteristics (February 2026)}
420
+
421
+ \begin{table}[h]
422
+ \centering
423
+ \caption{Dataset Statistics}
424
+ \label{tab:dataset}
425
+ \begin{tabular}{@{}ll@{}}
426
+ \toprule
427
+ \textbf{Metric} & \textbf{Value} \\ \midrule
428
+ Total Files & 436 \\
429
+ Total Size & $\sim$100MB \\
430
+ Molecules & 280,000 \\
431
+ Atoms & 151,876 \\
432
+ Tags & $\sim$1,500 \\
433
+ Edges & $\sim$450,000 \\ \bottomrule
434
+ \end{tabular}
435
+ \end{table}
436
+
437
+ \subsection{Ingestion Performance}
438
+
439
+ \begin{table}[h]
440
+ \centering
441
+ \caption{Ingestion Performance by Dataset}
442
+ \label{tab:ingestion}
443
+ \begin{tabular}{@{}lrrrrr@{}}
444
+ \toprule
445
+ \textbf{Dataset} & \textbf{Size} & \textbf{Molecules} & \textbf{Atoms} & \textbf{Time} & \textbf{Throughput} \\ \midrule
446
+ \textbf{Chat Sessions} (monolith) & 91.88MB & 214,000 & 776 & 177.8s & 1,203 mol/s \\
447
+ \textbf{GitHub Archive} & 2.66MB & 36,793 & 497 & 22.4s & 1,642 mol/s \\
448
+ \textbf{Code Repository} & 0.94MB & 20,916 & 199 & 25.0s & 836 mol/s \\
449
+ \textbf{Total System} & $\sim$100MB & \textbf{280,000} & \textbf{1,500} & \textbf{$\sim$4 min} & \textbf{1,200 mol/s} \\ \bottomrule
450
+ \end{tabular}
451
+ \end{table}
452
+
453
+ \textbf{Optimization:} Monolithic files (single YAML) ingest 2$\times$ faster than hundreds of small files due to reduced I/O overhead and transaction batching.
454
+
455
+ \subsection{Search Performance}
456
+
457
+ \begin{table}[h]
458
+ \centering
459
+ \caption{Search Performance by Type}
460
+ \label{tab:search}
461
+ \begin{tabular}{@{}lcccc@{}}
462
+ \toprule
463
+ \textbf{Search Type} & \textbf{Budget} & \textbf{Results} & \textbf{Latency (p95)} & \textbf{Use Case} \\ \midrule
464
+ \textbf{Standard} (70/30) & 16k tokens & 40--100 atoms & \textbf{150ms} & Daily queries \\
465
+ \textbf{Max Recall} (3-hop) & 65k+ tokens & 200--500 atoms & \textbf{690ms} & Research \\
466
+ \textbf{Keyword} (direct FTS) & 4k tokens & 20--50 atoms & \textbf{100ms} & High precision \\ \bottomrule
467
+ \end{tabular}
468
+ \end{table}
469
+
470
+ \paragraph{Scaling Behavior (151K atoms):}
471
+ \begin{itemize}
472
+ \item Standard Search: \textbf{7.7s} (50$\times$ increase for 100$\times$ data growth)
473
+ \item Max Recall: \textbf{25--50s} (acceptable for 618k chars retrieved)
474
+ \end{itemize}
475
+
476
+ \paragraph{Trade-off Analysis:}
477
+ \begin{itemize}
478
+ \item \textbf{Vector RAG (HNSW):} Stable latency, memory-bound (4--8GB for 100MB)
479
+ \item \textbf{STAR:} Linear latency scaling, constant memory ($<$2GB)
480
+ \end{itemize}
481
+
482
+ For sovereign, local-first deployments on consumer hardware, latency scaling is acceptable.
483
+
484
+ \subsection{Memory Management}
485
+
486
+ \begin{table}[h]
487
+ \centering
488
+ \caption{Memory Usage by Phase}
489
+ \label{tab:memory}
490
+ \begin{tabular}{@{}lll@{}}
491
+ \toprule
492
+ \textbf{Phase} & \textbf{RSS Memory} & \textbf{Notes} \\ \midrule
493
+ \textbf{Peak (ingestion)} & 1,657MB & During 91MB file processing \\
494
+ \textbf{Idle (post-cleanup)} & 510MB & After 5min idle \\
495
+ \textbf{Reduction} & \textbf{-69\%} & 1,147MB saved via GC \\ \bottomrule
496
+ \end{tabular}
497
+ \end{table}
498
+
499
+ \textbf{Ephemeral Index Architecture (Standard 110):}
500
+ \begin{itemize}
501
+ \item Database wiped on shutdown
502
+ \item \texttt{mirrored\_brain/} preserved as source of truth
503
+ \item 338 files rehydrated from YAML on restart
504
+ \item Zero data loss guarantee
505
+ \end{itemize}
506
+
507
+ \section{Comparison with Vector-Based RAG}
508
+ \label{sec:comparison}
509
+
510
+ \begin{table}[h]
511
+ \centering
512
+ \caption{STAR vs. Vector RAG Comparison}
513
+ \label{tab:comparison}
514
+ \begin{tabular}{@{}p{0.45\linewidth}p{0.45\linewidth}@{}}
515
+ \toprule
516
+ \textbf{Anchor Engine (STAR)} & \textbf{Vector RAG (HNSW)} \\ \midrule
517
+ \textbf{90MB Ingestion: $\sim$178s} \checkmark 2$\times$ faster & $\sim$360s (batch) \\
518
+ \textbf{Memory Peak: $<$1.7GB} \checkmark 60--80\% less & 4--8GB \\
519
+ \textbf{Search (1.5K atoms): $\sim$150ms} \checkmark Comparable & $\sim$100ms \\
520
+ \textbf{Search (151K atoms): $\sim$7.7s} $\times$ Linear scaling & $\sim$150ms (stable) \\
521
+ \textbf{CPU-only} \checkmark No GPU & GPU preferred \\
522
+ \textbf{Explainable (tag paths)} \checkmark & Opaque (black box) \\
523
+ \textbf{Local-first} \checkmark No cloud & Cloud-dependent \\ \bottomrule
524
+ \end{tabular}
525
+ \end{table}
526
+
527
+ \subsection{Use Case Fit}
528
+
529
+ \begin{table}[h]
530
+ \centering
531
+ \caption{Recommended Approach by Scenario}
532
+ \label{tab:use_cases}
533
+ \begin{tabular}{@{}ll@{}}
534
+ \toprule
535
+ \textbf{Scenario} & \textbf{Recommended Approach} \\ \midrule
536
+ High-throughput cloud deployment & Vector RAG (HNSW) \\
537
+ \textbf{Sovereign, local-first operation} & \textbf{STAR (Anchor Engine)} \\
538
+ \textbf{4GB RAM laptop} & \textbf{STAR} \\
539
+ \textbf{Explainable retrieval required} & \textbf{STAR} \\
540
+ GPU infrastructure available & Vector RAG \\ \bottomrule
541
+ \end{tabular}
542
+ \end{table}
543
+
544
+ \section{Economic Impact and Democratization}
545
+ \label{sec:economic}
546
+
547
+ \subsection{Breaking Down Silos}
548
+
549
+ Current AI memory landscape:
550
+ \begin{itemize}
551
+ \item \textbf{Proprietary systems}: Black boxes, artificial scarcity
552
+ \item \textbf{Cloud dependency}: Recurring costs, vendor lock-in
553
+ \item \textbf{Hardware barriers}: GPU requirements exclude most users
554
+ \end{itemize}
555
+
556
+ STAR enables:
557
+ \begin{itemize}
558
+ \item \textbf{Cognitive Sovereignty}: Users own data, context, memories
559
+ \item \textbf{Economic Efficiency}: No cloud bills, no GPU rentals
560
+ \item \textbf{Innovation Acceleration}: Open architecture (AGPL-3.0), extensible
561
+ \end{itemize}
562
+
563
+ \subsection{Public Research Foundation}
564
+
565
+ Foundational AI research was publicly funded. STAR builds on:
566
+ \begin{itemize}
567
+ \item \textbf{SimHash} (Charikar, 2002) --- Princeton University
568
+ \item \textbf{PageRank} (Brin \& Page, 1998) --- Stanford University
569
+ \item \textbf{Transformer architecture} (Vaswani et al., 2017) --- Google Brain
570
+ \end{itemize}
571
+
572
+ This is a return on public investment: tools serving individuals, not corporations.
573
+
574
+ \section{Conclusion}
575
+ \label{sec:conclusion}
576
+
577
+ STAR proves that ``Write Once, Run Everywhere'' applies to AI infrastructure. Decouple logic from data. Shard context into atoms. Implement universal distribution.
578
+
579
+ \subsection{Key Achievements}
580
+
581
+ \begin{itemize}
582
+ \item \checkmark \textbf{1,200 molecules/second} ingestion throughput
583
+ \item \checkmark \textbf{$<$200ms} search latency (p95, standard queries)
584
+ \item \checkmark \textbf{69\% memory reduction} after idle cleanup
585
+ \item \checkmark \textbf{Zero data loss} with ephemeral index architecture
586
+ \item \checkmark \textbf{151K atoms} navigable on 4GB RAM laptop
587
+ \end{itemize}
588
+
589
+ \subsection{Future Work}
590
+
591
+ \begin{enumerate}
592
+ \item \textbf{Caching Layer}: Frequent query result caching (target: 50\% latency reduction)
593
+ \item \textbf{Diffusion Models}: Graph-based reasoning over knowledge structures
594
+ \item \textbf{Mobile Applications}: iOS/Android ports via React Native
595
+ \item \textbf{Plugin Marketplace}: Community-contributed atomizers and taggers
596
+ \end{enumerate}
597
+
598
+ \subsection{Availability}
599
+
600
+ \begin{itemize}
601
+ \item \textbf{Repository}: \url{https://github.com/RSBalchII/anchor-engine-node}
602
+ \item \textbf{License}: AGPL-3.0
603
+ \item \textbf{Production Verified}: February 23, 2026
604
+ \end{itemize}
605
+
606
+ \appendix
607
+
608
+ \section{Recursive CTE for Tag-Walker}
609
+ \label{app:sql}
610
+
611
+ \begin{lstlisting}[language=SQL, caption={Recursive CTE Implementation of Tag-Walker}]
612
+ WITH RECURSIVE tag_walk AS (
613
+ -- Base case: anchor atoms
614
+ SELECT
615
+ a.id as atom_id,
616
+ a.simhash,
617
+ a.timestamp,
618
+ 0 as hop_distance,
619
+ 1.0 as gravity_score
620
+ FROM atoms a
621
+ WHERE a.id IN ($1::text[])
622
+
623
+ UNION ALL
624
+
625
+ -- Recursive case: 1-hop neighbors via shared tags
626
+ SELECT
627
+ t2.atom_id,
628
+ a2.simhash,
629
+ a2.timestamp,
630
+ tw.hop_distance + 1,
631
+ ((COUNT(DISTINCT t1.tag) / 10.0) * POWER(0.85, tw.hop_distance + 1)) *
632
+ EXP(-0.00001 * ABS(a2.timestamp - tw.timestamp) / 3600000.0) *
633
+ (1.0 - (bit_count(('x' || LPAD(a2.simhash, 16, '0'))::bit(64)
634
+ # ('x' || LPAD(tw.simhash, 16, '0'))::bit(64)) / 64.0))
635
+ FROM tag_walk tw
636
+ JOIN atoms a1 ON tw.atom_id = a1.id
637
+ JOIN tags t1 ON a1.id = t1.atom_id
638
+ JOIN tags t2 ON t1.tag = t2.tag
639
+ JOIN atoms a2 ON t2.atom_id = a2.id
640
+ WHERE tw.hop_distance < 3
641
+ AND a2.id NOT IN (SELECT atom_id FROM tag_walk)
642
+ GROUP BY t2.atom_id, a2.simhash, a2.timestamp, tw.hop_distance, tw.timestamp, tw.simhash
643
+ )
644
+ SELECT * FROM tag_walk
645
+ WHERE gravity_score > 0.1
646
+ ORDER BY gravity_score DESC
647
+ LIMIT 200;
648
+ \end{lstlisting}
649
+
650
+ % Bibliography
651
+ \bibliographystyle{plain}
652
+ \bibliography{BIBLIOGRAPHY}
653
+
654
+ % Ensure arXiv runs pdflatex 4 times for references to resolve
655
+ \typeout{get arXiv to do 4 passes: Label(s) may have changed. Rerun}
656
+
657
+ \end{document}