@rbalchii/anchor-engine 4.7.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/LICENSE +608 -608
  2. package/README.md +513 -317
  3. package/anchor.bat +5 -5
  4. package/docs/AGENT_CONTROLLED_ENGINE.md +581 -0
  5. package/docs/API.md +314 -314
  6. package/docs/DEPLOYMENT.md +448 -448
  7. package/docs/INDEX.md +226 -226
  8. package/docs/MD_FILES_INVENTORY.md +166 -0
  9. package/docs/STAR_Whitepaper_Executive.md +216 -216
  10. package/docs/TROUBLESHOOTING.md +535 -535
  11. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -145
  12. package/docs/arxiv/RELATED_WORK.tex +38 -38
  13. package/docs/arxiv/compile.bat +48 -48
  14. package/docs/arxiv/joss_response.md +32 -32
  15. package/docs/arxiv/prepare-submission.bat +46 -46
  16. package/docs/arxiv/review.md +127 -127
  17. package/docs/arxiv/star-whitepaper.tex +656 -656
  18. package/docs/code-patterns.md +289 -289
  19. package/docs/daily/TODAY_SUMMARY.md +245 -0
  20. package/docs/guides/BUILDING.md +64 -0
  21. package/docs/guides/INSTALL_NPM.md +160 -0
  22. package/docs/guides/NPM_PUBLISH_SUMMARY.md +231 -0
  23. package/docs/paper.md +124 -0
  24. package/docs/project/PROJECT_STATE_ASSESSMENT.md +312 -0
  25. package/docs/reviews/code-review-v4.8.1-decision-record.md +165 -0
  26. package/docs/testing/TESTING.md +213 -0
  27. package/docs/testing/TESTING_FRAMEWORK_COMPLETE.md +271 -0
  28. package/docs/testing/search-test-report.md +76 -0
  29. package/docs/whitepaper.md +445 -445
  30. package/engine/dist/commands/distill.js +21 -21
  31. package/engine/dist/config/index.d.ts +7 -0
  32. package/engine/dist/config/index.d.ts.map +1 -1
  33. package/engine/dist/config/index.js +22 -0
  34. package/engine/dist/config/index.js.map +1 -1
  35. package/engine/dist/config/paths.d.ts +1 -1
  36. package/engine/dist/config/paths.js +3 -3
  37. package/engine/dist/config/paths.js.map +1 -1
  38. package/engine/dist/core/db.js +131 -131
  39. package/engine/dist/mcp/server.d.ts +44 -0
  40. package/engine/dist/mcp/server.d.ts.map +1 -0
  41. package/engine/dist/mcp/server.js +427 -0
  42. package/engine/dist/mcp/server.js.map +1 -0
  43. package/engine/dist/native/index.d.ts +20 -21
  44. package/engine/dist/native/index.d.ts.map +1 -1
  45. package/engine/dist/profiling/atomization-profiling.js +3 -3
  46. package/engine/dist/profiling/bottleneck-identification.js +35 -35
  47. package/engine/dist/profiling/content-sanitization-profiling.js +86 -86
  48. package/engine/dist/routes/monitoring.js +8 -8
  49. package/engine/dist/routes/v1/admin.js +8 -8
  50. package/engine/dist/routes/v1/atoms.js +15 -15
  51. package/engine/dist/routes/v1/ingest.d.ts.map +1 -1
  52. package/engine/dist/routes/v1/ingest.js +39 -0
  53. package/engine/dist/routes/v1/ingest.js.map +1 -1
  54. package/engine/dist/routes/v1/system.d.ts.map +1 -1
  55. package/engine/dist/routes/v1/system.js +305 -6
  56. package/engine/dist/routes/v1/system.js.map +1 -1
  57. package/engine/dist/routes/v1/tags.js +2 -2
  58. package/engine/dist/services/backup/backup-restore.js +23 -23
  59. package/engine/dist/services/backup/backup.js +14 -14
  60. package/engine/dist/services/distillation/radial-distiller.d.ts +1 -0
  61. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -1
  62. package/engine/dist/services/distillation/radial-distiller.js +23 -16
  63. package/engine/dist/services/distillation/radial-distiller.js.map +1 -1
  64. package/engine/dist/services/ingest/github-ingest-service.js +18 -18
  65. package/engine/dist/services/ingest/ingest-atomic.js +79 -79
  66. package/engine/dist/services/ingest/ingest.d.ts.map +1 -1
  67. package/engine/dist/services/ingest/ingest.js +28 -25
  68. package/engine/dist/services/ingest/ingest.js.map +1 -1
  69. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -1
  70. package/engine/dist/services/ingest/watchdog.js +14 -24
  71. package/engine/dist/services/ingest/watchdog.js.map +1 -1
  72. package/engine/dist/services/llm/reader.js +9 -9
  73. package/engine/dist/services/mirror/mirror.js +5 -5
  74. package/engine/dist/services/mirror/mirror.js.map +1 -1
  75. package/engine/dist/services/research/researcher.js +8 -8
  76. package/engine/dist/services/scribe/scribe.js +27 -27
  77. package/engine/dist/services/search/context-inflator.js +34 -34
  78. package/engine/dist/services/search/explore.js +20 -20
  79. package/engine/dist/services/search/physics-tag-walker.js +208 -208
  80. package/engine/dist/services/search/query-parser.js +5 -5
  81. package/engine/dist/services/search/search-utils.js +3 -3
  82. package/engine/dist/services/search/search.js +36 -36
  83. package/engine/dist/services/search/sovereign-system-prompt.js +22 -22
  84. package/engine/dist/services/semantic/semantic-ingestion-service.js +47 -47
  85. package/engine/dist/services/semantic/semantic-search.js +21 -21
  86. package/engine/dist/services/synonyms/auto-synonym-generator.js +35 -35
  87. package/engine/dist/services/system-status.d.ts +34 -0
  88. package/engine/dist/services/system-status.d.ts.map +1 -1
  89. package/engine/dist/services/system-status.js +57 -1
  90. package/engine/dist/services/system-status.js.map +1 -1
  91. package/engine/dist/services/tags/discovery.js +5 -5
  92. package/engine/dist/services/tags/infector.js +6 -6
  93. package/engine/dist/services/tags/tag-auditor.js +51 -51
  94. package/engine/dist/services/taxonomy/taxonomy-manager.js +6 -6
  95. package/engine/dist/utils/tag-cleanup.js +5 -5
  96. package/engine/dist/utils/tag-modulation.js +1 -1
  97. package/engine/dist/utils/tag-modulation.js.map +1 -1
  98. package/engine/package.json +104 -105
  99. package/mcp-server/README.md +404 -0
  100. package/mcp-server/dist/index.d.ts +16 -0
  101. package/mcp-server/dist/index.d.ts.map +1 -0
  102. package/mcp-server/dist/index.js +709 -0
  103. package/mcp-server/dist/index.js.map +1 -0
  104. package/mcp-server/package.json +34 -0
  105. package/package.json +10 -2
  106. package/docs/archive/GIT_BACKUP_VERIFICATION.md +0 -297
  107. package/docs/archive/adoption-guide.md +0 -264
  108. package/docs/archive/adoption-preparation.md +0 -179
  109. package/docs/archive/agent-harness-integration.md +0 -227
  110. package/docs/archive/api-reference.md +0 -106
  111. package/docs/archive/api_flows_diagram.md +0 -118
  112. package/docs/archive/architecture.md +0 -410
  113. package/docs/archive/architecture_diagram.md +0 -174
  114. package/docs/archive/broader-adoption-preparation.md +0 -175
  115. package/docs/archive/browser-paradigm-architecture.md +0 -163
  116. package/docs/archive/chat-integration.md +0 -124
  117. package/docs/archive/community-adoption-materials.md +0 -103
  118. package/docs/archive/community-adoption.md +0 -147
  119. package/docs/archive/comparison-with-siloed-solutions.md +0 -192
  120. package/docs/archive/comprehensive-docs.md +0 -156
  121. package/docs/archive/data_flow_diagram.md +0 -251
  122. package/docs/archive/enhancement-implementation-summary.md +0 -146
  123. package/docs/archive/evolution-summary.md +0 -141
  124. package/docs/archive/ingestion_pipeline_diagram.md +0 -198
  125. package/docs/archive/native-module-profiling-results.md +0 -135
  126. package/docs/archive/positioning-document.md +0 -158
  127. package/docs/archive/positioning.md +0 -175
  128. package/docs/archive/query-builder-documentation.md +0 -218
  129. package/docs/archive/quick-reference.md +0 -40
  130. package/docs/archive/quickstart.md +0 -63
  131. package/docs/archive/relationship-narrative-discovery.md +0 -141
  132. package/docs/archive/search-logic-improvement-plan.md +0 -336
  133. package/docs/archive/search_architecture_diagram.md +0 -212
  134. package/docs/archive/semantic-architecture-guide.md +0 -97
  135. package/docs/archive/sequence-diagrams.md +0 -128
  136. package/docs/archive/system_components_diagram.md +0 -296
  137. package/docs/archive/test-framework-integration.md +0 -109
  138. package/docs/archive/testing-framework-documentation.md +0 -397
  139. package/docs/archive/testing-framework-summary.md +0 -121
  140. package/docs/archive/testing-framework.md +0 -377
  141. package/docs/archive/ui-architecture.md +0 -75
@@ -1,145 +1,145 @@
1
- % STAR Algorithm Bibliography
2
- % Add these to your star-whitepaper.tex with \bibliography{BIBLIOGRAPHY}
3
-
4
- % Foundational Work
5
- @article{charikar2002similar,
6
- title={Similarity estimation techniques from rounding algorithms},
7
- author={Charikar, Moses S},
8
- journal={Proceedings of the thiry-fourth annual ACM symposium on Theory of computing},
9
- pages={380--388},
10
- year={2002},
11
- publisher={ACM}
12
- }
13
-
14
- @article{brin1998anatomy,
15
- title={The anatomy of a large-scale hypertextual web search engine},
16
- author={Brin, Sergey and Page, Lawrence},
17
- journal={Computer networks and ISDN systems},
18
- volume={30},
19
- number={1-7},
20
- pages={107--117},
21
- year={1998},
22
- publisher={Elsevier}
23
- }
24
-
25
- % Vector Retrieval
26
- @article{malkov2018efficient,
27
- title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs},
28
- author={Malkov, Yu A and Yashunin, Dmitry A},
29
- journal={IEEE transactions on pattern analysis and machine intelligence},
30
- volume={42},
31
- number={4},
32
- pages={824--836},
33
- year={2018},
34
- publisher={IEEE}
35
- }
36
-
37
- @article{johnson2019billion,
38
- title={Billion-scale similarity search with {GPUs}},
39
- author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, H{\'e}rve},
40
- journal={IEEE Transactions on Big Data},
41
- volume={7},
42
- number={3},
43
- pages={535--547},
44
- year={2019},
45
- publisher={IEEE}
46
- }
47
-
48
- % Graph-Based RAG
49
- @article{wei2026tretriever,
50
- title={{T-Retriever}: Tree-based Hierarchical Retrieval Augmented Generation for Textual Graphs},
51
- author={Wei, Chunyu and Qin, Huaiyu and He, Siyuan and Wang, Yunhai and Chen, Yueguo},
52
- journal={arXiv preprint arXiv:2601.04945},
53
- year={2026}
54
- }
55
-
56
- @article{menschikov2025personalai,
57
- title={{PersonalAI}: A Systematic Comparison of Knowledge Graph Storage and Retrieval Approaches for Personalized {LLM} agents},
58
- author={Menschikov, Mikhail and Evseev, Dmitry and Dochkina, Victoria and Kostoev, Ruslan and Perepechkin, Ilia and Anokhin, Petr and Burnaev, Evgeny and Semenov, Nikita},
59
- journal={arXiv preprint arXiv:2506.17001},
60
- year={2025}
61
- }
62
-
63
- % Personal AI Memory
64
- @article{wei2025second,
65
- title={{AI}-native Memory 2.0: Second Me},
66
- author={Wei, Jiale and Ying, Xiang and Gao, Tao and Bao, Fangyi and Tao, Felix and Shang, Jingbo},
67
- journal={arXiv preprint arXiv:2503.08102},
68
- year={2025}
69
- }
70
-
71
- @article{salas2025cognitive,
72
- title={Cognitive {AI} framework 2.0: advances in the simulation of human thought},
73
- author={Salas-Guerra, Rommel},
74
- journal={arXiv preprint arXiv:2502.04259},
75
- year={2025}
76
- }
77
-
78
- % RAG and Context Retrieval
79
- @article{lewis2020retrieval,
80
- title={Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
81
- author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
82
- journal={Advances in Neural Information Processing Systems},
83
- volume={33},
84
- pages={9459--9474},
85
- year={2020}
86
- }
87
-
88
- @article{guu2020realm,
89
- title={{REALM}: Retrieval-augmented language model pre-training},
90
- author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Mingwei},
91
- journal={Proceedings of the 37th International Conference on Machine Learning},
92
- pages={3929--3938},
93
- year={2020}
94
- }
95
-
96
- % Local-First and Edge Computing
97
- @article{haque2023local,
98
- title={Local-first software: You own your data, in spite of the cloud},
99
- author={Haque, Amjad and Kleppmann, Martin and Wiggins, Adam},
100
- journal={Proceedings of the 2023 ACM SIGPLAN International Symposium on New Ideas, New Paradigms, and Reflections on Programming and Software},
101
- pages={59--75},
102
- year={2023},
103
- publisher={ACM}
104
- }
105
-
106
- % Knowledge Graphs
107
- @article{hogan2021knowledge,
108
- title={Knowledge graphs},
109
- author={Hogan, Aidan and Blomqvist, Eva and Cochez, Michael and d'Amato, Claudia and de Melo, Gerard and Gutierrez, Claudio and Gayo, Jos{\'e} Emilio Labra and Kirrane, Sabrina and Neumaier, Sebastian and Polleres, Axel and others},
110
- journal={ACM Computing Surveys (CSUR)},
111
- volume={54},
112
- number={4},
113
- pages={1--37},
114
- year={2021},
115
- publisher={ACM New York, NY, USA}
116
- }
117
-
118
- % Temporal Information Retrieval
119
- @article{kanhabua2008surviving,
120
- title={Surviving the {World Wide Web}: A survey on web archive search},
121
- author={Kanhabua, Nattiya and N{\o}rv{\aa}g, Kjetil},
122
- journal={Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology},
123
- pages={675--676},
124
- year={2009}
125
- }
126
-
127
- % SimHash and Near-Duplicate Detection
128
- @article{sadowski2008algorithm,
129
- title={The algorithm behind the {Detect} project},
130
- author={Sadowski, Caitlin and Levin, Greg},
131
- journal={Google Inc.},
132
- year={2008}
133
- }
134
-
135
- % Sparse Retrieval
136
- @article{lin2021pretrained,
137
- title={Pretrained transformers for text ranking: {BERT} and beyond},
138
- author={Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
139
- journal={Synthesis Lectures on Human Language Technologies},
140
- volume={14},
141
- number={4},
142
- pages={1--325},
143
- year={2021},
144
- publisher={Morgan & Claypool Publishers}
145
- }
1
+ % STAR Algorithm Bibliography
2
+ % Add these to your star-whitepaper.tex with \bibliography{BIBLIOGRAPHY}
3
+
4
+ % Foundational Work
5
+ @article{charikar2002similar,
6
+ title={Similarity estimation techniques from rounding algorithms},
7
+ author={Charikar, Moses S},
8
+ journal={Proceedings of the thirty-fourth annual ACM symposium on Theory of computing},
9
+ pages={380--388},
10
+ year={2002},
11
+ publisher={ACM}
12
+ }
13
+
14
+ @article{brin1998anatomy,
15
+ title={The anatomy of a large-scale hypertextual web search engine},
16
+ author={Brin, Sergey and Page, Lawrence},
17
+ journal={Computer networks and ISDN systems},
18
+ volume={30},
19
+ number={1-7},
20
+ pages={107--117},
21
+ year={1998},
22
+ publisher={Elsevier}
23
+ }
24
+
25
+ % Vector Retrieval
26
+ @article{malkov2018efficient,
27
+ title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs},
28
+ author={Malkov, Yu A and Yashunin, Dmitry A},
29
+ journal={IEEE transactions on pattern analysis and machine intelligence},
30
+ volume={42},
31
+ number={4},
32
+ pages={824--836},
33
+ year={2018},
34
+ publisher={IEEE}
35
+ }
36
+
37
+ @article{johnson2019billion,
38
+ title={Billion-scale similarity search with {GPUs}},
39
+ author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
40
+ journal={IEEE Transactions on Big Data},
41
+ volume={7},
42
+ number={3},
43
+ pages={535--547},
44
+ year={2019},
45
+ publisher={IEEE}
46
+ }
47
+
48
+ % Graph-Based RAG
49
+ @article{wei2026tretriever,
50
+ title={{T-Retriever}: Tree-based Hierarchical Retrieval Augmented Generation for Textual Graphs},
51
+ author={Wei, Chunyu and Qin, Huaiyu and He, Siyuan and Wang, Yunhai and Chen, Yueguo},
52
+ journal={arXiv preprint arXiv:2601.04945},
53
+ year={2026}
54
+ }
55
+
56
+ @article{menschikov2025personalai,
57
+ title={{PersonalAI}: A Systematic Comparison of Knowledge Graph Storage and Retrieval Approaches for Personalized {LLM} agents},
58
+ author={Menschikov, Mikhail and Evseev, Dmitry and Dochkina, Victoria and Kostoev, Ruslan and Perepechkin, Ilia and Anokhin, Petr and Burnaev, Evgeny and Semenov, Nikita},
59
+ journal={arXiv preprint arXiv:2506.17001},
60
+ year={2025}
61
+ }
62
+
63
+ % Personal AI Memory
64
+ @article{wei2025second,
65
+ title={{AI}-native Memory 2.0: Second Me},
66
+ author={Wei, Jiale and Ying, Xiang and Gao, Tao and Bao, Fangyi and Tao, Felix and Shang, Jingbo},
67
+ journal={arXiv preprint arXiv:2503.08102},
68
+ year={2025}
69
+ }
70
+
71
+ @article{salas2025cognitive,
72
+ title={Cognitive {AI} framework 2.0: advances in the simulation of human thought},
73
+ author={Salas-Guerra, Rommel},
74
+ journal={arXiv preprint arXiv:2502.04259},
75
+ year={2025}
76
+ }
77
+
78
+ % RAG and Context Retrieval
79
+ @article{lewis2020retrieval,
80
+ title={Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
81
+ author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
82
+ journal={Advances in Neural Information Processing Systems},
83
+ volume={33},
84
+ pages={9459--9474},
85
+ year={2020}
86
+ }
87
+
88
+ @article{guu2020realm,
89
+ title={{REALM}: Retrieval-augmented language model pre-training},
90
+ author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Mingwei},
91
+ journal={Proceedings of the 37th International Conference on Machine Learning},
92
+ pages={3929--3938},
93
+ year={2020}
94
+ }
95
+
96
+ % Local-First and Edge Computing
97
+ @article{haque2023local,
98
+ title={Local-first software: You own your data, in spite of the cloud},
99
+ author={Haque, Amjad and Kleppmann, Martin and Wiggins, Adam},
100
+ journal={Proceedings of the 2023 ACM SIGPLAN International Symposium on New Ideas, New Paradigms, and Reflections on Programming and Software},
101
+ pages={59--75},
102
+ year={2023},
103
+ publisher={ACM}
104
+ }
105
+
106
+ % Knowledge Graphs
107
+ @article{hogan2021knowledge,
108
+ title={Knowledge graphs},
109
+ author={Hogan, Aidan and Blomqvist, Eva and Cochez, Michael and d'Amato, Claudia and de Melo, Gerard and Gutierrez, Claudio and Gayo, Jos{\'e} Emilio Labra and Kirrane, Sabrina and Neumaier, Sebastian and Polleres, Axel and others},
110
+ journal={ACM Computing Surveys (CSUR)},
111
+ volume={54},
112
+ number={4},
113
+ pages={1--37},
114
+ year={2021},
115
+ publisher={ACM New York, NY, USA}
116
+ }
117
+
118
+ % Temporal Information Retrieval
119
+ @article{kanhabua2008surviving,
120
+ title={Surviving the {World Wide Web}: A survey on web archive search},
121
+ author={Kanhabua, Nattiya and N{\o}rv{\aa}g, Kjetil},
122
+ journal={Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology},
123
+ pages={675--676},
124
+ year={2009}
125
+ }
126
+
127
+ % SimHash and Near-Duplicate Detection
128
+ @article{sadowski2008algorithm,
129
+ title={The algorithm behind the {Detect} project},
130
+ author={Sadowski, Caitlin and Levin, Greg},
131
+ journal={Google Inc.},
132
+ year={2008}
133
+ }
134
+
135
+ % Sparse Retrieval
136
+ @article{lin2021pretrained,
137
+ title={Pretrained transformers for text ranking: {BERT} and beyond},
138
+ author={Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
139
+ journal={Synthesis Lectures on Human Language Technologies},
140
+ volume={14},
141
+ number={4},
142
+ pages={1--325},
143
+ year={2021},
144
+ publisher={Morgan & Claypool Publishers}
145
+ }
@@ -1,39 +1,39 @@
1
- % Related Work Section for STAR Whitepaper
2
- % Insert after Section 2 (Mathematical Foundation) and before Section 3 (System Architecture)
3
-
4
- \section{Related Work}
5
- \label{sec:related}
6
-
7
- \subsection{Vector-Based Retrieval-Augmented Generation}
8
-
9
- Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM-often gigabytes for modest corpora-restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is "close" to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
10
-
11
- \subsection{Graph-Based Memory Systems}
12
-
13
- Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay-a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
14
-
15
- Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
16
-
17
- \subsection{Personal AI Memory Systems}
18
-
19
- The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
20
-
21
- Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
22
-
23
- \subsection{Temporal Information Retrieval}
24
-
25
- Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
26
-
27
- \subsection{Local-First and Edge Computing}
28
-
29
- The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation-users maintain complete control over their data without cloud dependencies.
30
-
31
- \subsection{Summary of Contributions}
32
-
33
- STAR distinguishes itself from prior work through:
34
- \begin{enumerate}
35
- \item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
36
- \item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
37
- \item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
38
- \item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
1
+ % Related Work Section for STAR Whitepaper
2
+ % Insert after Section 2 (Mathematical Foundation) and before Section 3 (System Architecture)
3
+
4
+ \section{Related Work}
5
+ \label{sec:related}
6
+
7
+ \subsection{Vector-Based Retrieval-Augmented Generation}
8
+
9
+ Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM---often gigabytes for modest corpora---restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is ``close'' to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
10
+
11
+ \subsection{Graph-Based Memory Systems}
12
+
13
+ Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay---a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
14
+
15
+ Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
16
+
17
+ \subsection{Personal AI Memory Systems}
18
+
19
+ The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
20
+
21
+ Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
22
+
23
+ \subsection{Temporal Information Retrieval}
24
+
25
+ Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
26
+
27
+ \subsection{Local-First and Edge Computing}
28
+
29
+ The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation---users maintain complete control over their data without cloud dependencies.
30
+
31
+ \subsection{Summary of Contributions}
32
+
33
+ STAR distinguishes itself from prior work through:
34
+ \begin{enumerate}
35
+ \item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
36
+ \item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
37
+ \item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
38
+ \item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
39
39
  \end{enumerate}
@@ -1,48 +1,48 @@
1
- @echo off
2
- REM STAR Whitepaper Compilation Script
3
- REM Run this 4 times for references to resolve
4
-
5
- echo ========================================
6
- echo STAR Whitepaper - Compilation Script
7
- echo ========================================
8
- echo.
9
-
10
- cd /d "%~dp0"
11
-
12
- echo Step 1/4: First pdflatex pass...
13
- pdflatex -interaction=nonstopmode star-whitepaper.tex
14
- if errorlevel 1 (
15
- echo ERROR: First pdflatex pass failed!
16
- pause
17
- exit /b 1
18
- )
19
-
20
- echo.
21
- echo Step 2/4: Running bibtex...
22
- bibtex star-whitepaper
23
- if errorlevel 1 (
24
- echo ERROR: BibTeX failed!
25
- pause
26
- exit /b 1
27
- )
28
-
29
- echo.
30
- echo Step 3/4: Second pdflatex pass...
31
- pdflatex -interaction=nonstopmode star-whitepaper.tex
32
-
33
- echo.
34
- echo Step 4/4: Third pdflatex pass (final)...
35
- pdflatex -interaction=nonstopmode star-whitepaper.tex
36
-
37
- echo.
38
- echo ========================================
39
- echo Compilation Complete!
40
- echo ========================================
41
- echo.
42
- echo Output: star-whitepaper.pdf
43
- echo.
44
- echo If you see "Label(s) may have changed" warnings,
45
- echo run this script one more time.
46
- echo.
47
-
48
- pause
1
+ @echo off
2
+ REM STAR Whitepaper Compilation Script
3
+ REM Run this 4 times for references to resolve
4
+
5
+ echo ========================================
6
+ echo STAR Whitepaper - Compilation Script
7
+ echo ========================================
8
+ echo.
9
+
10
+ cd /d "%~dp0"
11
+
12
+ echo Step 1/4: First pdflatex pass...
13
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
14
+ if errorlevel 1 (
15
+ echo ERROR: First pdflatex pass failed!
16
+ pause
17
+ exit /b 1
18
+ )
19
+
20
+ echo.
21
+ echo Step 2/4: Running bibtex...
22
+ bibtex star-whitepaper
23
+ if errorlevel 1 (
24
+ echo ERROR: BibTeX failed!
25
+ pause
26
+ exit /b 1
27
+ )
28
+
29
+ echo.
30
+ echo Step 3/4: Second pdflatex pass...
31
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
32
+
33
+ echo.
34
+ echo Step 4/4: Third pdflatex pass (final)...
35
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
36
+
37
+ echo.
38
+ echo ========================================
39
+ echo Compilation Complete!
40
+ echo ========================================
41
+ echo.
42
+ echo Output: star-whitepaper.pdf
43
+ echo.
44
+ echo If you see "Label(s) may have changed" warnings,
45
+ echo run this script one more time.
46
+ echo.
47
+
48
+ pause
@@ -1,33 +1,33 @@
1
- # Response to JOSS Editor
2
-
3
- **To:** @danielskatz
4
- **From:** @RSBalchII
5
- **Date:** 2026-02-25
6
-
7
- Thank you for your help with the PDF generation and for your guidance on the submission requirements.
8
-
9
- ## Research Software Qualification
10
-
11
- To answer your question regarding how STAR qualifies as research software:
12
-
13
- As an independent researcher, my work focuses on Information Retrieval and Personal Knowledge Management—specifically solving the problem of deploying large-scale context retrieval systems on resource-constrained, consumer‑grade hardware without relying on cloud APIs.
14
-
15
- STAR (and its reference implementation, the Anchor Engine) qualifies as research software because it embodies a novel algorithmic research contribution. The software is the practical realization of a new retrieval model (what I term the “Unified Field Equation”) that uses a sparse bipartite graph, temporal decay, and SimHash deduplication to replace computationally expensive dense‑vector ANN combinations (like HNSW/FAISS). The performance benchmarks and the novel $O(k \cdot \bar{d})$ complexity scaling presented in the paper were exclusively generated, validated, and tracked using this software.
16
-
17
- Because the project is very new (the core research and implementation began in August 2025), it has not yet been cited in formal external publications. However, it was built explicitly for the research purpose of providing an explainable, scalable alternative to opaque dense vector indices, enabling researchers to index and traverse massive text corpora (e.g., my 28 M‑token validation dataset) directly on standard laptops.
18
-
19
- ## Word‑Count Compliance
20
-
21
- I have trimmed the `paper.md` from 2720 words to **1219 words** (measured by `wc -w`), well within the JOSS limit of 1750 words. The revisions preserve all key technical content while removing redundant descriptions and condensing tables.
22
-
23
- ## DOI Updates
24
-
25
- Missing DOIs have been added to `paper.bib` for all cited works where a DOI is available. The editorialbot’s reference check should now pass.
26
-
27
- ## Next Steps
28
-
29
- If the research‑software qualification is satisfactory, I am ready to proceed with the review. Please let me know if any further clarifications or adjustments are needed.
30
-
31
- Thank you again for your time and assistance.
32
-
1
+ # Response to JOSS Editor
2
+
3
+ **To:** @danielskatz
4
+ **From:** @RSBalchII
5
+ **Date:** 2026-02-25
6
+
7
+ Thank you for your help with the PDF generation and for your guidance on the submission requirements.
8
+
9
+ ## Research Software Qualification
10
+
11
+ To answer your question regarding how STAR qualifies as research software:
12
+
13
+ As an independent researcher, my work focuses on Information Retrieval and Personal Knowledge Management—specifically solving the problem of deploying large-scale context retrieval systems on resource-constrained, consumer‑grade hardware without relying on cloud APIs.
14
+
15
+ STAR (and its reference implementation, the Anchor Engine) qualifies as research software because it embodies a novel algorithmic research contribution. The software is the practical realization of a new retrieval model (what I term the “Unified Field Equation”) that uses a sparse bipartite graph, temporal decay, and SimHash deduplication to replace computationally expensive dense‑vector ANN combinations (like HNSW/FAISS). The performance benchmarks and the novel $O(k \cdot \bar{d})$ complexity scaling presented in the paper were exclusively generated, validated, and tracked using this software.
16
+
17
+ Because the project is very new (the core research and implementation began in August 2025), it has not yet been cited in formal external publications. However, it was built explicitly for the research purpose of providing an explainable, scalable alternative to opaque dense vector indices, enabling researchers to index and traverse massive text corpora (e.g., my 28 M‑token validation dataset) directly on standard laptops.
18
+
19
+ ## Word‑Count Compliance
20
+
21
+ I have trimmed the `paper.md` from 2720 words to **1219 words** (measured by `wc -w`), well within the JOSS limit of 1750 words. The revisions preserve all key technical content while removing redundant descriptions and condensing tables.
22
+
23
+ ## DOI Updates
24
+
25
+ Missing DOIs have been added to `paper.bib` for all cited works where a DOI is available. The editorialbot’s reference check should now pass.
26
+
27
+ ## Next Steps
28
+
29
+ If the research‑software qualification is satisfactory, I am ready to proceed with the review. Please let me know if any further clarifications or adjustments are needed.
30
+
31
+ Thank you again for your time and assistance.
32
+
33
33
  —R.S. Balch II
@@ -1,46 +1,46 @@
1
- @echo off
2
- REM Prepare arXiv Submission Package
3
- REM Creates a clean zip file with only necessary files
4
-
5
- echo ========================================
6
- echo Preparing arXiv Submission Package
7
- echo ========================================
8
- echo.
9
-
10
- cd /d "%~dp0"
11
-
12
- REM Create submission directory
13
- set SUBMISSION_DIR=star-arxiv-submission
14
- if exist "%SUBMISSION_DIR%" (
15
- echo Cleaning up old submission directory...
16
- rmdir /s /q "%SUBMISSION_DIR%"
17
- )
18
-
19
- echo Creating submission directory...
20
- mkdir "%SUBMISSION_DIR%"
21
-
22
- echo.
23
- echo Copying files...
24
- copy star-whitepaper.tex "%SUBMISSION_DIR%\"
25
- copy BIBLIOGRAPHY.bib "%SUBMISSION_DIR%\"
26
- copy star-whitepaper.pdf "%SUBMISSION_DIR%\" 2>nul
27
-
28
- echo.
29
- echo ========================================
30
- echo Submission Package Ready!
31
- echo ========================================
32
- echo.
33
- echo Files in %SUBMISSION_DIR%:
34
- dir /b "%SUBMISSION_DIR%"
35
- echo.
36
- echo Next steps:
37
- echo 1. Upload %SUBMISSION_DIR% contents to arxiv.org/submit
38
- echo 2. Metadata:
39
- echo - Title: STAR: Semantic Temporal Associative Retrieval
40
- echo - Authors: R.S. Balch II
41
- echo - Categories: cs.IR (primary), cs.AI (secondary)
42
- echo - Comments: 28M token production deployment; 10 pages; 5 figures
43
- echo - Keywords: Information Retrieval, Graph-Based Search, Local-First AI
44
- echo.
45
-
46
- pause
1
+ @echo off
2
+ REM Prepare arXiv Submission Package
3
+ REM Creates a clean zip file with only necessary files
4
+
5
+ echo ========================================
6
+ echo Preparing arXiv Submission Package
7
+ echo ========================================
8
+ echo.
9
+
10
+ cd /d "%~dp0"
11
+
12
+ REM Create submission directory
13
+ set SUBMISSION_DIR=star-arxiv-submission
14
+ if exist "%SUBMISSION_DIR%" (
15
+ echo Cleaning up old submission directory...
16
+ rmdir /s /q "%SUBMISSION_DIR%"
17
+ )
18
+
19
+ echo Creating submission directory...
20
+ mkdir "%SUBMISSION_DIR%"
21
+
22
+ echo.
23
+ echo Copying files...
24
+ copy star-whitepaper.tex "%SUBMISSION_DIR%\"
25
+ copy BIBLIOGRAPHY.bib "%SUBMISSION_DIR%\"
26
+ copy star-whitepaper.pdf "%SUBMISSION_DIR%\" 2>nul
27
+
28
+ echo.
29
+ echo ========================================
30
+ echo Submission Package Ready!
31
+ echo ========================================
32
+ echo.
33
+ echo Files in %SUBMISSION_DIR%:
34
+ dir /b "%SUBMISSION_DIR%"
35
+ echo.
36
+ echo Next steps:
37
+ echo 1. Upload %SUBMISSION_DIR% contents to arxiv.org/submit
38
+ echo 2. Metadata:
39
+ echo - Title: STAR: Semantic Temporal Associative Retrieval
40
+ echo - Authors: R.S. Balch II
41
+ echo - Categories: cs.IR (primary), cs.AI (secondary)
42
+ echo - Comments: 28M token production deployment; 10 pages; 5 figures
43
+ echo - Keywords: Information Retrieval, Graph-Based Search, Local-First AI
44
+ echo.
45
+
46
+ pause