@rbalchii/anchor-engine 4.7.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/LICENSE +608 -608
  2. package/README.md +513 -317
  3. package/anchor.bat +5 -5
  4. package/docs/AGENT_CONTROLLED_ENGINE.md +581 -0
  5. package/docs/API.md +314 -314
  6. package/docs/DEPLOYMENT.md +448 -448
  7. package/docs/INDEX.md +226 -226
  8. package/docs/MD_FILES_INVENTORY.md +166 -0
  9. package/docs/STAR_Whitepaper_Executive.md +216 -216
  10. package/docs/TROUBLESHOOTING.md +535 -535
  11. package/docs/arxiv/BIBLIOGRAPHY.bib +145 -145
  12. package/docs/arxiv/RELATED_WORK.tex +38 -38
  13. package/docs/arxiv/compile.bat +48 -48
  14. package/docs/arxiv/joss_response.md +32 -32
  15. package/docs/arxiv/prepare-submission.bat +46 -46
  16. package/docs/arxiv/review.md +127 -127
  17. package/docs/arxiv/star-whitepaper.tex +656 -656
  18. package/docs/code-patterns.md +289 -289
  19. package/docs/daily/TODAY_SUMMARY.md +245 -0
  20. package/docs/guides/BUILDING.md +64 -0
  21. package/docs/guides/INSTALL_NPM.md +160 -0
  22. package/docs/guides/NPM_PUBLISH_SUMMARY.md +231 -0
  23. package/docs/paper.md +124 -0
  24. package/docs/project/PROJECT_STATE_ASSESSMENT.md +312 -0
  25. package/docs/reviews/code-review-v4.8.1-decision-record.md +165 -0
  26. package/docs/testing/TESTING.md +213 -0
  27. package/docs/testing/TESTING_FRAMEWORK_COMPLETE.md +271 -0
  28. package/docs/testing/search-test-report.md +76 -0
  29. package/docs/whitepaper.md +445 -445
  30. package/engine/dist/commands/distill.js +21 -21
  31. package/engine/dist/config/index.d.ts +7 -0
  32. package/engine/dist/config/index.d.ts.map +1 -1
  33. package/engine/dist/config/index.js +22 -0
  34. package/engine/dist/config/index.js.map +1 -1
  35. package/engine/dist/config/paths.d.ts +1 -1
  36. package/engine/dist/config/paths.js +3 -3
  37. package/engine/dist/config/paths.js.map +1 -1
  38. package/engine/dist/core/db.js +131 -131
  39. package/engine/dist/mcp/server.d.ts +44 -0
  40. package/engine/dist/mcp/server.d.ts.map +1 -0
  41. package/engine/dist/mcp/server.js +427 -0
  42. package/engine/dist/mcp/server.js.map +1 -0
  43. package/engine/dist/native/index.d.ts +20 -21
  44. package/engine/dist/native/index.d.ts.map +1 -1
  45. package/engine/dist/profiling/atomization-profiling.js +3 -3
  46. package/engine/dist/profiling/bottleneck-identification.js +35 -35
  47. package/engine/dist/profiling/content-sanitization-profiling.js +86 -86
  48. package/engine/dist/routes/monitoring.js +8 -8
  49. package/engine/dist/routes/v1/admin.js +8 -8
  50. package/engine/dist/routes/v1/atoms.js +15 -15
  51. package/engine/dist/routes/v1/ingest.d.ts.map +1 -1
  52. package/engine/dist/routes/v1/ingest.js +39 -0
  53. package/engine/dist/routes/v1/ingest.js.map +1 -1
  54. package/engine/dist/routes/v1/system.d.ts.map +1 -1
  55. package/engine/dist/routes/v1/system.js +305 -6
  56. package/engine/dist/routes/v1/system.js.map +1 -1
  57. package/engine/dist/routes/v1/tags.js +2 -2
  58. package/engine/dist/services/backup/backup-restore.js +23 -23
  59. package/engine/dist/services/backup/backup.js +14 -14
  60. package/engine/dist/services/distillation/radial-distiller.d.ts +1 -0
  61. package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -1
  62. package/engine/dist/services/distillation/radial-distiller.js +23 -16
  63. package/engine/dist/services/distillation/radial-distiller.js.map +1 -1
  64. package/engine/dist/services/ingest/github-ingest-service.js +18 -18
  65. package/engine/dist/services/ingest/ingest-atomic.js +79 -79
  66. package/engine/dist/services/ingest/ingest.d.ts.map +1 -1
  67. package/engine/dist/services/ingest/ingest.js +28 -25
  68. package/engine/dist/services/ingest/ingest.js.map +1 -1
  69. package/engine/dist/services/ingest/watchdog.d.ts.map +1 -1
  70. package/engine/dist/services/ingest/watchdog.js +14 -24
  71. package/engine/dist/services/ingest/watchdog.js.map +1 -1
  72. package/engine/dist/services/llm/reader.js +9 -9
  73. package/engine/dist/services/mirror/mirror.js +5 -5
  74. package/engine/dist/services/mirror/mirror.js.map +1 -1
  75. package/engine/dist/services/research/researcher.js +8 -8
  76. package/engine/dist/services/scribe/scribe.js +27 -27
  77. package/engine/dist/services/search/context-inflator.js +34 -34
  78. package/engine/dist/services/search/explore.js +20 -20
  79. package/engine/dist/services/search/physics-tag-walker.js +208 -208
  80. package/engine/dist/services/search/query-parser.js +5 -5
  81. package/engine/dist/services/search/search-utils.js +3 -3
  82. package/engine/dist/services/search/search.js +36 -36
  83. package/engine/dist/services/search/sovereign-system-prompt.js +22 -22
  84. package/engine/dist/services/semantic/semantic-ingestion-service.js +47 -47
  85. package/engine/dist/services/semantic/semantic-search.js +21 -21
  86. package/engine/dist/services/synonyms/auto-synonym-generator.js +35 -35
  87. package/engine/dist/services/system-status.d.ts +34 -0
  88. package/engine/dist/services/system-status.d.ts.map +1 -1
  89. package/engine/dist/services/system-status.js +57 -1
  90. package/engine/dist/services/system-status.js.map +1 -1
  91. package/engine/dist/services/tags/discovery.js +5 -5
  92. package/engine/dist/services/tags/infector.js +6 -6
  93. package/engine/dist/services/tags/tag-auditor.js +51 -51
  94. package/engine/dist/services/taxonomy/taxonomy-manager.js +6 -6
  95. package/engine/dist/utils/tag-cleanup.js +5 -5
  96. package/engine/dist/utils/tag-modulation.js +1 -1
  97. package/engine/dist/utils/tag-modulation.js.map +1 -1
  98. package/engine/package.json +104 -105
  99. package/mcp-server/README.md +404 -0
  100. package/mcp-server/dist/index.d.ts +16 -0
  101. package/mcp-server/dist/index.d.ts.map +1 -0
  102. package/mcp-server/dist/index.js +709 -0
  103. package/mcp-server/dist/index.js.map +1 -0
  104. package/mcp-server/package.json +34 -0
  105. package/package.json +10 -2
  106. package/docs/archive/GIT_BACKUP_VERIFICATION.md +0 -297
  107. package/docs/archive/adoption-guide.md +0 -264
  108. package/docs/archive/adoption-preparation.md +0 -179
  109. package/docs/archive/agent-harness-integration.md +0 -227
  110. package/docs/archive/api-reference.md +0 -106
  111. package/docs/archive/api_flows_diagram.md +0 -118
  112. package/docs/archive/architecture.md +0 -410
  113. package/docs/archive/architecture_diagram.md +0 -174
  114. package/docs/archive/broader-adoption-preparation.md +0 -175
  115. package/docs/archive/browser-paradigm-architecture.md +0 -163
  116. package/docs/archive/chat-integration.md +0 -124
  117. package/docs/archive/community-adoption-materials.md +0 -103
  118. package/docs/archive/community-adoption.md +0 -147
  119. package/docs/archive/comparison-with-siloed-solutions.md +0 -192
  120. package/docs/archive/comprehensive-docs.md +0 -156
  121. package/docs/archive/data_flow_diagram.md +0 -251
  122. package/docs/archive/enhancement-implementation-summary.md +0 -146
  123. package/docs/archive/evolution-summary.md +0 -141
  124. package/docs/archive/ingestion_pipeline_diagram.md +0 -198
  125. package/docs/archive/native-module-profiling-results.md +0 -135
  126. package/docs/archive/positioning-document.md +0 -158
  127. package/docs/archive/positioning.md +0 -175
  128. package/docs/archive/query-builder-documentation.md +0 -218
  129. package/docs/archive/quick-reference.md +0 -40
  130. package/docs/archive/quickstart.md +0 -63
  131. package/docs/archive/relationship-narrative-discovery.md +0 -141
  132. package/docs/archive/search-logic-improvement-plan.md +0 -336
  133. package/docs/archive/search_architecture_diagram.md +0 -212
  134. package/docs/archive/semantic-architecture-guide.md +0 -97
  135. package/docs/archive/sequence-diagrams.md +0 -128
  136. package/docs/archive/system_components_diagram.md +0 -296
  137. package/docs/archive/test-framework-integration.md +0 -109
  138. package/docs/archive/testing-framework-documentation.md +0 -397
  139. package/docs/archive/testing-framework-summary.md +0 -121
  140. package/docs/archive/testing-framework.md +0 -377
  141. package/docs/archive/ui-architecture.md +0 -75
@@ -1,145 +1,145 @@
1
- % STAR Algorithm Bibliography
2
- % Add these to your star-whitepaper.tex with \bibliography{BIBLIOGRAPHY}
3
-
4
- % Foundational Work
5
- @article{charikar2002similar,
6
- title={Similarity estimation techniques from rounding algorithms},
7
- author={Charikar, Moses S},
8
- journal={Proceedings of the thiry-fourth annual ACM symposium on Theory of computing},
9
- pages={380--388},
10
- year={2002},
11
- publisher={ACM}
12
- }
13
-
14
- @article{brin1998anatomy,
15
- title={The anatomy of a large-scale hypertextual web search engine},
16
- author={Brin, Sergey and Page, Lawrence},
17
- journal={Computer networks and ISDN systems},
18
- volume={30},
19
- number={1-7},
20
- pages={107--117},
21
- year={1998},
22
- publisher={Elsevier}
23
- }
24
-
25
- % Vector Retrieval
26
- @article{malkov2018efficient,
27
- title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs},
28
- author={Malkov, Yu A and Yashunin, Dmitry A},
29
- journal={IEEE transactions on pattern analysis and machine intelligence},
30
- volume={42},
31
- number={4},
32
- pages={824--836},
33
- year={2018},
34
- publisher={IEEE}
35
- }
36
-
37
- @article{johnson2019billion,
38
- title={Billion-scale similarity search with {GPUs}},
39
- author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, H{\'e}rve},
40
- journal={IEEE Transactions on Big Data},
41
- volume={7},
42
- number={3},
43
- pages={535--547},
44
- year={2019},
45
- publisher={IEEE}
46
- }
47
-
48
- % Graph-Based RAG
49
- @article{wei2026tretriever,
50
- title={{T-Retriever}: Tree-based Hierarchical Retrieval Augmented Generation for Textual Graphs},
51
- author={Wei, Chunyu and Qin, Huaiyu and He, Siyuan and Wang, Yunhai and Chen, Yueguo},
52
- journal={arXiv preprint arXiv:2601.04945},
53
- year={2026}
54
- }
55
-
56
- @article{menschikov2025personalai,
57
- title={{PersonalAI}: A Systematic Comparison of Knowledge Graph Storage and Retrieval Approaches for Personalized {LLM} agents},
58
- author={Menschikov, Mikhail and Evseev, Dmitry and Dochkina, Victoria and Kostoev, Ruslan and Perepechkin, Ilia and Anokhin, Petr and Burnaev, Evgeny and Semenov, Nikita},
59
- journal={arXiv preprint arXiv:2506.17001},
60
- year={2025}
61
- }
62
-
63
- % Personal AI Memory
64
- @article{wei2025second,
65
- title={{AI}-native Memory 2.0: Second Me},
66
- author={Wei, Jiale and Ying, Xiang and Gao, Tao and Bao, Fangyi and Tao, Felix and Shang, Jingbo},
67
- journal={arXiv preprint arXiv:2503.08102},
68
- year={2025}
69
- }
70
-
71
- @article{salas2025cognitive,
72
- title={Cognitive {AI} framework 2.0: advances in the simulation of human thought},
73
- author={Salas-Guerra, Rommel},
74
- journal={arXiv preprint arXiv:2502.04259},
75
- year={2025}
76
- }
77
-
78
- % RAG and Context Retrieval
79
- @article{lewis2020retrieval,
80
- title={Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
81
- author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
82
- journal={Advances in Neural Information Processing Systems},
83
- volume={33},
84
- pages={9459--9474},
85
- year={2020}
86
- }
87
-
88
- @article{guu2020realm,
89
- title={{REALM}: Retrieval-augmented language model pre-training},
90
- author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Mingwei},
91
- journal={Proceedings of the 37th International Conference on Machine Learning},
92
- pages={3929--3938},
93
- year={2020}
94
- }
95
-
96
- % Local-First and Edge Computing
97
- @article{haque2023local,
98
- title={Local-first software: You own your data, in spite of the cloud},
99
- author={Haque, Amjad and Kleppmann, Martin and Wiggins, Adam},
100
- journal={Proceedings of the 2023 ACM SIGPLAN International Symposium on New Ideas, New Paradigms, and Reflections on Programming and Software},
101
- pages={59--75},
102
- year={2023},
103
- publisher={ACM}
104
- }
105
-
106
- % Knowledge Graphs
107
- @article{hogan2021knowledge,
108
- title={Knowledge graphs},
109
- author={Hogan, Aidan and Blomqvist, Eva and Cochez, Michael and d'Amato, Claudia and de Melo, Gerard and Gutierrez, Claudio and Gayo, Jos{\'e} Emilio Labra and Kirrane, Sabrina and Neumaier, Sebastian and Polleres, Axel and others},
110
- journal={ACM Computing Surveys (CSUR)},
111
- volume={54},
112
- number={4},
113
- pages={1--37},
114
- year={2021},
115
- publisher={ACM New York, NY, USA}
116
- }
117
-
118
- % Temporal Information Retrieval
119
- @article{kanhabua2008surviving,
120
- title={Surviving the {World Wide Web}: A survey on web archive search},
121
- author={Kanhabua, Nattiya and N{\o}rv{\aa}g, Kjetil},
122
- journal={Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology},
123
- pages={675--676},
124
- year={2009}
125
- }
126
-
127
- % SimHash and Near-Duplicate Detection
128
- @article{sadowski2008algorithm,
129
- title={The algorithm behind the {Detect} project},
130
- author={Sadowski, Caitlin and Levin, Greg},
131
- journal={Google Inc.},
132
- year={2008}
133
- }
134
-
135
- % Sparse Retrieval
136
- @article{lin2021pretrained,
137
- title={Pretrained transformers for text ranking: {BERT} and beyond},
138
- author={Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
139
- journal={Synthesis Lectures on Human Language Technologies},
140
- volume={14},
141
- number={4},
142
- pages={1--325},
143
- year={2021},
144
- publisher={Morgan & Claypool Publishers}
145
- }
1
+ % STAR Algorithm Bibliography
2
+ % Add these to your star-whitepaper.tex with \bibliography{BIBLIOGRAPHY}
3
+
4
+ % Foundational Work
5
+ @article{charikar2002similar,
6
+ title={Similarity estimation techniques from rounding algorithms},
7
+ author={Charikar, Moses S},
8
+ journal={Proceedings of the thirty-fourth annual ACM symposium on Theory of computing},
9
+ pages={380--388},
10
+ year={2002},
11
+ publisher={ACM}
12
+ }
13
+
14
+ @article{brin1998anatomy,
15
+ title={The anatomy of a large-scale hypertextual web search engine},
16
+ author={Brin, Sergey and Page, Lawrence},
17
+ journal={Computer networks and ISDN systems},
18
+ volume={30},
19
+ number={1-7},
20
+ pages={107--117},
21
+ year={1998},
22
+ publisher={Elsevier}
23
+ }
24
+
25
+ % Vector Retrieval
26
+ @article{malkov2018efficient,
27
+ title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs},
28
+ author={Malkov, Yu A and Yashunin, Dmitry A},
29
+ journal={IEEE transactions on pattern analysis and machine intelligence},
30
+ volume={42},
31
+ number={4},
32
+ pages={824--836},
33
+ year={2018},
34
+ publisher={IEEE}
35
+ }
36
+
37
+ @article{johnson2019billion,
38
+ title={Billion-scale similarity search with {GPUs}},
39
+ author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
40
+ journal={IEEE Transactions on Big Data},
41
+ volume={7},
42
+ number={3},
43
+ pages={535--547},
44
+ year={2019},
45
+ publisher={IEEE}
46
+ }
47
+
48
+ % Graph-Based RAG
49
+ @article{wei2026tretriever,
50
+ title={{T-Retriever}: Tree-based Hierarchical Retrieval Augmented Generation for Textual Graphs},
51
+ author={Wei, Chunyu and Qin, Huaiyu and He, Siyuan and Wang, Yunhai and Chen, Yueguo},
52
+ journal={arXiv preprint arXiv:2601.04945},
53
+ year={2026}
54
+ }
55
+
56
+ @article{menschikov2025personalai,
57
+ title={{PersonalAI}: A Systematic Comparison of Knowledge Graph Storage and Retrieval Approaches for Personalized {LLM} agents},
58
+ author={Menschikov, Mikhail and Evseev, Dmitry and Dochkina, Victoria and Kostoev, Ruslan and Perepechkin, Ilia and Anokhin, Petr and Burnaev, Evgeny and Semenov, Nikita},
59
+ journal={arXiv preprint arXiv:2506.17001},
60
+ year={2025}
61
+ }
62
+
63
+ % Personal AI Memory
64
+ @article{wei2025second,
65
+ title={{AI}-native Memory 2.0: Second Me},
66
+ author={Wei, Jiale and Ying, Xiang and Gao, Tao and Bao, Fangyi and Tao, Felix and Shang, Jingbo},
67
+ journal={arXiv preprint arXiv:2503.08102},
68
+ year={2025}
69
+ }
70
+
71
+ @article{salas2025cognitive,
72
+ title={Cognitive {AI} framework 2.0: advances in the simulation of human thought},
73
+ author={Salas-Guerra, Rommel},
74
+ journal={arXiv preprint arXiv:2502.04259},
75
+ year={2025}
76
+ }
77
+
78
+ % RAG and Context Retrieval
79
+ @article{lewis2020retrieval,
80
+ title={Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
81
+ author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
82
+ journal={Advances in Neural Information Processing Systems},
83
+ volume={33},
84
+ pages={9459--9474},
85
+ year={2020}
86
+ }
87
+
88
+ @article{guu2020realm,
89
+ title={{REALM}: Retrieval-augmented language model pre-training},
90
+ author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Mingwei},
91
+ journal={Proceedings of the 37th International Conference on Machine Learning},
92
+ pages={3929--3938},
93
+ year={2020}
94
+ }
95
+
96
+ % Local-First and Edge Computing
97
+ @article{haque2023local,
98
+ title={Local-first software: You own your data, in spite of the cloud},
99
+ author={Haque, Amjad and Kleppmann, Martin and Wiggins, Adam},
100
+ journal={Proceedings of the 2023 ACM SIGPLAN International Symposium on New Ideas, New Paradigms, and Reflections on Programming and Software},
101
+ pages={59--75},
102
+ year={2023},
103
+ publisher={ACM}
104
+ }
105
+
106
+ % Knowledge Graphs
107
+ @article{hogan2021knowledge,
108
+ title={Knowledge graphs},
109
+ author={Hogan, Aidan and Blomqvist, Eva and Cochez, Michael and d'Amato, Claudia and de Melo, Gerard and Gutierrez, Claudio and Gayo, Jos{\'e} Emilio Labra and Kirrane, Sabrina and Neumaier, Sebastian and Polleres, Axel and others},
110
+ journal={ACM Computing Surveys (CSUR)},
111
+ volume={54},
112
+ number={4},
113
+ pages={1--37},
114
+ year={2021},
115
+ publisher={ACM New York, NY, USA}
116
+ }
117
+
118
+ % Temporal Information Retrieval
119
+ @article{kanhabua2008surviving,
120
+ title={Surviving the {World Wide Web}: A survey on web archive search},
121
+ author={Kanhabua, Nattiya and N{\o}rv{\aa}g, Kjetil},
122
+ journal={Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology},
123
+ pages={675--676},
124
+ year={2009}
125
+ }
126
+
127
+ % SimHash and Near-Duplicate Detection
128
+ @article{sadowski2008algorithm,
129
+ title={The algorithm behind the {Detect} project},
130
+ author={Sadowski, Caitlin and Levin, Greg},
131
+ journal={Google Inc.},
132
+ year={2008}
133
+ }
134
+
135
+ % Sparse Retrieval
136
+ @article{lin2021pretrained,
137
+ title={Pretrained transformers for text ranking: {BERT} and beyond},
138
+ author={Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
139
+ journal={Synthesis Lectures on Human Language Technologies},
140
+ volume={14},
141
+ number={4},
142
+ pages={1--325},
143
+ year={2021},
144
+ publisher={Morgan & Claypool Publishers}
145
+ }
@@ -1,39 +1,39 @@
1
- % Related Work Section for STAR Whitepaper
2
- % Insert after Section 2 (Mathematical Foundation) and before Section 3 (System Architecture)
3
-
4
- \section{Related Work}
5
- \label{sec:related}
6
-
7
- \subsection{Vector-Based Retrieval-Augmented Generation}
8
-
9
- Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM-often gigabytes for modest corpora-restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is "close" to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
10
-
11
- \subsection{Graph-Based Memory Systems}
12
-
13
- Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay-a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
14
-
15
- Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
16
-
17
- \subsection{Personal AI Memory Systems}
18
-
19
- The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
20
-
21
- Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
22
-
23
- \subsection{Temporal Information Retrieval}
24
-
25
- Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
26
-
27
- \subsection{Local-First and Edge Computing}
28
-
29
- The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation-users maintain complete control over their data without cloud dependencies.
30
-
31
- \subsection{Summary of Contributions}
32
-
33
- STAR distinguishes itself from prior work through:
34
- \begin{enumerate}
35
- \item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
36
- \item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
37
- \item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
38
- \item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
1
+ % Related Work Section for STAR Whitepaper
2
+ % Insert after Section 2 (Mathematical Foundation) and before Section 3 (System Architecture)
3
+
4
+ \section{Related Work}
5
+ \label{sec:related}
6
+
7
+ \subsection{Vector-Based Retrieval-Augmented Generation}
8
+
9
+ Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM---often gigabytes for modest corpora---restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is ``close'' to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
10
+
11
+ \subsection{Graph-Based Memory Systems}
12
+
13
+ Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay---a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
14
+
15
+ Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
16
+
17
+ \subsection{Personal AI Memory Systems}
18
+
19
+ The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
20
+
21
+ Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
22
+
23
+ \subsection{Temporal Information Retrieval}
24
+
25
+ Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
26
+
27
+ \subsection{Local-First and Edge Computing}
28
+
29
+ The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation---users maintain complete control over their data without cloud dependencies.
30
+
31
+ \subsection{Summary of Contributions}
32
+
33
+ STAR distinguishes itself from prior work through:
34
+ \begin{enumerate}
35
+ \item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
36
+ \item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
37
+ \item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
38
+ \item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
39
39
  \end{enumerate}
@@ -1,48 +1,48 @@
1
- @echo off
2
- REM STAR Whitepaper Compilation Script
3
- REM Run this 4 times for references to resolve
4
-
5
- echo ========================================
6
- echo STAR Whitepaper - Compilation Script
7
- echo ========================================
8
- echo.
9
-
10
- cd /d "%~dp0"
11
-
12
- echo Step 1/4: First pdflatex pass...
13
- pdflatex -interaction=nonstopmode star-whitepaper.tex
14
- if errorlevel 1 (
15
- echo ERROR: First pdflatex pass failed!
16
- pause
17
- exit /b 1
18
- )
19
-
20
- echo.
21
- echo Step 2/4: Running bibtex...
22
- bibtex star-whitepaper
23
- if errorlevel 1 (
24
- echo ERROR: BibTeX failed!
25
- pause
26
- exit /b 1
27
- )
28
-
29
- echo.
30
- echo Step 3/4: Second pdflatex pass...
31
- pdflatex -interaction=nonstopmode star-whitepaper.tex
32
-
33
- echo.
34
- echo Step 4/4: Third pdflatex pass (final)...
35
- pdflatex -interaction=nonstopmode star-whitepaper.tex
36
-
37
- echo.
38
- echo ========================================
39
- echo Compilation Complete!
40
- echo ========================================
41
- echo.
42
- echo Output: star-whitepaper.pdf
43
- echo.
44
- echo If you see "Label(s) may have changed" warnings,
45
- echo run this script one more time.
46
- echo.
47
-
48
- pause
1
+ @echo off
2
+ REM STAR Whitepaper Compilation Script
3
+ REM Run this 4 times for references to resolve
4
+
5
+ echo ========================================
6
+ echo STAR Whitepaper - Compilation Script
7
+ echo ========================================
8
+ echo.
9
+
10
+ cd /d "%~dp0"
11
+
12
+ echo Step 1/4: First pdflatex pass...
13
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
14
+ if errorlevel 1 (
15
+ echo ERROR: First pdflatex pass failed!
16
+ pause
17
+ exit /b 1
18
+ )
19
+
20
+ echo.
21
+ echo Step 2/4: Running bibtex...
22
+ bibtex star-whitepaper
23
+ if errorlevel 1 (
24
+ echo ERROR: BibTeX failed!
25
+ pause
26
+ exit /b 1
27
+ )
28
+
29
+ echo.
30
+ echo Step 3/4: Second pdflatex pass...
31
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
32
+
33
+ echo.
34
+ echo Step 4/4: Third pdflatex pass (final)...
35
+ pdflatex -interaction=nonstopmode star-whitepaper.tex
36
+
37
+ echo.
38
+ echo ========================================
39
+ echo Compilation Complete!
40
+ echo ========================================
41
+ echo.
42
+ echo Output: star-whitepaper.pdf
43
+ echo.
44
+ echo If you see "Label(s) may have changed" warnings,
45
+ echo run this script one more time.
46
+ echo.
47
+
48
+ pause
@@ -1,33 +1,33 @@
1
- # Response to JOSS Editor
2
-
3
- **To:** @danielskatz
4
- **From:** @RSBalchII
5
- **Date:** 2026-02-25
6
-
7
- Thank you for your help with the PDF generation and for your guidance on the submission requirements.
8
-
9
- ## Research Software Qualification
10
-
11
- To answer your question regarding how STAR qualifies as research software:
12
-
13
- As an independent researcher, my work focuses on Information Retrieval and Personal Knowledge Management—specifically solving the problem of deploying large-scale context retrieval systems on resource-constrained, consumer‑grade hardware without relying on cloud APIs.
14
-
15
- STAR (and its reference implementation, the Anchor Engine) qualifies as research software because it embodies a novel algorithmic research contribution. The software is the practical realization of a new retrieval model (what I term the “Unified Field Equation”) that uses a sparse bipartite graph, temporal decay, and SimHash deduplication to replace computationally expensive dense‑vector ANN combinations (like HNSW/FAISS). The performance benchmarks and the novel $O(k \cdot \bar{d})$ complexity scaling presented in the paper were exclusively generated, validated, and tracked using this software.
16
-
17
- Because the project is very new (the core research and implementation began in August 2025), it has not yet been cited in formal external publications. However, it was built explicitly for the research purpose of providing an explainable, scalable alternative to opaque dense vector indices, enabling researchers to index and traverse massive text corpora (e.g., my 28 M‑token validation dataset) directly on standard laptops.
18
-
19
- ## Word‑Count Compliance
20
-
21
- I have trimmed the `paper.md` from 2720 words to **1219 words** (measured by `wc -w`), well within the JOSS limit of 1750 words. The revisions preserve all key technical content while removing redundant descriptions and condensing tables.
22
-
23
- ## DOI Updates
24
-
25
- Missing DOIs have been added to `paper.bib` for all cited works where a DOI is available. The editorialbot’s reference check should now pass.
26
-
27
- ## Next Steps
28
-
29
- If the research‑software qualification is satisfactory, I am ready to proceed with the review. Please let me know if any further clarifications or adjustments are needed.
30
-
31
- Thank you again for your time and assistance.
32
-
1
+ # Response to JOSS Editor
2
+
3
+ **To:** @danielskatz
4
+ **From:** @RSBalchII
5
+ **Date:** 2026-02-25
6
+
7
+ Thank you for your help with the PDF generation and for your guidance on the submission requirements.
8
+
9
+ ## Research Software Qualification
10
+
11
+ To answer your question regarding how STAR qualifies as research software:
12
+
13
+ As an independent researcher, my work focuses on Information Retrieval and Personal Knowledge Management—specifically solving the problem of deploying large-scale context retrieval systems on resource-constrained, consumer‑grade hardware without relying on cloud APIs.
14
+
15
+ STAR (and its reference implementation, the Anchor Engine) qualifies as research software because it embodies a novel algorithmic research contribution. The software is the practical realization of a new retrieval model (what I term the “Unified Field Equation”) that uses a sparse bipartite graph, temporal decay, and SimHash deduplication to replace computationally expensive dense‑vector ANN combinations (like HNSW/FAISS). The performance benchmarks and the novel $O(k \cdot \bar{d})$ complexity scaling presented in the paper were exclusively generated, validated, and tracked using this software.
16
+
17
+ Because the project is very new (the core research and implementation began in August 2025), it has not yet been cited in formal external publications. However, it was built explicitly for the research purpose of providing an explainable, scalable alternative to opaque dense vector indices, enabling researchers to index and traverse massive text corpora (e.g., my 28 M‑token validation dataset) directly on standard laptops.
18
+
19
+ ## Word‑Count Compliance
20
+
21
+ I have trimmed the `paper.md` from 2720 words to **1219 words** (measured by `wc -w`), well within the JOSS limit of 1750 words. The revisions preserve all key technical content while removing redundant descriptions and condensing tables.
22
+
23
+ ## DOI Updates
24
+
25
+ Missing DOIs have been added to `paper.bib` for all cited works where a DOI is available. The editorialbot’s reference check should now pass.
26
+
27
+ ## Next Steps
28
+
29
+ If the research‑software qualification is satisfactory, I am ready to proceed with the review. Please let me know if any further clarifications or adjustments are needed.
30
+
31
+ Thank you again for your time and assistance.
32
+
33
33
  —R.S. Balch II
@@ -1,46 +1,46 @@
1
- @echo off
2
- REM Prepare arXiv Submission Package
3
- REM Creates a clean zip file with only necessary files
4
-
5
- echo ========================================
6
- echo Preparing arXiv Submission Package
7
- echo ========================================
8
- echo.
9
-
10
- cd /d "%~dp0"
11
-
12
- REM Create submission directory
13
- set SUBMISSION_DIR=star-arxiv-submission
14
- if exist "%SUBMISSION_DIR%" (
15
- echo Cleaning up old submission directory...
16
- rmdir /s /q "%SUBMISSION_DIR%"
17
- )
18
-
19
- echo Creating submission directory...
20
- mkdir "%SUBMISSION_DIR%"
21
-
22
- echo.
23
- echo Copying files...
24
- copy star-whitepaper.tex "%SUBMISSION_DIR%\"
25
- copy BIBLIOGRAPHY.bib "%SUBMISSION_DIR%\"
26
- copy star-whitepaper.pdf "%SUBMISSION_DIR%\" 2>nul
27
-
28
- echo.
29
- echo ========================================
30
- echo Submission Package Ready!
31
- echo ========================================
32
- echo.
33
- echo Files in %SUBMISSION_DIR%:
34
- dir /b "%SUBMISSION_DIR%"
35
- echo.
36
- echo Next steps:
37
- echo 1. Upload %SUBMISSION_DIR% contents to arxiv.org/submit
38
- echo 2. Metadata:
39
- echo - Title: STAR: Semantic Temporal Associative Retrieval
40
- echo - Authors: R.S. Balch II
41
- echo - Categories: cs.IR (primary), cs.AI (secondary)
42
- echo - Comments: 28M token production deployment; 10 pages; 5 figures
43
- echo - Keywords: Information Retrieval, Graph-Based Search, Local-First AI
44
- echo.
45
-
46
- pause
1
+ @echo off
2
+ REM Prepare arXiv Submission Package
3
+ REM Creates a clean zip file with only necessary files
4
+
5
+ echo ========================================
6
+ echo Preparing arXiv Submission Package
7
+ echo ========================================
8
+ echo.
9
+
10
+ cd /d "%~dp0"
11
+
12
+ REM Create submission directory
13
+ set SUBMISSION_DIR=star-arxiv-submission
14
+ if exist "%SUBMISSION_DIR%" (
15
+ echo Cleaning up old submission directory...
16
+ rmdir /s /q "%SUBMISSION_DIR%"
17
+ )
18
+
19
+ echo Creating submission directory...
20
+ mkdir "%SUBMISSION_DIR%"
21
+
22
+ echo.
23
+ echo Copying files...
24
+ copy star-whitepaper.tex "%SUBMISSION_DIR%\"
25
+ copy BIBLIOGRAPHY.bib "%SUBMISSION_DIR%\"
26
+ copy star-whitepaper.pdf "%SUBMISSION_DIR%\" 2>nul
27
+
28
+ echo.
29
+ echo ========================================
30
+ echo Submission Package Ready!
31
+ echo ========================================
32
+ echo.
33
+ echo Files in %SUBMISSION_DIR%:
34
+ dir /b "%SUBMISSION_DIR%"
35
+ echo.
36
+ echo Next steps:
37
+ echo 1. Upload %SUBMISSION_DIR% contents to arxiv.org/submit
38
+ echo 2. Metadata:
39
+ echo - Title: STAR: Semantic Temporal Associative Retrieval
40
+ echo - Authors: R.S. Balch II
41
+ echo - Categories: cs.IR (primary), cs.AI (secondary)
42
+ echo - Comments: 28M token production deployment; 10 pages; 5 figures
43
+ echo - Keywords: Information Retrieval, Graph-Based Search, Local-First AI
44
+ echo.
45
+
46
+ pause