@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,657 @@
|
|
|
1
|
+
\documentclass[11pt]{article}
|
|
2
|
+
|
|
3
|
+
% Basic packages
|
|
4
|
+
\usepackage[utf8]{inputenc}
|
|
5
|
+
\usepackage[T1]{fontenc}
|
|
6
|
+
\usepackage{amsmath,amssymb,amsfonts}
|
|
7
|
+
\usepackage{graphicx}
|
|
8
|
+
\usepackage{hyperref}
|
|
9
|
+
\usepackage{booktabs}
|
|
10
|
+
\usepackage{geometry}
|
|
11
|
+
\usepackage{listings}
|
|
12
|
+
\usepackage{xcolor}
|
|
13
|
+
\usepackage{caption}
|
|
14
|
+
\usepackage{subcaption}
|
|
15
|
+
\usepackage{multirow}
|
|
16
|
+
\usepackage{array}
|
|
17
|
+
\usepackage[numbers]{natbib} % For citations
|
|
18
|
+
|
|
19
|
+
% Page layout
|
|
20
|
+
\geometry{letterpaper, margin=1in}
|
|
21
|
+
|
|
22
|
+
% Code listing style
|
|
23
|
+
\definecolor{codegreen}{rgb}{0,0.6,0}
|
|
24
|
+
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
|
|
25
|
+
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
|
26
|
+
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
|
|
27
|
+
|
|
28
|
+
\lstdefinestyle{mystyle}{
|
|
29
|
+
backgroundcolor=\color{backcolour},
|
|
30
|
+
commentstyle=\color{codegreen},
|
|
31
|
+
keywordstyle=\color{magenta},
|
|
32
|
+
numberstyle=\tiny\color{codegray},
|
|
33
|
+
stringstyle=\color{codepurple},
|
|
34
|
+
basicstyle=\ttfamily\footnotesize,
|
|
35
|
+
breakatwhitespace=false,
|
|
36
|
+
breaklines=true,
|
|
37
|
+
captionpos=b,
|
|
38
|
+
keepspaces=true,
|
|
39
|
+
numbers=left,
|
|
40
|
+
numbersep=5pt,
|
|
41
|
+
showspaces=false,
|
|
42
|
+
showstringspaces=false,
|
|
43
|
+
showtabs=false,
|
|
44
|
+
tabsize=2
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
\lstset{style=mystyle}
|
|
48
|
+
|
|
49
|
+
% Hyperref setup
|
|
50
|
+
\hypersetup{
|
|
51
|
+
colorlinks=true,
|
|
52
|
+
linkcolor=blue,
|
|
53
|
+
filecolor=magenta,
|
|
54
|
+
urlcolor=cyan,
|
|
55
|
+
pdftitle={STAR: Semantic Temporal Associative Retrieval},
|
|
56
|
+
pdfauthor={R.S. Balch II},
|
|
57
|
+
pdfsubject={Information Retrieval, Graph-Based Search, Local-First AI},
|
|
58
|
+
pdfkeywords={Information Retrieval, Graph-Based Search, SimHash, Local-First AI, Explainable AI, PGlite}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
% Title information
|
|
62
|
+
\title{STAR: Semantic Temporal Associative Retrieval\\
|
|
63
|
+
\large The Browser Paradigm for AI Memory}
|
|
64
|
+
|
|
65
|
+
\author{
|
|
66
|
+
R.S. Balch II\\
|
|
67
|
+
\texttt{rsbalchii@gmail.com}\\
|
|
68
|
+
\url{https://github.com/RSBalchII/anchor-engine-node}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
\date{\today}
|
|
72
|
+
|
|
73
|
+
\begin{document}
|
|
74
|
+
|
|
75
|
+
\maketitle
|
|
76
|
+
|
|
77
|
+
\begin{abstract}
|
|
78
|
+
AI memory is broken. To achieve serious context retrieval, practitioners need server racks, GPU budgets, and cloud subscriptions. Intelligence is locked in black boxes---massive vector indices consuming gigabytes of RAM and tying users to proprietary systems.
|
|
79
|
+
|
|
80
|
+
This paper presents \textbf{STAR} (Semantic Temporal Associative Retrieval), a novel retrieval algorithm implementing the ``Browser Paradigm'' for AI memory. Like a browser rendering websites by loading only necessary shards, STAR enables any device from \$200 laptops to supercomputers to navigate massive context by retrieving only atoms required for the current query.
|
|
81
|
+
|
|
82
|
+
We present the mathematical foundation, implementation details, and production benchmarks from real workloads: 91MB chat history ingested in under 3 minutes, 280,000 molecules indexed, zero data loss. STAR achieves $O(k \cdot \bar{d})$ retrieval complexity, where $k$ is the number of query tags and $\bar{d}$ is the average tag degree, compared to $O(\log n)$ query time on top of $O(n \log n)$ index construction for dense vector ANN (HNSW).
|
|
83
|
+
|
|
84
|
+
The future of AI memory isn't bigger silos---it's universal, sharded utility running on hardware you already own.
|
|
85
|
+
\end{abstract}
|
|
86
|
+
|
|
87
|
+
\section{Introduction}
|
|
88
|
+
\label{sec:introduction}
|
|
89
|
+
|
|
90
|
+
Web browsers are universal. The same website renders identically on a \$300 Chromebook and a \$5000 MacBook Pro because browsers download only necessary shards (HTML, CSS, JS) for the current view---not the entire internet.
|
|
91
|
+
|
|
92
|
+
AI memory should operate similarly. Current Retrieval-Augmented Generation (RAG) systems require loading complete HNSW indices into RAM---gigabytes of vector data---before searching. This restricts deployment to high-spec servers, creating artificial scarcity.
|
|
93
|
+
|
|
94
|
+
\subsection{Contributions}
|
|
95
|
+
|
|
96
|
+
This paper makes the following contributions:
|
|
97
|
+
|
|
98
|
+
\begin{enumerate}
|
|
99
|
+
\item \textbf{STAR Algorithm}: Physics-based graph traversal with temporal decay and SimHash fingerprinting
|
|
100
|
+
\item \textbf{Browser Paradigm}: Sharded atomization enabling 4GB RAM laptops to navigate 10TB+ datasets
|
|
101
|
+
\item \textbf{Production Benchmarks}: Real-world performance on 100MB dataset (280K molecules, 151K atoms)
|
|
102
|
+
\item \textbf{SQL-Native Implementation}: Unified Field Equation executed in PGlite in $\sim$10ms
|
|
103
|
+
\end{enumerate}
|
|
104
|
+
|
|
105
|
+
\section{Mathematical Foundation}
|
|
106
|
+
\label{sec:math}
|
|
107
|
+
|
|
108
|
+
\subsection{Bipartite Graph Formalization}
|
|
109
|
+
|
|
110
|
+
Let $G = (A, T, E)$ be a bipartite graph where:
|
|
111
|
+
|
|
112
|
+
\begin{itemize}
|
|
113
|
+
\item \textbf{$A = \{a_1, a_2, \ldots, a_n\}$}: Set of \textit{Atoms} (text/code/data chunks with byte-offset pointers)
|
|
114
|
+
\item \textbf{$T = \{t_1, t_2, \ldots, t_m\}$}: Set of \textit{Tags} (extracted semantic entities/concepts)
|
|
115
|
+
\item \textbf{$E \subseteq A \times T$}: Sparse edges where $|E| \ll |A| \times |T|$
|
|
116
|
+
\end{itemize}
|
|
117
|
+
|
|
118
|
+
Our bipartite structure draws inspiration from PageRank's graph model \cite{brin1998anatomy}, adapted for personal knowledge graphs with explicit tag-based provenance.
|
|
119
|
+
|
|
120
|
+
Each atom $a_i \in A$ has:
|
|
121
|
+
\begin{itemize}
|
|
122
|
+
\item \textbf{Content pointer}: $(source_i, start_i, end_i)$ --- file path and byte offsets
|
|
123
|
+
\item \textbf{Tag set}: $T(a_i) = \{t \in T : (a_i, t) \in E\}$
|
|
124
|
+
\item \textbf{Timestamp}: $\tau_i \in \mathbb{R}^+$ (Unix epoch)
|
|
125
|
+
\item \textbf{SimHash fingerprint}: $h_i \in \{0,1\}^{64}$
|
|
126
|
+
\end{itemize}
|
|
127
|
+
|
|
128
|
+
\subsection{The Unified Field Equation}
|
|
129
|
+
|
|
130
|
+
For query $q$ with tag set $T(q)$ and candidate atom $a$, the \textbf{gravity score} is:
|
|
131
|
+
|
|
132
|
+
\begin{equation}
|
|
133
|
+
\label{eq:unified_field}
|
|
134
|
+
W(q, a) = \underbrace{\left(\sum_{t \in T(q) \cap T(a)} 1\right) \cdot \gamma^{d(q,a)}}_{\text{Semantic Gravity}} \times \underbrace{e^{-\lambda \Delta t}}_{\text{Temporal Decay}} \times \underbrace{\left(1 - \frac{H(h_q, h_a)}{64}\right)}_{\text{Structural Gravity}}
|
|
135
|
+
\end{equation}
|
|
136
|
+
|
|
137
|
+
Where:
|
|
138
|
+
|
|
139
|
+
\begin{table}[h]
|
|
140
|
+
\centering
|
|
141
|
+
\caption{Unified Field Equation Parameters}
|
|
142
|
+
\label{tab:parameters}
|
|
143
|
+
\begin{tabular}{@{}lll@{}}
|
|
144
|
+
\toprule
|
|
145
|
+
\textbf{Symbol} & \textbf{Meaning} & \textbf{Default} \\ \midrule
|
|
146
|
+
$\gamma$ & Damping factor (controls walk viscosity) & 0.85 \\
|
|
147
|
+
$\lambda$ & Decay constant (half-life $\approx$ 7.9 years) & 0.00001 h$^{-1}$ \\
|
|
148
|
+
$d(q,a)$ & Graph hop distance (0 = direct, 1 = 1-hop) & $\in \{0,1,2,3\}$ \\
|
|
149
|
+
$\Delta t$ & Time difference $|\tau_q - \tau_a|$ in hours & --- \\
|
|
150
|
+
$H(\cdot,\cdot)$ & Hamming distance on 64-bit SimHash & 0--64 \\
|
|
151
|
+
$h_q, h_a$ & SimHash fingerprints of query and atom & $\{0,1\}^{64}$ \\ \bottomrule
|
|
152
|
+
\end{tabular}
|
|
153
|
+
\end{table}
|
|
154
|
+
|
|
155
|
+
\paragraph{Component Breakdown:}
|
|
156
|
+
|
|
157
|
+
\begin{enumerate}
|
|
158
|
+
\item \textbf{Semantic Gravity}: $|T(q) \cap T(a)| \cdot \gamma^{d(q,a)}$
|
|
159
|
+
\begin{itemize}
|
|
160
|
+
\item Shared tag count weighted by graph distance
|
|
161
|
+
\item Exponential decay with hop distance (damping)
|
|
162
|
+
\item Graph distance $d(q,a)$ is the minimum number of hops from any anchor atom (direct match to the query) to candidate $a$.
|
|
163
|
+
\end{itemize}
|
|
164
|
+
|
|
165
|
+
\item \textbf{Temporal Decay}: $e^{-\lambda \Delta t}$
|
|
166
|
+
\begin{itemize}
|
|
167
|
+
\item Recent memories exert stronger gravitational pull
|
|
168
|
+
\item Half-life: $t_{1/2} = \ln(2)/\lambda \approx 69{,}315$ hours $\approx$ 7.9 years
|
|
169
|
+
\item The small decay constant ($\lambda = 0.00001$ h$^{-1}$) ensures memories remain accessible for years, suitable for personal knowledge graphs
|
|
170
|
+
\end{itemize}
|
|
171
|
+
|
|
172
|
+
\item \textbf{Structural Gravity}: $1 - \frac{H(h_q, h_a)}{64}$
|
|
173
|
+
\begin{itemize}
|
|
174
|
+
\item SimHash proximity (1 = identical, 0.5 = uncorrelated, 0 = completely different)
|
|
175
|
+
\item Hamming distance normalized to [0,1]; lower distance = higher similarity
|
|
176
|
+
\item Enables O(1) deduplication via 64-bit fingerprinting \cite{charikar2002similar}
|
|
177
|
+
\end{itemize}
|
|
178
|
+
\end{enumerate}
|
|
179
|
+
|
|
180
|
+
\subsection{SQL-Native Implementation}
|
|
181
|
+
|
|
182
|
+
The Unified Field Equation executes as a single SQL operation in PGlite:
|
|
183
|
+
|
|
184
|
+
\begin{lstlisting}[language=SQL, caption={SQL Implementation of Unified Field Equation}]
|
|
185
|
+
WITH anchor_stats AS (
|
|
186
|
+
SELECT id, timestamp, simhash
|
|
187
|
+
FROM atoms WHERE id IN ($1::text[])
|
|
188
|
+
),
|
|
189
|
+
candidates AS (
|
|
190
|
+
SELECT t.atom_id, a.timestamp, a.simhash,
|
|
191
|
+
COUNT(DISTINCT t.tag) as shared_tags
|
|
192
|
+
FROM tags t
|
|
193
|
+
JOIN atoms a ON t.atom_id = a.id
|
|
194
|
+
WHERE t.tag IN (SELECT DISTINCT tag FROM tags
|
|
195
|
+
WHERE atom_id IN (SELECT id FROM anchor_stats))
|
|
196
|
+
GROUP BY t.atom_id, a.timestamp, a.simhash
|
|
197
|
+
)
|
|
198
|
+
SELECT atom_id,
|
|
199
|
+
MAX(
|
|
200
|
+
GREATEST(0.0, LEAST(1.0,
|
|
201
|
+
((shared_tags / 10.0) * 0.85) *
|
|
202
|
+
EXP(-0.00001 * ABS(timestamp - anchor_ts) / 3600000.0) *
|
|
203
|
+
(1.0 - (bit_count(('x' || LPAD(simhash, 16, '0'))::bit(64)
|
|
204
|
+
# ('x' || LPAD(anchor_sh, 16, '0'))::bit(64)) / 64.0))
|
|
205
|
+
))
|
|
206
|
+
) as gravity_score
|
|
207
|
+
FROM candidates
|
|
208
|
+
CROSS JOIN anchor_stats
|
|
209
|
+
GROUP BY atom_id
|
|
210
|
+
HAVING gravity_score > 0.1
|
|
211
|
+
ORDER BY gravity_score DESC
|
|
212
|
+
LIMIT 200;
|
|
213
|
+
\end{lstlisting}
|
|
214
|
+
|
|
215
|
+
\paragraph{Implementation Notes:}
|
|
216
|
+
\begin{itemize}
|
|
217
|
+
\item \textbf{Normalization}: The \texttt{shared\_tags / 10.0} term normalizes tag counts, assuming $\sim$10 shared tags maximum for typical queries; the final gravity score is clamped to $[0,1]$ via \texttt{GREATEST}/\texttt{LEAST}.
|
|
218
|
+
\item \textbf{Damping}: The \texttt{POWER(0.85, hop)} factor applies per-hop decay, so multi-hop results decay exponentially (hop 1: 85\%, hop 2: 72\%, hop 3: 61\%); the listing above shows the 1-hop case, where the factor is the constant \texttt{0.85}.
|
|
219
|
+
\item \textbf{Recursive Tag-Walker}: The recursive CTE (Appendix~\ref{app:sql}) computes semantic overlap between successive atoms (not directly with the query) because the anchor atoms' tags already capture the query intent; this allows discovery of indirect associations while preserving semantic coherence.
|
|
220
|
+
\item \textbf{Hop Tracking}: Recursive CTE tracks graph distance from anchors for proper damping application
|
|
221
|
+
\item \textbf{Physical Bonus}: Production implementations may add proximity-based bonuses for co-located atoms
|
|
222
|
+
\item \textbf{Bitwise Operations}: SimHash distance uses XOR (\texttt{\#}) and \texttt{bit\_count} for O(1) computation
|
|
223
|
+
\end{itemize}
|
|
224
|
+
|
|
225
|
+
\paragraph{Performance Characteristics:}
|
|
226
|
+
\begin{itemize}
|
|
227
|
+
\item Sparse matrix multiplication via \texttt{JOIN} operations
|
|
228
|
+
\item Bitwise XOR and \texttt{bit\_count} for SimHash distance
|
|
229
|
+
\item Zero transport overhead (only weighted results returned)
|
|
230
|
+
\item \textbf{Latency}: $\sim$10ms for 1M+ atoms on consumer hardware
|
|
231
|
+
\end{itemize}
|
|
232
|
+
|
|
233
|
+
\section{Related Work}
|
|
234
|
+
\label{sec:related}
|
|
235
|
+
|
|
236
|
+
\subsection{Vector-Based Retrieval-Augmented Generation}
|
|
237
|
+
|
|
238
|
+
Modern RAG systems predominantly rely on dense vector representations and approximate nearest neighbor (ANN) search. HNSW (Hierarchical Navigable Small World) graphs \cite{malkov2018efficient} and FAISS \cite{johnson2019billion} represent the state-of-the-art for vector retrieval, offering sub-linear query complexity. However, these approaches require loading complete indices into RAM---often gigabytes for modest corpora---restricting deployment to high-specification servers. Furthermore, vector similarity provides limited explainability: a result matches because its embedding is ``close'' to the query, but the specific reasoning remains opaque. STAR addresses these limitations through sparse graph traversal, enabling CPU-only deployment on resource-constrained devices while providing explicit tag-based provenance for every result.
|
|
239
|
+
|
|
240
|
+
\subsection{Graph-Based Memory Systems}
|
|
241
|
+
|
|
242
|
+
Recent work has explored graph structures as alternatives to dense vectors. T-Retriever \cite{wei2026tretriever} introduces tree-based hierarchical retrieval using semantic-structural entropy for encoding textual graphs. While effective for hierarchical document structures, T-Retriever does not incorporate temporal decay---a key requirement for personal memory systems where recency matters. PersonalAI \cite{menschikov2025personalai} proposes a knowledge graph framework with hyper-edges for personalized LLM agents, achieving strong results on TriviaQA and HotpotQA benchmarks. However, PersonalAI focuses on framework design rather than production implementation; STAR contributes a complete, deployed system with validated performance on 28M tokens of real-world data.
|
|
243
|
+
|
|
244
|
+
Our bipartite graph approach (Atoms $\times$ Tags) differs from general knowledge graphs by enforcing a strict separation between content and metadata. This enables O(1) deduplication via SimHash \cite{charikar2002similar} and supports disposable index architectures where the database can be rebuilt entirely from the source-of-truth filesystem.
|
|
245
|
+
|
|
246
|
+
\subsection{Personal AI Memory Systems}
|
|
247
|
+
|
|
248
|
+
The advent of large context windows has renewed interest in personal AI memory. Second Me \cite{wei2025second} proposes LLM-based memory parameterization, using language models themselves to structure and retrieve personal knowledge. While powerful, this approach requires significant computational resources and offers limited explainability. STAR achieves similar associative retrieval goals through deterministic physics-based scoring, enabling deployment on 4GB RAM laptops without GPU acceleration.
|
|
249
|
+
|
|
250
|
+
Cognitive AI frameworks \cite{salas2025cognitive} emphasize governed memory architectures for long-term coherence. STAR's ephemeral index design (Standard 110) aligns with these principles while adding practical constraints for local-first deployment: zero cloud dependencies, AGPL-3.0 licensing, and real-world validation.
|
|
251
|
+
|
|
252
|
+
\subsection{Temporal Information Retrieval}
|
|
253
|
+
|
|
254
|
+
Temporal decay has been explored in web archive search \cite{kanhabua2008surviving} and recency-weighted ranking, but is rarely integrated into RAG systems as a fundamental scoring component. STAR's Unified Field Equation (Equation~\ref{eq:unified_field}) embeds temporal decay multiplicatively alongside semantic and structural factors, ensuring that any zero factor eliminates irrelevant results. This differs from additive scoring approaches where weak signals can accumulate noise.
|
|
255
|
+
|
|
256
|
+
\subsection{Local-First and Edge Computing}
|
|
257
|
+
|
|
258
|
+
The local-first software movement \cite{haque2023local} emphasizes user data ownership and offline capability. STAR's browser paradigm extends these principles to AI memory: just as browsers render content without downloading the entire internet, STAR retrieves context without loading complete vector indices. This enables sovereign operation---users maintain complete control over their data without cloud dependencies.
|
|
259
|
+
|
|
260
|
+
\subsection{Summary of Contributions}
|
|
261
|
+
|
|
262
|
+
STAR distinguishes itself from prior work through:
|
|
263
|
+
\begin{enumerate}
|
|
264
|
+
\item \textbf{Sparse Graph Physics:} Multiplicative scoring combining co-occurrence, temporal decay, and SimHash similarity (Section \ref{sec:math}).
|
|
265
|
+
\item \textbf{Browser Paradigm:} Sharded atomization enabling resource-constrained devices to navigate large corpora (Section \ref{sec:architecture}).
|
|
266
|
+
\item \textbf{Production Validation:} Real-world deployment with 28M tokens, $<$200ms p95 latency, and 4GB RAM compatibility (Section \ref{sec:benchmarks}).
|
|
267
|
+
\item \textbf{Explainable Retrieval:} Tag paths provide deterministic provenance for every result (Section \ref{sec:retrieval}).
|
|
268
|
+
\end{enumerate}
|
|
269
|
+
|
|
270
|
+
\section{System Architecture}
|
|
271
|
+
\label{sec:architecture}
|
|
272
|
+
|
|
273
|
+
\subsection{Data Hierarchy}
|
|
274
|
+
|
|
275
|
+
\begin{table}[h]
|
|
276
|
+
\centering
|
|
277
|
+
\caption{Data Hierarchy: Three Content Tiers and Their Tags}
|
|
278
|
+
\label{tab:hierarchy}
|
|
279
|
+
\begin{tabular}{@{}llll@{}}
|
|
280
|
+
\toprule
|
|
281
|
+
\textbf{Level} & \textbf{Role} & \textbf{Content Stored} & \textbf{Example} \\ \midrule
|
|
282
|
+
\textbf{Compound} & Document reference & File path + metadata & \texttt{ChatSessions.yaml} (91.88MB) \\
|
|
283
|
+
\textbf{Molecule} & Semantic chunk & Chunk text + byte offsets & Bytes 1024--2048 \\
|
|
284
|
+
\textbf{Atom} & Content unit & Byte-offset pointer + tags & Text chunk with \texttt{\#auth} tag \\
|
|
285
|
+
\textbf{Tag} & Concept/label & Semantic label only & \texttt{\#authentication}, \texttt{\#session} \\ \bottomrule
|
|
286
|
+
\end{tabular}
|
|
287
|
+
\end{table}
|
|
288
|
+
|
|
289
|
+
\textbf{Key Design Decision:} Content lives in \texttt{mirrored\_brain/} filesystem. Database stores pointers only (byte offsets + tags), creating a \textbf{disposable, rebuildable index}.
|
|
290
|
+
|
|
291
|
+
\subsection{The Browser Paradigm}
|
|
292
|
+
|
|
293
|
+
\begin{table}[h]
|
|
294
|
+
\centering
|
|
295
|
+
\caption{Browser Paradigm Mapping}
|
|
296
|
+
\label{tab:browser_paradigm}
|
|
297
|
+
\begin{tabular}{@{}p{0.3\linewidth}p{0.3\linewidth}p{0.3\linewidth}@{}}
|
|
298
|
+
\toprule
|
|
299
|
+
\textbf{Component} & \textbf{Browser Equivalent} & \textbf{Anchor Engine Implementation} \\ \midrule
|
|
300
|
+
HTML/CSS/JS shards & Web page components & Atoms (tags + byte offsets) \\
|
|
301
|
+
DOM tree & Document structure & Tag graph $G = (A, T, E)$ \\
|
|
302
|
+
Lazy loading & On-demand resource fetch & Radial inflation from disk \\
|
|
303
|
+
Cache & Browser cache & Ephemeral PGlite index \\ \bottomrule
|
|
304
|
+
\end{tabular}
|
|
305
|
+
\end{table}
|
|
306
|
+
|
|
307
|
+
\textbf{Universality Principle:} Just as browsers render any website on any machine by loading necessary shards, Anchor Engine navigates any dataset by loading only relevant atoms.
|
|
308
|
+
|
|
309
|
+
\subsection{Complexity Analysis}
|
|
310
|
+
|
|
311
|
+
\begin{table}[h]
|
|
312
|
+
\centering
|
|
313
|
+
\caption{Retrieval Complexity Comparison}
|
|
314
|
+
\label{tab:complexity}
|
|
315
|
+
\begin{tabular}{@{}lcccc@{}}
|
|
316
|
+
\toprule
|
|
317
|
+
\textbf{Method} & \textbf{Time} & \textbf{Space} & \textbf{Explainability} & \textbf{Hardware} \\ \midrule
|
|
318
|
+
\textbf{Dense Vector ANN (HNSW)} & $O(\log n)$ (query)\textsuperscript{$\ddagger$} & $O(n \cdot d)$ & Opaque & GPU preferred \\
|
|
319
|
+
\textbf{STAR (Sparse Graph)} & $\mathbf{O(k \cdot \bar{d})}$ & $\mathbf{O(|E|)}$ & \textbf{Native (tag paths)} & \textbf{CPU-only} \\ \bottomrule
|
|
320
|
+
\end{tabular}
|
|
321
|
+
\end{table}
|
|
322
|
+
|
|
323
|
+
\textsuperscript{$\ddagger$} $O(n \log n)$ is index construction complexity; query complexity is $O(\log n)$.
|
|
324
|
+
|
|
325
|
+
Where:
|
|
326
|
+
\begin{itemize}
|
|
327
|
+
\item $n$ = total atoms
|
|
328
|
+
\item $k$ = query tags (typically 5--20)
|
|
329
|
+
\item $\bar{d}$ = average tag degree (typically 10--100)
|
|
330
|
+
\item $d$ = vector dimension (typically 768--1536)
|
|
331
|
+
\item $|E|$ = sparse edges (typically $10 \cdot n$)
|
|
332
|
+
\end{itemize}
|
|
333
|
+
|
|
334
|
+
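\textbf{Key Insight:} For personal knowledge graphs, $k \cdot \bar{d} \ll n$, so a STAR query touches only a small fraction of the corpus. Its $O(k\bar{d})$ query cost can therefore be competitive with HNSW's $O(\log n)$ query time, without HNSW's $O(n \cdot d)$ memory footprint.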
|
|
335
|
+
|
|
336
|
+
\section{Retrieval Protocol: Planets and Moons}
|
|
337
|
+
\label{sec:retrieval}
|
|
338
|
+
|
|
339
|
+
\subsection{Phase 1 --- Anchor Discovery (Planets)}
|
|
340
|
+
|
|
341
|
+
\textbf{Goal:} High-precision seed set via direct matching.
|
|
342
|
+
|
|
343
|
+
\textbf{Strategies:}
|
|
344
|
+
\begin{enumerate}
|
|
345
|
+
\item \textbf{Full-Text Search (BM25-style)}: \texttt{to\_tsvector() @@ to\_tsquery()} in PGlite
|
|
346
|
+
\item \textbf{Radial Inflation}: Query \texttt{atom\_positions} table for keyword occurrences
|
|
347
|
+
\item \textbf{Engram Lookup}: $O(1)$ cache for frequent entities
|
|
348
|
+
\end{enumerate}
|
|
349
|
+
|
|
350
|
+
\textbf{Output:} 20--200 anchor atoms with $d(q,a) = 0$
|
|
351
|
+
|
|
352
|
+
\subsection{Phase 2 --- Radial Inflation (Moons)}
|
|
353
|
+
|
|
354
|
+
\textbf{Goal:} High-recall expansion via tag-walker graph traversal.
|
|
355
|
+
|
|
356
|
+
\begin{lstlisting}[language=Python, caption={Radial Inflation Algorithm}]
|
|
357
|
+
def radial_inflation(anchors, radius=1, max_per_hop=50):
|
|
358
|
+
current_hop = anchors
|
|
359
|
+
all_results = set(anchors)
|
|
360
|
+
|
|
361
|
+
for hop in range(radius):
|
|
362
|
+
candidates = get_connected_nodes(current_hop)
|
|
363
|
+
weighted = apply_unified_field_equation(candidates, anchors)
|
|
364
|
+
top_k = select_by_gravity(weighted, max_per_hop)
|
|
365
|
+
|
|
366
|
+
all_results.update(top_k)
|
|
367
|
+
current_hop = top_k
|
|
368
|
+
|
|
369
|
+
return all_results
|
|
370
|
+
\end{lstlisting}
|
|
371
|
+
|
|
372
|
+
\paragraph{PhysicsMetadata Schema:}
|
|
373
|
+
\begin{lstlisting}[caption={PhysicsMetadata JSON Schema}]
|
|
374
|
+
{
|
|
375
|
+
"atom_id": "a7f3c2e1-4b5d-6789-abcd-ef0123456789",
|
|
376
|
+
"gravity_score": 0.82,
|
|
377
|
+
"decomposition": {
|
|
378
|
+
"semantic_overlap": 3,
|
|
379
|
+
"temporal_multiplier": 0.94,
|
|
380
|
+
"structural_similarity": 1.0,
|
|
381
|
+
"hop_distance": 1
|
|
382
|
+
},
|
|
383
|
+
"link_reason": "3 shared tags: #authentication, #session, #token",
|
|
384
|
+
"time_drift": "2h 14m ago",
|
|
385
|
+
"source_byte_range": [45210, 46890]
|
|
386
|
+
}
|
|
387
|
+
\end{lstlisting}
|
|
388
|
+
|
|
389
|
+
\subsection{Phase 3 --- Elastic Context Assembly}
|
|
390
|
+
|
|
391
|
+
\textbf{Goal:} Token-budget compliance with maximal coherence.
|
|
392
|
+
|
|
393
|
+
\paragraph{Snippets Coalescing:}
|
|
394
|
+
\begin{itemize}
|
|
395
|
+
\item Merge atoms within 500-byte proximity from same source
|
|
396
|
+
\item Snap to sentence boundaries for narrative flow
|
|
397
|
+
\item \textbf{Result:} 40--100 atoms $\to$ 8--12 coherent paragraphs (500--1000 chars each)
|
|
398
|
+
\end{itemize}
|
|
399
|
+
|
|
400
|
+
\paragraph{Progressive Inflation:}
|
|
401
|
+
\begin{itemize}
|
|
402
|
+
\item Top 10\% results: 2$\times$ inflation radius (1000 bytes)
|
|
403
|
+
\item Next 40\%: 1.5$\times$ radius (750 bytes)
|
|
404
|
+
\item Remaining 50\%: 1$\times$ radius (500 bytes)
|
|
405
|
+
\end{itemize}
|
|
406
|
+
|
|
407
|
+
\paragraph{Metadata Headers:}
|
|
408
|
+
\begin{verbatim}
|
|
409
|
+
[GROUP:1] [File:2025-07-16_to_2025-07-30.json] [Range: 0x4A20-0x4F80]
|
|
410
|
+
[Time: 2025-07-22T07:15:00Z] [Atoms: 5] [Chars: 847]
|
|
411
|
+
<atom id="abc12345" relevance="0.875" timestamp="..." persona="#work">
|
|
412
|
+
Full coherent paragraph content...
|
|
413
|
+
</atom>
|
|
414
|
+
\end{verbatim}
|
|
415
|
+
|
|
416
|
+
\section{Production Performance Benchmarks}
|
|
417
|
+
\label{sec:benchmarks}
|
|
418
|
+
|
|
419
|
+
\subsection{Dataset Characteristics (February 2026)}
|
|
420
|
+
|
|
421
|
+
\begin{table}[h]
|
|
422
|
+
\centering
|
|
423
|
+
\caption{Dataset Statistics}
|
|
424
|
+
\label{tab:dataset}
|
|
425
|
+
\begin{tabular}{@{}ll@{}}
|
|
426
|
+
\toprule
|
|
427
|
+
\textbf{Metric} & \textbf{Value} \\ \midrule
|
|
428
|
+
Total Files & 436 \\
|
|
429
|
+
Total Size & $\sim$100MB \\
|
|
430
|
+
Molecules & 280,000 \\
|
|
431
|
+
Atoms & 151,876 \\
|
|
432
|
+
Tags & $\sim$1,500 \\
|
|
433
|
+
Edges & $\sim$450,000 \\ \bottomrule
|
|
434
|
+
\end{tabular}
|
|
435
|
+
\end{table}
|
|
436
|
+
|
|
437
|
+
\subsection{Ingestion Performance}
|
|
438
|
+
|
|
439
|
+
\begin{table}[h]
|
|
440
|
+
\centering
|
|
441
|
+
\caption{Ingestion Performance by Dataset}
|
|
442
|
+
\label{tab:ingestion}
|
|
443
|
+
\begin{tabular}{@{}lrrrrr@{}}
|
|
444
|
+
\toprule
|
|
445
|
+
\textbf{Dataset} & \textbf{Size} & \textbf{Molecules} & \textbf{Atoms} & \textbf{Time} & \textbf{Throughput} \\ \midrule
|
|
446
|
+
\textbf{Chat Sessions} (monolith) & 91.88MB & 214,000 & 776 & 177.8s & 1,203 mol/s \\
|
|
447
|
+
\textbf{GitHub Archive} & 2.66MB & 36,793 & 497 & 22.4s & 1,642 mol/s \\
|
|
448
|
+
\textbf{Code Repository} & 0.94MB & 20,916 & 199 & 25.0s & 836 mol/s \\
|
|
449
|
+
\textbf{Total System} & $\sim$100MB & \textbf{280,000} & \textbf{1,500} & \textbf{$\sim$4 min} & \textbf{1,200 mol/s} \\ \bottomrule
|
|
450
|
+
\end{tabular}
|
|
451
|
+
\end{table}
|
|
452
|
+
|
|
453
|
+
\textbf{Optimization:} Monolithic files (single YAML) ingest 2$\times$ faster than hundreds of small files due to reduced I/O overhead and transaction batching.
|
|
454
|
+
|
|
455
|
+
\subsection{Search Performance}
|
|
456
|
+
|
|
457
|
+
\begin{table}[h]
|
|
458
|
+
\centering
|
|
459
|
+
\caption{Search Performance by Type}
|
|
460
|
+
\label{tab:search}
|
|
461
|
+
\begin{tabular}{@{}lcccc@{}}
|
|
462
|
+
\toprule
|
|
463
|
+
\textbf{Search Type} & \textbf{Budget} & \textbf{Results} & \textbf{Latency (p95)} & \textbf{Use Case} \\ \midrule
|
|
464
|
+
\textbf{Standard} (70/30) & 16k tokens & 40--100 atoms & \textbf{150ms} & Daily queries \\
|
|
465
|
+
\textbf{Max Recall} (3-hop) & 65k+ tokens & 200--500 atoms & \textbf{690ms} & Research \\
|
|
466
|
+
\textbf{Keyword} (direct FTS) & 4k tokens & 20--50 atoms & \textbf{100ms} & High precision \\ \bottomrule
|
|
467
|
+
\end{tabular}
|
|
468
|
+
\end{table}
|
|
469
|
+
|
|
470
|
+
\paragraph{Scaling Behavior (151K atoms):}
|
|
471
|
+
\begin{itemize}
|
|
472
|
+
\item Standard Search: \textbf{7.7s} (50$\times$ increase for 100$\times$ data growth)
|
|
473
|
+
\item Max Recall: \textbf{25--50s} (acceptable for 618k chars retrieved)
|
|
474
|
+
\end{itemize}
|
|
475
|
+
|
|
476
|
+
\paragraph{Trade-off Analysis:}
|
|
477
|
+
\begin{itemize}
|
|
478
|
+
\item \textbf{Vector RAG (HNSW):} Stable latency, memory-bound (4--8GB for 100MB)
|
|
479
|
+
\item \textbf{STAR:} Linear latency scaling, constant memory ($<$2GB)
|
|
480
|
+
\end{itemize}
|
|
481
|
+
|
|
482
|
+
For sovereign, local-first deployments on consumer hardware, latency scaling is acceptable.
|
|
483
|
+
|
|
484
|
+
\subsection{Memory Management}
|
|
485
|
+
|
|
486
|
+
\begin{table}[h]
|
|
487
|
+
\centering
|
|
488
|
+
\caption{Memory Usage by Phase}
|
|
489
|
+
\label{tab:memory}
|
|
490
|
+
\begin{tabular}{@{}lll@{}}
|
|
491
|
+
\toprule
|
|
492
|
+
\textbf{Phase} & \textbf{RSS Memory} & \textbf{Notes} \\ \midrule
|
|
493
|
+
\textbf{Peak (ingestion)} & 1,657MB & During 91MB file processing \\
|
|
494
|
+
\textbf{Idle (post-cleanup)} & 510MB & After 5min idle \\
|
|
495
|
+
\textbf{Reduction} & \textbf{-69\%} & 1,147MB saved via GC \\ \bottomrule
|
|
496
|
+
\end{tabular}
|
|
497
|
+
\end{table}
|
|
498
|
+
|
|
499
|
+
\textbf{Ephemeral Index Architecture (Standard 110):}
|
|
500
|
+
\begin{itemize}
|
|
501
|
+
\item Database wiped on shutdown
|
|
502
|
+
\item \texttt{mirrored\_brain/} preserved as source of truth
|
|
503
|
+
\item 338 files rehydrated from YAML on restart
|
|
504
|
+
\item Zero data loss guarantee
|
|
505
|
+
\end{itemize}
|
|
506
|
+
|
|
507
|
+
\section{Comparison with Vector-Based RAG}
|
|
508
|
+
\label{sec:comparison}
|
|
509
|
+
|
|
510
|
+
\begin{table}[h]
|
|
511
|
+
\centering
|
|
512
|
+
\caption{STAR vs. Vector RAG Comparison}
|
|
513
|
+
\label{tab:comparison}
|
|
514
|
+
\begin{tabular}{@{}p{0.45\linewidth}p{0.45\linewidth}@{}}
|
|
515
|
+
\toprule
|
|
516
|
+
\textbf{Anchor Engine (STAR)} & \textbf{Vector RAG (HNSW)} \\ \midrule
|
|
517
|
+
\textbf{91MB Ingestion: $\sim$178s} \checkmark 2$\times$ faster & $\sim$360s (batch) \\
|
|
518
|
+
\textbf{Memory Peak: $<$1.7GB} \checkmark 60--80\% less & 4--8GB \\
|
|
519
|
+
\textbf{Search (1.5K atoms): $\sim$150ms} \checkmark Comparable & $\sim$100ms \\
|
|
520
|
+
\textbf{Search (151K atoms): $\sim$7.7s} (trade-off: linear scaling) & $\sim$150ms (stable) \\
|
|
521
|
+
\textbf{CPU-only} \checkmark No GPU & GPU preferred \\
|
|
522
|
+
\textbf{Explainable (tag paths)} \checkmark & Opaque (black box) \\
|
|
523
|
+
\textbf{Local-first} \checkmark No cloud & Cloud-dependent \\ \bottomrule
|
|
524
|
+
\end{tabular}
|
|
525
|
+
\end{table}
|
|
526
|
+
|
|
527
|
+
\subsection{Use Case Fit}
|
|
528
|
+
|
|
529
|
+
\begin{table}[h]
|
|
530
|
+
\centering
|
|
531
|
+
\caption{Recommended Approach by Scenario}
|
|
532
|
+
\label{tab:use_cases}
|
|
533
|
+
\begin{tabular}{@{}ll@{}}
|
|
534
|
+
\toprule
|
|
535
|
+
\textbf{Scenario} & \textbf{Recommended Approach} \\ \midrule
|
|
536
|
+
High-throughput cloud deployment & Vector RAG (HNSW) \\
|
|
537
|
+
\textbf{Sovereign, local-first operation} & \textbf{STAR (Anchor Engine)} \\
|
|
538
|
+
\textbf{4GB RAM laptop} & \textbf{STAR} \\
|
|
539
|
+
\textbf{Explainable retrieval required} & \textbf{STAR} \\
|
|
540
|
+
GPU infrastructure available & Vector RAG \\ \bottomrule
|
|
541
|
+
\end{tabular}
|
|
542
|
+
\end{table}
|
|
543
|
+
|
|
544
|
+
\section{Economic Impact and Democratization}
|
|
545
|
+
\label{sec:economic}
|
|
546
|
+
|
|
547
|
+
\subsection{Breaking Down Silos}
|
|
548
|
+
|
|
549
|
+
Current AI memory landscape:
|
|
550
|
+
\begin{itemize}
|
|
551
|
+
\item \textbf{Proprietary systems}: Black boxes, artificial scarcity
|
|
552
|
+
\item \textbf{Cloud dependency}: Recurring costs, vendor lock-in
|
|
553
|
+
\item \textbf{Hardware barriers}: GPU requirements exclude most users
|
|
554
|
+
\end{itemize}
|
|
555
|
+
|
|
556
|
+
STAR enables:
|
|
557
|
+
\begin{itemize}
|
|
558
|
+
\item \textbf{Cognitive Sovereignty}: Users own data, context, memories
|
|
559
|
+
\item \textbf{Economic Efficiency}: No cloud bills, no GPU rentals
|
|
560
|
+
\item \textbf{Innovation Acceleration}: Open architecture (AGPL-3.0), extensible
|
|
561
|
+
\end{itemize}
|
|
562
|
+
|
|
563
|
+
\subsection{Public Research Foundation}
|
|
564
|
+
|
|
565
|
+
Foundational AI research was publicly funded. STAR builds on:
|
|
566
|
+
\begin{itemize}
|
|
567
|
+
\item \textbf{SimHash} (Charikar, 2002) --- Stanford University
|
|
568
|
+
\item \textbf{PageRank} (Brin \& Page, 1998) --- Stanford University
|
|
569
|
+
\item \textbf{Transformer architecture} (Vaswani et al., 2017) --- Google Brain
|
|
570
|
+
\end{itemize}
|
|
571
|
+
|
|
572
|
+
This is a return on public investment: tools serving individuals, not corporations.
|
|
573
|
+
|
|
574
|
+
\section{Conclusion}
|
|
575
|
+
\label{sec:conclusion}
|
|
576
|
+
|
|
577
|
+
STAR proves that ``Write Once, Run Everywhere'' applies to AI infrastructure. Decouple logic from data. Shard context into atoms. Implement universal distribution.
|
|
578
|
+
|
|
579
|
+
\subsection{Key Achievements}
|
|
580
|
+
|
|
581
|
+
\begin{itemize}
|
|
582
|
+
\item \checkmark \textbf{1,200 molecules/second} ingestion throughput
|
|
583
|
+
\item \checkmark \textbf{$<$200ms} search latency (p95, standard queries)
|
|
584
|
+
\item \checkmark \textbf{69\% memory reduction} after idle cleanup
|
|
585
|
+
\item \checkmark \textbf{Zero data loss} with ephemeral index architecture
|
|
586
|
+
\item \checkmark \textbf{151K atoms} navigable on a 4GB RAM laptop
|
|
587
|
+
\end{itemize}
|
|
588
|
+
|
|
589
|
+
\subsection{Future Work}
|
|
590
|
+
|
|
591
|
+
\begin{enumerate}
|
|
592
|
+
\item \textbf{Caching Layer}: Frequent query result caching (target: 50\% latency reduction)
|
|
593
|
+
\item \textbf{Diffusion Models}: Graph-based reasoning over knowledge structures
|
|
594
|
+
\item \textbf{Mobile Applications}: iOS/Android ports via React Native
|
|
595
|
+
\item \textbf{Plugin Marketplace}: Community-contributed atomizers and taggers
|
|
596
|
+
\end{enumerate}
|
|
597
|
+
|
|
598
|
+
\subsection{Availability}
|
|
599
|
+
|
|
600
|
+
\begin{itemize}
|
|
601
|
+
\item \textbf{Repository}: \url{https://github.com/RSBalchII/anchor-engine-node}
|
|
602
|
+
\item \textbf{License}: AGPL-3.0
|
|
603
|
+
\item \textbf{Production Verified}: February 23, 2026
|
|
604
|
+
\end{itemize}
|
|
605
|
+
|
|
606
|
+
\appendix
|
|
607
|
+
|
|
608
|
+
\section{Recursive CTE for Tag-Walker}
|
|
609
|
+
\label{app:sql}
|
|
610
|
+
|
|
611
|
+
\begin{lstlisting}[language=SQL, caption={Recursive CTE Implementation of Tag-Walker}]
|
|
612
|
+
WITH RECURSIVE tag_walk AS (
|
|
613
|
+
-- Base case: anchor atoms
|
|
614
|
+
SELECT
|
|
615
|
+
a.id as atom_id,
|
|
616
|
+
a.simhash,
|
|
617
|
+
a.timestamp,
|
|
618
|
+
0 as hop_distance,
|
|
619
|
+
1.0 as gravity_score
|
|
620
|
+
FROM atoms a
|
|
621
|
+
WHERE a.id = ANY($1::text[])
|
|
622
|
+
|
|
623
|
+
UNION ALL
|
|
624
|
+
|
|
625
|
+
-- Recursive case: expand one hop per iteration (up to 3 hops) via shared tags
|
|
626
|
+
SELECT
|
|
627
|
+
t2.atom_id,
|
|
628
|
+
a2.simhash,
|
|
629
|
+
a2.timestamp,
|
|
630
|
+
tw.hop_distance + 1,
|
|
631
|
+
((COUNT(DISTINCT t1.tag) / 10.0) * POWER(0.85, tw.hop_distance + 1)) *
|
|
632
|
+
EXP(-0.00001 * ABS(a2.timestamp - tw.timestamp) / 3600000.0) *
|
|
633
|
+
(1.0 - (bit_count(('x' || LPAD(a2.simhash, 16, '0'))::bit(64)
|
|
634
|
+
# ('x' || LPAD(tw.simhash, 16, '0'))::bit(64)) / 64.0))
|
|
635
|
+
FROM tag_walk tw
|
|
636
|
+
JOIN atoms a1 ON tw.atom_id = a1.id
|
|
637
|
+
JOIN tags t1 ON a1.id = t1.atom_id
|
|
638
|
+
JOIN tags t2 ON t1.tag = t2.tag
|
|
639
|
+
JOIN atoms a2 ON t2.atom_id = a2.id
|
|
640
|
+
WHERE tw.hop_distance < 3
|
|
641
|
+
AND a2.id NOT IN (SELECT atom_id FROM tag_walk)
|
|
642
|
+
GROUP BY t2.atom_id, a2.simhash, a2.timestamp, tw.hop_distance, tw.timestamp, tw.simhash
|
|
643
|
+
)
|
|
644
|
+
SELECT * FROM tag_walk
|
|
645
|
+
WHERE gravity_score > 0.1
|
|
646
|
+
ORDER BY gravity_score DESC
|
|
647
|
+
LIMIT 200;
|
|
648
|
+
\end{lstlisting}
|
|
649
|
+
|
|
650
|
+
% Bibliography
|
|
651
|
+
\bibliographystyle{plain}
|
|
652
|
+
\bibliography{BIBLIOGRAPHY}
|
|
653
|
+
|
|
654
|
+
% Ensure arXiv runs pdflatex 4 times for references to resolve
|
|
655
|
+
\typeout{get arXiv to do 4 passes: Label(s) may have changed. Rerun}
|
|
656
|
+
|
|
657
|
+
\end{document}
|