@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,1286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Orchestrator — "The Brain"
|
|
3
|
+
*
|
|
4
|
+
* Core search orchestration, Tag-Walker physics engine, engram lookup,
|
|
5
|
+
* and result merging. All NLP parsing lives in query-parser.ts ("The Ears"),
|
|
6
|
+
* utilities in search-utils.ts ("The Tools"), and graph reasoning in
|
|
7
|
+
* bright-nodes.ts ("The Illuminator").
|
|
8
|
+
*
|
|
9
|
+
* Standard 086 Compliant.
|
|
10
|
+
* Standard 086 = "Dual-Strategy Search" (internal specification numbering).
|
|
11
|
+
* See specs/standards/STANDARD_086_DUAL_STRATEGY_SEARCH.md for full spec.
|
|
12
|
+
* Two modes: Standard Search (70/30 budget, temporal decay) and Max-Recall
|
|
13
|
+
* (zero decay, 3-hop traversal). Mode auto-selects based on token budget.
|
|
14
|
+
*/
|
|
15
|
+
import { db } from '../../core/db.js';
|
|
16
|
+
import { createHash } from 'crypto';
|
|
17
|
+
import { config } from '../../config/index.js';
|
|
18
|
+
import { ContextInflator } from './context-inflator.js';
|
|
19
|
+
import { systemStatus } from '../system-status.js';
|
|
20
|
+
import { processWithAdaptiveConcurrency } from '../../utils/adaptive-concurrency.js';
|
|
21
|
+
// --- Imports from extracted modules ---
|
|
22
|
+
import { nlp, getGlobalTags, sanitizeFtsQuery, expandCamelCase, extractTemporalContext, splitQueryIntoMolecules, parseQuery } from './query-parser.js';
|
|
23
|
+
import { getHammingDistance, formatResults, filterDisplayTags } from './search-utils.js';
|
|
24
|
+
// Re-export everything that external consumers need
|
|
25
|
+
export { getGlobalTags, filterDisplayTags, parseQuery, splitQueryIntoMolecules };
|
|
26
|
+
export { getBrightNodes, getStructuredGraph } from './bright-nodes.js';
|
|
27
|
+
/**
 * Lightweight semantic scoring for two-pass search (Standard 134).
 * Scores a candidate from data already on the result object, without any
 * context inflation.
 *
 * The score is `base * 0.3 + termOverlap * 0.5 + phrase + tags + recency`,
 * capped at 1.0, where:
 *  - base        = `result.score`, or 0.5 when that is missing/zero
 *  - termOverlap = fraction of query terms found in the content (substring match)
 *  - phrase      = 0.3 when the whole (non-blank) query occurs in the content
 *  - tags        = 0.1 per tag that contains at least one query term
 *  - recency     = up to 0.2, decaying linearly to 0 over 30 days
 *
 * @param {object} result - Candidate with optional `content`, `score`, `tags`, `timestamp`.
 * @param {string[]} queryTerms - Individual query terms.
 * @param {string} query - The full query string, used for the exact-phrase bonus.
 * @returns {number} Score in the range [0, 1]; `result.score || 0` when the candidate has no content.
 */
function calculateLightweightScore(result, queryTerms, query) {
    if (!result.content) {
        return result.score || 0;
    }
    const content = result.content.toLowerCase();
    // Ignore blank / non-string terms: an empty term is a substring of everything
    // and would otherwise count as a match.
    const terms = Array.isArray(queryTerms)
        ? queryTerms
            .filter((term) => typeof term === 'string' && term.length > 0)
            .map((term) => term.toLowerCase())
        : [];
    // Term overlap score (0-1)
    const termMatches = terms.filter((term) => content.includes(term)).length;
    const termScore = terms.length > 0 ? termMatches / terms.length : 0;
    // Exact phrase bonus. A blank query must not earn it: ''.includes-style
    // matches are trivially true for every candidate.
    const phrase = typeof query === 'string' ? query.toLowerCase() : '';
    const phraseBonus = phrase.trim().length > 0 && content.includes(phrase) ? 0.3 : 0;
    // Tag relevance bonus (only string tags inside a real array are considered)
    const tagList = Array.isArray(result.tags) ? result.tags : [];
    const matchingTags = tagList.filter((tag) => {
        if (typeof tag !== 'string') {
            return false;
        }
        const tagLower = tag.toLowerCase();
        return terms.some((term) => tagLower.includes(term));
    });
    const tagBonus = matchingTags.length * 0.1;
    // Recency bonus (newer = higher score, decay over 30 days)
    let recencyBonus = 0;
    if (result.timestamp) {
        // Number() also accepts Date objects and bigint epoch values.
        const timestampMs = Number(result.timestamp);
        if (Number.isFinite(timestampMs)) {
            // Clamp future timestamps to "now" so the bonus never exceeds 0.2.
            const ageDays = Math.max(0, (Date.now() - timestampMs) / (1000 * 60 * 60 * 24));
            recencyBonus = Math.max(0, 0.2 * (1 - ageDays / 30));
        }
    }
    // Combine scores (base score + term overlap + bonuses)
    const baseScore = result.score || 0.5;
    return Math.min(1.0, baseScore * 0.3 + termScore * 0.5 + phraseBonus + tagBonus + recencyBonus);
}
|
|
60
|
+
/**
 * Create or update an engram (lexical sidecar) for fast entity lookup.
 *
 * The key is lower-cased and trimmed, then hashed with MD5 to form the row
 * key; the memory ids are stored as a JSON string. An existing row with the
 * same key has its value overwritten.
 *
 * @param {string} key - Entity name to index.
 * @param {Array} memoryIds - Ids to associate with the key.
 * @returns {Promise<void>}
 */
export async function createEngram(key, memoryIds) {
    const engramId = createHash('md5')
        .update(key.toLowerCase().trim())
        .digest('hex');
    const serializedIds = JSON.stringify(memoryIds);
    await db.run(
        `INSERT INTO engrams (key, value) VALUES ($1, $2) ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value`,
        [engramId, serializedIds]
    );
}
|
|
69
|
+
/**
 * Look up the memory ids stored for an engram key (single keyed SELECT).
 *
 * The key is normalized and hashed exactly as in `createEngram`. The stored
 * value is accepted either as a JSON string or as an already-decoded array
 * (some drivers decode JSON columns themselves).
 *
 * @param {string} key - Entity name to look up.
 * @returns {Promise<Array>} The stored ids, or `[]` when the key is unknown
 *   or the stored value is missing, malformed, or not an array.
 */
export async function lookupByEngram(key) {
    const normalizedKey = key.toLowerCase().trim();
    const engramId = createHash('md5').update(normalizedKey).digest('hex');
    const result = await db.run(`SELECT value FROM engrams WHERE key = $1`, [engramId]);
    const stored = result?.rows?.[0]?.value;
    if (stored == null) {
        return [];
    }
    let ids = stored;
    if (typeof stored === 'string') {
        try {
            ids = JSON.parse(stored);
        }
        catch (err) {
            // A corrupt sidecar row should not fail the whole search; report and treat as a miss.
            console.warn('[Search] Ignoring malformed engram value for key', engramId, err);
            return [];
        }
    }
    return Array.isArray(ids) ? ids : [];
}
|
|
82
|
+
/**
 * Hydrate engram IDs into full SearchResult objects.
 *
 * Fetches the matching rows from `atoms` in one query and reshapes each row:
 * `source_path` is exposed as `source`, missing `buckets`/`tags` become empty
 * arrays, missing `provenance` becomes 'internal', `epochs` is an empty
 * string and every hit gets a score of 1.0.
 *
 * @param {Array} ids - Atom ids to load.
 * @returns {Promise<object[]>} Hydrated results; `[]` for empty input or when the query fails.
 */
export async function hydrateEngrams(ids) {
    const hasIds = ids && ids.length !== 0;
    if (!hasIds) {
        return [];
    }
    const sql = `
SELECT id, content, source_path, timestamp, buckets, tags, provenance, compound_id, start_byte, end_byte
FROM atoms
WHERE id = ANY($1)
`;
    const toSearchResult = (row) => {
        const hit = {
            id: row.id,
            content: row.content,
            // Map source_path to source
            source: row.source_path,
            timestamp: row.timestamp,
            buckets: row.buckets || [],
            tags: row.tags || [],
            epochs: '',
            provenance: row.provenance || 'internal',
            // High score for direct engram hits
            score: 1.0,
            compound_id: row.compound_id,
            start_byte: row.start_byte,
            end_byte: row.end_byte
        };
        return hit;
    };
    try {
        const { rows } = await db.run(sql, [ids]);
        return rows.map(toSearchResult);
    }
    catch (err) {
        console.error('[Search] Failed to hydrate engrams:', err);
        return [];
    }
}
|
|
115
|
+
/**
 * Enrich atoms with molecule tags for better contextual associations.
 *
 * For every anchor that has a `compound_id`, the tags of all molecules stored
 * under that compound are fetched in one batch query and merged into
 * `anchor.tags` (de-duplicated, then sorted alphabetically). Anchors are
 * mutated in place. Tag values are accepted either as arrays or as
 * JSON-encoded array strings, on both the molecule and the atom side.
 *
 * This is best-effort: any failure is logged and the anchors are left as
 * they were.
 *
 * @param {object[]} anchors - Search results to enrich in place.
 * @returns {Promise<void>}
 */
async function enrichAtomsWithMoleculeTags(anchors) {
    // Decode a tags value into an array; returns null when it is neither an
    // array nor a JSON string that decodes to one.
    const toTagArray = (raw) => {
        if (Array.isArray(raw)) {
            return raw;
        }
        if (typeof raw !== 'string') {
            return null;
        }
        try {
            const parsed = JSON.parse(raw);
            return Array.isArray(parsed) ? parsed : null;
        }
        catch {
            return null;
        }
    };
    try {
        // Group anchors by compound_id for efficient batch query
        const anchorsByCompound = new Map();
        for (const anchor of anchors) {
            if (!anchor.compound_id) {
                continue;
            }
            if (!anchorsByCompound.has(anchor.compound_id)) {
                anchorsByCompound.set(anchor.compound_id, []);
            }
            anchorsByCompound.get(anchor.compound_id).push(anchor);
        }
        if (anchorsByCompound.size === 0) {
            return;
        }
        const compoundIds = Array.from(anchorsByCompound.keys());
        try {
            // Batch fetch molecules for all compounds using ANY() to prevent N+1 queries
            const molQuery = `
                SELECT compound_id, tags
                FROM molecules
                WHERE compound_id = ANY($1) AND tags IS NOT NULL
            `;
            const molResult = await db.run(molQuery, [compoundIds]);
            // Group molecule tags by compound_id
            const tagsByCompound = new Map();
            for (const molRow of molResult.rows || []) {
                const cId = molRow.compound_id;
                if (!tagsByCompound.has(cId)) {
                    tagsByCompound.set(cId, new Set());
                }
                if (!molRow.tags) {
                    continue;
                }
                const rawTags = toTagArray(molRow.tags);
                if (rawTags === null) {
                    // Malformed tags for this molecule; skip this row only.
                    continue;
                }
                const compoundTags = tagsByCompound.get(cId);
                for (const tag of rawTags) {
                    if (tag && typeof tag === 'string') {
                        compoundTags.add(tag);
                    }
                }
            }
            // Merge molecule tags with each atom's tags
            for (const [compoundId, compoundAnchors] of anchorsByCompound) {
                const moleculeTags = tagsByCompound.get(compoundId);
                if (!moleculeTags || moleculeTags.size === 0) {
                    continue;
                }
                for (const anchor of compoundAnchors) {
                    // A JSON-string tags value must be decoded first; spreading the raw
                    // string would turn every character into a "tag".
                    const atomTags = anchor.tags ? toTagArray(anchor.tags) : [];
                    if (atomTags === null) {
                        // Unrecognized tags shape: leave this anchor untouched rather than corrupt it.
                        continue;
                    }
                    // De-duplicate, then sort the combined list alphabetically for a stable order.
                    anchor.tags = Array.from(new Set([...atomTags, ...moleculeTags])).sort();
                }
            }
        }
        catch (molErr) {
            // Continue without enrichment if the molecule tag fetch fails, but include compoundId context for debugging
            const sampleCompoundIds = compoundIds.slice(0, 5);
            console.debug('[Search] Could not fetch molecule tags for compounds (count=%d, sample=%o): %o', compoundIds.length, sampleCompoundIds, molErr);
        }
    }
    catch (e) {
        console.warn('[Search] Failed to enrich atoms with molecule tags:', e);
        // Continue without enrichment - this is not a critical failure
    }
}
|
|
197
|
+
import { PhysicsTagWalker } from './physics-tag-walker.js';
|
|
198
|
+
import { assembleAndSerialize, assembleContextPackage } from './graph-context-serializer.js';
|
|
199
|
+
// ---------------------------------------------------------------------------
// Search serialization lock: a promise chain that lets only one search run at
// a time, so concurrent searches cannot double peak heap usage.
// ---------------------------------------------------------------------------
let _searchLock = Promise.resolve();
/**
 * Queue up for the search lock.
 *
 * @returns {Promise<Function>} Resolves, once every earlier holder has
 *   released, with a `release` function. The holder must call `release()`;
 *   later callers stay pending until it does.
 */
function acquireSearchLock() {
    const previousHolder = _searchLock;
    let release;
    // Stays pending until this holder calls release().
    const heldUntilReleased = new Promise((resolve) => {
        release = resolve;
    });
    // Hand out the release function as soon as the previous holder is done...
    const acquired = previousHolder.then(() => release);
    // ...and make the next caller wait for this holder's release.
    _searchLock = previousHolder.then(() => heldUntilReleased);
    return acquired;
}
|
|
211
|
+
/**
 * Memory thresholds for search, read from `config.MEMORY` with built-in
 * defaults (Standard 127/134/135: configurable memory management).
 * A setting only falls back to its default when it is null or undefined.
 *
 * @returns {object} Thresholds in MB plus the streaming-results settings.
 */
function getMemoryThresholds() {
    const memorySettings = config.MEMORY || {};
    const settingOr = (name, fallback) => memorySettings[name] ?? fallback;
    return {
        // If V8 heapUsed exceeds this, downgrade max-recall -> standard
        HEAP_PRESSURE_MB: settingOr('heap_pressure_mb', 500),
        // Throttling thresholds for memory-aware search pacing
        THROTTLE_START_MB: settingOr('throttle_start_mb', 800),
        THROTTLE_MAX_MB: settingOr('throttle_max_mb', 1200),
        EMERGENCY_STOP_MB: settingOr('emergency_stop_mb', 1500),
        // Streaming results configuration
        RESULTS_BATCH_SIZE: settingOr('search_results_batch_size', 20),
        ENABLE_STREAMING: settingOr('enable_streaming_results', false)
    };
}
|
|
227
|
+
/**
 * Current V8 heap usage as reported by `process.memoryUsage().heapUsed`,
 * converted from bytes to megabytes and rounded to the nearest integer.
 *
 * @returns {number} Heap in use, in whole MB.
 */
function heapUsedMB() {
    const BYTES_PER_MB = 1024 * 1024;
    const { heapUsed } = process.memoryUsage();
    return Math.round(heapUsed / BYTES_PER_MB);
}
|
|
230
|
+
/**
 * Memory-aware throttling: slows down or rejects a search based on current
 * heap usage (Standard 127/134/135: configurable memory thresholds).
 *
 * Decision ladder, checked from the highest threshold down:
 *  - heap >= EMERGENCY_STOP_MB: reject
 *  - heap >= THROTTLE_MAX_MB:   reject
 *  - heap >= THROTTLE_START_MB: wait up to 10 s (proportional to how far the
 *    heap is between start and max), then proceed
 *  - otherwise:                 proceed immediately
 *
 * @returns {Promise<{proceed: boolean, delayMs: number, reason?: string}>}
 *   `proceed` tells the caller whether to run the search; `delayMs` is the
 *   time already waited here; `reason` is absent in the unthrottled case.
 */
async function throttleSearchForMemory() {
    const heapMB = heapUsedMB();
    const { EMERGENCY_STOP_MB, THROTTLE_MAX_MB, THROTTLE_START_MB } = getMemoryThresholds();
    const rejectWith = (reason) => ({ proceed: false, delayMs: 0, reason });
    // Emergency stop - reject search
    if (heapMB >= EMERGENCY_STOP_MB) {
        console.warn(`[Throttle] EMERGENCY: Heap at ${heapMB}MB >= ${EMERGENCY_STOP_MB}MB. Rejecting search.`);
        return rejectWith(`Memory too high (${heapMB}MB)`);
    }
    // Above the throttle ceiling - reject as well
    if (heapMB >= THROTTLE_MAX_MB) {
        console.warn(`[Throttle] Heap at ${heapMB}MB >= ${THROTTLE_MAX_MB}MB. Rejecting search temporarily.`);
        return rejectWith(`Memory pressure (${heapMB}MB)`);
    }
    // Below the throttle floor - normal operation, no delay
    if (!(heapMB >= THROTTLE_START_MB)) {
        return { proceed: true, delayMs: 0 };
    }
    // Inside the throttle zone - delay in proportion to the pressure
    const throttleSpanMB = THROTTLE_MAX_MB - THROTTLE_START_MB;
    const pressureRatio = (heapMB - THROTTLE_START_MB) / throttleSpanMB;
    // Up to 10 second delay
    const delayMs = Math.round(pressureRatio * 10000);
    console.log(`[Throttle] Heap at ${heapMB}MB. Delaying search by ${delayMs}ms (pressure: ${(pressureRatio * 100).toFixed(0)}%)`);
    await new Promise((resolve) => {
        setTimeout(resolve, delayMs);
    });
    return { proceed: true, delayMs, reason: `Throttled (${heapMB}MB)` };
}
|
|
259
|
+
/**
 * Range-merge anchors that belong to the same compound.
 *
 * Within each compound the anchors are ordered by `start_byte` and walked
 * pairwise: near-identical ranges, contained ranges and ranges overlapping by
 * more than 50% are collapsed (containment keeps the larger range, heavy
 * overlap keeps the higher score). Anchors without a `compound_id` cannot be
 * range-compared and are passed through unchanged.
 *
 * @param {object[]} candidates - Atom and molecule anchors.
 * @returns {object[]} Surviving anchors, grouped by compound, followed by the anchors without a compound.
 */
function mergeAnchorsWithinCompounds(candidates) {
    const anchorsByCompound = new Map();
    const withoutCompound = [];
    for (const anchor of candidates) {
        if (!anchor.compound_id) {
            withoutCompound.push(anchor);
            continue;
        }
        if (!anchorsByCompound.has(anchor.compound_id)) {
            anchorsByCompound.set(anchor.compound_id, []);
        }
        anchorsByCompound.get(anchor.compound_id).push(anchor);
    }
    const survivors = [];
    for (const compoundAnchors of anchorsByCompound.values()) {
        // Sort by start byte
        compoundAnchors.sort((a, b) => (a.start_byte || 0) - (b.start_byte || 0));
        let current = compoundAnchors[0];
        for (let i = 1; i < compoundAnchors.length; i++) {
            const next = compoundAnchors[i];
            const currentStart = current.start_byte || 0;
            const currentEnd = current.end_byte || 0;
            const nextStart = next.start_byte || 0;
            const nextEnd = next.end_byte || 0;
            // Check for overlap or adjacency (within 50 bytes)
            if (nextStart <= currentEnd + 50) {
                // Near-identical start/end: a true duplicate, skip next
                if (Math.abs(nextStart - currentStart) < 5 && Math.abs(nextEnd - currentEnd) < 5) {
                    continue;
                }
                // Next is contained in current: skip next
                if (nextEnd <= currentEnd) {
                    continue;
                }
                // Current is contained in next: switch to next
                if (nextStart <= currentStart && nextEnd >= currentEnd) {
                    current = next;
                    continue;
                }
                // Overlap of more than 50% of either range: keep the higher score
                // (ties keep the current, i.e. earlier, anchor).
                const overlap = Math.min(currentEnd, nextEnd) - Math.max(currentStart, nextStart);
                const currentLen = currentEnd - currentStart;
                const nextLen = nextEnd - nextStart;
                if (overlap > 0 && (overlap / currentLen > 0.5 || overlap / nextLen > 0.5)) {
                    if ((next.score || 0) > (current.score || 0)) {
                        current = next;
                    }
                    continue;
                }
            }
            // Distinct enough: keep current and move on.
            survivors.push(current);
            current = next;
        }
        survivors.push(current);
    }
    return [...survivors, ...withoutCompound];
}
/**
 * Global content-similarity de-duplication (quadratic in the number of anchors).
 *
 * Anchors are visited in descending score order, so the best-scored copy of
 * duplicated content is the one that survives. A candidate is dropped when:
 *  - the MD5 of the first 500 normalized characters was already seen,
 *  - its byte range overlaps, or lies within the adjacency threshold of, a
 *    kept range of the same compound,
 *  - its normalized content contains / is contained in a kept anchor's,
 *  - the first 100 normalized characters match a kept anchor's (when more than 50 are comparable),
 *  - its `molecular_signature` is within Hamming distance 5 of a kept anchor's.
 *
 * Anchors with missing or short (< 20 chars) content are always kept and take
 * part only in the signature comparison: their text is too short to make a
 * meaningful containment test.
 *
 * @param {object[]} anchors - Anchors to de-duplicate (not mutated).
 * @returns {object[]} Distinct anchors in descending score order.
 */
function dedupeAnchorsByContent(anchors) {
    // Normalization: unescape common JSON escape sequences, lowercase, keep only [a-z0-9].
    const normalize = (text) => {
        if (typeof text !== 'string') {
            return '';
        }
        return text
            .replace(/\\"/g, '"')
            .replace(/\\\\/g, '\\')
            .replace(/\\n/g, '\n')
            .replace(/\\r/g, '\r')
            .replace(/\\t/g, '\t')
            .toLowerCase()
            .replace(/[^a-z0-9]/g, '');
    };
    // Sort by score desc to prioritize best matches
    const ranked = [...anchors].sort((a, b) => (b.score || 0) - (a.score || 0));
    const distinctAnchors = [];
    const seenContentHashes = new Set();
    // Kept byte ranges per compound, to detect sliding-window duplicates
    const keptRanges = new Map();
    // Normalized text of every kept anchor, computed once instead of per comparison.
    const keptContent = [];
    for (const candidate of ranked) {
        if (!candidate.content || candidate.content.length < 20) {
            distinctAnchors.push(candidate);
            // Empty norm/fingerprint: excluded from the text comparisons below.
            keptContent.push({ norm: '', fingerprint: '', signature: candidate.molecular_signature });
            continue;
        }
        // C. Content fingerprint de-duplication (across different files)
        const candidateNorm = normalize(candidate.content);
        const contentHash = createHash('md5').update(candidateNorm.substring(0, 500)).digest('hex');
        if (seenContentHashes.has(contentHash)) {
            continue;
        }
        seenContentHashes.add(contentHash);
        // A. Geometric de-duplication (if compound_id and a byte range are available)
        const hasRange = Boolean(candidate.compound_id) && candidate.start_byte !== undefined && candidate.end_byte !== undefined;
        if (hasRange) {
            const ranges = keptRanges.get(candidate.compound_id) || [];
            const candidateLen = candidate.end_byte - candidate.start_byte;
            const isGeometricDuplicate = ranges.some((range) => {
                const overlapStart = Math.max(candidate.start_byte, range.start);
                const overlapEnd = Math.min(candidate.end_byte, range.end);
                const overlapLen = Math.max(0, overlapEnd - overlapStart);
                const minLen = Math.min(candidateLen, range.end - range.start);
                // Overlap of at least 50% of the shorter window
                if (overlapLen > 0 && overlapLen >= minLen * 0.5) {
                    return true;
                }
                // Adjacent or overlapping windows: the gap is 0 when the ranges touch or
                // overlap, otherwise the distance between them. Molecules can be
                // large, hence the 500-byte floor.
                const gap = Math.max(0, overlapStart - overlapEnd);
                const adjacencyThreshold = Math.max(500, minLen * 0.2);
                return gap < adjacencyThreshold;
            });
            if (isGeometricDuplicate) {
                continue;
            }
        }
        // B. Content de-duplication (fallback)
        const candidateFingerprint = candidateNorm.substring(0, 100);
        const isContentDuplicate = keptContent.some((kept) => {
            // 1. Containment either way. Skipped when either side normalizes to
            //    nothing, because every string "contains" the empty string.
            if (candidateNorm.length > 0 && kept.norm.length > 0
                && (kept.norm.includes(candidateNorm) || candidateNorm.includes(kept.norm))) {
                return true;
            }
            // 2. Fuzzy prefix match (needs more than 50 comparable characters)
            const checkLen = Math.min(candidateFingerprint.length, kept.fingerprint.length);
            if (checkLen > 50 && candidateFingerprint.substring(0, checkLen) === kept.fingerprint.substring(0, checkLen)) {
                return true;
            }
            // 3. SimHash distance: Hamming distance < 5 counts as near-duplicate content
            if (candidate.molecular_signature && kept.signature) {
                return getHammingDistance(candidate.molecular_signature, kept.signature) < 5;
            }
            return false;
        });
        if (isContentDuplicate) {
            continue;
        }
        distinctAnchors.push(candidate);
        keptContent.push({ norm: candidateNorm, fingerprint: candidateFingerprint, signature: candidate.molecular_signature });
        if (hasRange) {
            const ranges = keptRanges.get(candidate.compound_id) || [];
            ranges.push({ start: candidate.start_byte, end: candidate.end_byte });
            keptRanges.set(candidate.compound_id, ranges);
        }
    }
    return distinctAnchors;
}
/**
 * Find Anchors (Direct Hits) - formerly part of tagWalkerSearch.
 *
 * Runs two strategies and merges them:
 *  A. Atom search: each query term is inflated via
 *     `ContextInflator.inflateFromAtomPositions`, candidates are scored with
 *     `calculateLightweightScore` and only the top N per term are kept.
 *  B. Molecule search: full-text search over `molecules` ranked with `ts_rank_cd`.
 * The combined hits are range-merged per compound, de-duplicated by content,
 * then (when a mirror file for the anchor's source can be read) their content
 * is replaced by the mirror file's content, and finally molecule tags are
 * merged into each anchor's tags.
 *
 * If the molecule search or the de-duplication fails, the raw atom results
 * are used instead. Any other failure is logged and yields `[]`.
 *
 * @param {string} query - Raw search query; sanitized with `sanitizeFtsQuery`.
 * @param {string[]} [buckets] - When non-empty, restricts both strategies to these buckets.
 * @param {string[]} [tags] - Accepted for interface compatibility; not read by this function.
 * @param {number} [_maxChars] - Character budget; drives how many molecules are requested.
 * @param {string} [provenance] - 'all' excludes quarantined compounds from the molecule
 *   search; 'quarantine' adds no provenance condition to it; any other value must match exactly.
 * @param {*} [filters] - Accepted for interface compatibility; not read by this function.
 * @param {boolean} [fuzzy] - Accepted for interface compatibility; not read by this function.
 * @returns {Promise<object[]>} The anchors found, or `[]` when the query is empty or the search fails.
 */
export async function findAnchors(query, buckets = [], tags = [], _maxChars = config.SEARCH.max_chars_default, provenance = 'all', filters, fuzzy = false) {
    try {
        const sanitizedQuery = sanitizeFtsQuery(query);
        if (!sanitizedQuery) {
            return [];
        }
        // 0. Dynamic Atom Scaling
        const tokenBudget = Math.floor(_maxChars / 4);
        const avgTokensPerAtom = 60; // Tuned for better density
        // A non-numeric budget must not leak into the SQL below as "LIMIT NaN".
        const targetAtomCount = Number.isFinite(tokenBudget)
            ? Math.max(10, Math.ceil(tokenBudget / avgTokensPerAtom))
            : 10;
        console.log(`[Search] Dynamic Scaling: Budget=${tokenBudget}t -> Target=${targetAtomCount} atoms`);
        // Construct Query String for FTS
        // Use OR ( | ) by default so multi-word queries find documents containing
        // ANY of the terms, not ALL of them. AND ( & ) is too restrictive for
        // conversational queries like "College Music education" - it requires all
        // three words in the same molecule, which rarely matches.
        // Strip English stop words before building the tsquery - the 'simple' config
        // does NOT filter them, so connector words like "and", "the", "or" would
        // match almost every molecule and corrupt ranking.
        const FTS_STOP_WORDS = new Set([
            'a', 'an', 'and', 'are', 'as', 'at', 'be', 'been', 'being', 'but', 'by',
            'can', 'could', 'did', 'do', 'does', 'doing', 'done', 'each', 'for',
            'from', 'had', 'has', 'have', 'having', 'he', 'her', 'him', 'his',
            'how', 'i', 'if', 'in', 'is', 'it', 'its', 'itself', 'just', 'me',
            'more', 'my', 'no', 'not', 'of', 'off', 'on', 'or', 'our', 'out',
            'own', 'same', 'she', 'should', 'so', 'some', 'such', 'than', 'that',
            'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this',
            'those', 'to', 'too', 'very', 'was', 'we', 'were', 'what', 'when',
            'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with',
            'would', 'you', 'your', 'yours'
        ]);
        const queryWords = sanitizedQuery.trim().split(/\s+/).filter(t => t.length > 0);
        // Compare case-insensitively so "The" is treated as a stop word just like "the".
        const contentWords = queryWords.filter(t => !FTS_STOP_WORDS.has(t.toLowerCase()));
        // Fall back to full word list if stop-word stripping removed everything
        const baseTerms = contentWords.length > 0 ? contentWords : queryWords;
        // Expand camelCase identifiers (e.g. findAnchors -> [findanchors, find, anchors])
        // so FTS can match partial names and prose descriptions of the same concept.
        const tsTerms = expandCamelCase(baseTerms);
        const tsQueryString = tsTerms.join(' | ');
        const atomResults = [];
        // A. Atom Search (Radial Inflation) via ContextInflator
        // Use stop-word-stripped terms (tsTerms) so we don't inflate around "and", "the", etc.
        const terms = tsTerms.length > 0 ? tsTerms : sanitizedQuery.split(/\s+/).filter(t => t.length > 0);
        if (terms.length > 0) {
            try {
                // [Standard 132] Use adaptive concurrency based on available memory
                const inflations = await processWithAdaptiveConcurrency(terms, async (term) => ContextInflator.inflateFromAtomPositions(term, 150, 20, undefined, { buckets, provenance }));
                const rawAtoms = inflations.flat();
                // [Standard 134] Two-pass scoring: score candidates before expensive processing
                const scoredAtoms = rawAtoms.map(atom => ({
                    ...atom,
                    score: calculateLightweightScore(atom, terms, sanitizedQuery)
                }));
                // Sort by score and keep only top N (mobile: 5, desktop: 10 per term)
                const isMobile = process.platform === 'android' || (await import('os')).totalmem() < 2 * 1024 * 1024 * 1024;
                const maxResultsPerTerm = isMobile ? 5 : 10;
                const topAtoms = scoredAtoms
                    .sort((a, b) => (b.score || 0) - (a.score || 0))
                    .slice(0, maxResultsPerTerm * terms.length);
                atomResults.push(...topAtoms);
                console.log(`[Search] Atom search found ${rawAtoms.length} atoms, kept top ${topAtoms.length} after scoring for terms: ${terms.join(', ')}`);
            }
            catch (e) {
                console.error(`[Search] Atom Search failed:`, e);
            }
        }
        let anchors = atomResults;
        // B. Molecule Search (Full-Text with BM25-style ranking)
        let moleculeQuery = `
            SELECT m.id, m.content, c.path as source, m.timestamp,
            '{}'::text[] as buckets, '{}'::text[] as tags, 'epoch_placeholder' as epochs, c.provenance,
            -- Use ts_rank_cd for cover-density ranking (closer to BM25)
            ts_rank_cd(to_tsvector('simple', m.content), to_tsquery('simple', $1)) * 10 as score,
            m.sequence, m.molecular_signature,
            m.start_byte, m.end_byte, m.type, m.numeric_value, m.numeric_unit, m.compound_id
            FROM molecules m
            JOIN compounds c ON m.compound_id = c.id
            WHERE to_tsvector('simple', m.content) @@ to_tsquery('simple', $1)
        `;
        const moleculeParams = [tsQueryString];
        if (buckets.length > 0) {
            moleculeQuery += ` AND EXISTS (
                SELECT 1 FROM atoms a
                WHERE a.source_path = c.path
                AND a.buckets && $${moleculeParams.length + 1}
            )`;
            moleculeParams.push(buckets);
        }
        if (provenance !== 'all' && provenance !== 'quarantine') {
            moleculeQuery += ` AND c.provenance = $${moleculeParams.length + 1}`;
            moleculeParams.push(provenance);
        }
        else if (provenance === 'all') {
            moleculeQuery += ` AND c.provenance != 'quarantine'`;
        }
        // The limit follows the dynamic token budget rather than a fixed number.
        moleculeQuery += ` ORDER BY score DESC LIMIT ${targetAtomCount}`;
        try {
            let molResult = await db.run(moleculeQuery, moleculeParams);
            // Strategy 1.1: If an AND query yields nothing, retry with OR (Fuzzy Fallback).
            // tsQueryString is joined with ' | ' above, so this only triggers when a
            // term itself carries an '&'.
            if (molResult.rows.length === 0 && tsQueryString.includes('&')) {
                console.log('[Search] Initial AND query yielded 0 results. Retrying with OR-fuzzy logic...');
                // Limit the OR fallback to the 8 longest distinct words (longer words are
                // more likely to be unique/important) to keep the query small.
                const allTerms = sanitizedQuery.split(/\s+/).filter(t => t.length > 3);
                const uniqueTerms = Array.from(new Set(allTerms));
                uniqueTerms.sort((a, b) => b.length - a.length);
                const topTerms = uniqueTerms.slice(0, 8);
                if (topTerms.length > 0) {
                    const orQueryString = topTerms.join(' | ');
                    console.log(`[Search] OR-fuzzy fallback using terms: ${orQueryString}`);
                    // Same SQL and filters; only the tsquery parameter changes.
                    molResult = await db.run(moleculeQuery, [orQueryString, ...moleculeParams.slice(1)]);
                }
            }
            const molecules = (molResult.rows || []).map((row) => ({
                id: row.id,
                content: row.content,
                source: row.source,
                timestamp: row.timestamp,
                buckets: row.buckets,
                tags: row.tags,
                epochs: row.epochs,
                provenance: row.provenance,
                score: row.score,
                sequence: row.sequence,
                molecular_signature: row.molecular_signature,
                start_byte: row.start_byte,
                end_byte: row.end_byte,
                type: row.type,
                numeric_value: row.numeric_value,
                numeric_unit: row.numeric_unit,
                compound_id: row.compound_id
            }));
            // Merge atom and molecule results, collapsing overlapping ranges per compound
            const rangeMerged = mergeAnchorsWithinCompounds([...atomResults, ...molecules]);
            // Final safety net: global content similarity de-duplication
            const originalCount = rangeMerged.length;
            anchors = dedupeAnchorsByContent(rangeMerged);
            console.log(`[Search] Final Dedup: ${originalCount} -> ${anchors.length} items. Removed ${originalCount - anchors.length} duplicates.`);
            console.log(`[Search] Anchors found: ${atomResults.length} Atoms, ${molecules.length} Molecules. Final Unique: ${anchors.length}`);
        }
        catch (e) {
            console.error('[Search] Molecule search failed:', e);
            anchors = atomResults;
        }
        // Intercept: read content from the Mirror when the anchor has a source.
        // Anchors without a source keep their DB content.
        const { getMirrorPath } = await import('../mirror/mirror.js');
        const fs = await import('fs');
        // Parallelize mirror reads for performance (non-blocking I/O)
        await Promise.all(anchors.map(async (anchor) => {
            if (!anchor.source || anchor.source.trim() === '') {
                return; // Keep DB content
            }
            try {
                const mirrorPath = getMirrorPath(anchor.source, anchor.provenance);
                const liveContent = await fs.promises.readFile(mirrorPath, 'utf-8');
                if (liveContent && liveContent.length > 0) {
                    anchor.content = liveContent;
                }
            }
            catch {
                // Deliberate best-effort: a missing or unreadable mirror file
                // (e.g. ENOENT) simply leaves the DB content in place.
            }
        }));
        // === TAG ENRICHMENT: Merge molecule tags with atom tags ===
        // Gives LLMs richer contextual associations by showing the tags of the
        // parent molecule(s) alongside the atom tags.
        await enrichAtomsWithMoleculeTags(anchors);
        return anchors;
    }
    catch (e) {
        console.error('[Search] findAnchors failed:', e);
        return [];
    }
}
|
|
632
|
+
/**
 * Public search entry point (Intelligent Expansion + Physics Tag-Walker, GCP output).
 *
 * Acquires the module-wide search lock, delegates the actual work to
 * `_executeSearchInternal`, and always releases the lock afterwards. When Node
 * exposes `global.gc`, a collection is requested after every search, whether
 * it succeeded or failed.
 *
 * @param query - Search query string
 * @param buckets - Array of buckets to search
 * @param maxChars - Maximum characters to return (defaults to config.SEARCH.max_chars_default)
 * @param provenance - Provenance filter (internal/external/quarantine/all)
 * @param explicitTags - Explicit tags to filter by
 * @param filters - Additional filters, forwarded untouched
 * @param useMaxRecall - Request the comprehensive (max-recall) retrieval mode
 * @param userContext - User context for personalization
 * @returns Whatever `_executeSearchInternal` resolves to
 */
export async function executeSearch(query, buckets, maxChars = config.SEARCH.max_chars_default, provenance = 'all', explicitTags = [], filters, useMaxRecall = false, userContext) {
    console.log(`[Search] executeSearch (Physics Engine V2) called with provenance: ${provenance}`);
    // The clock starts before the lock is requested, so the duration reported by the
    // internal pipeline includes any time spent queued behind another search.
    const startedAt = Date.now();
    // Only one search runs at a time; the original author notes this keeps peak heap predictable.
    const unlock = await acquireSearchLock();
    try {
        const outcome = await _executeSearchInternal(query, buckets, maxChars, provenance, explicitTags, filters, useMaxRecall, userContext, startedAt);
        return outcome;
    }
    finally {
        unlock();
        // global.gc only exists when the process was started with --expose-gc.
        if (typeof global.gc === 'function') {
            global.gc();
        }
    }
}
|
|
659
|
+
/**
 * Core search pipeline behind `executeSearch` (which calls it while holding the search lock).
 *
 * Stages:
 *   0. Memory throttling, max-recall downgrade under heap pressure, wait for ingestion to go idle.
 *   1. Prepare query / buckets.
 *   2. Collect anchors: engram lookup + `findAnchors`, with a tag-based fallback when
 *      fewer than 5 anchors were found; then de-duplicate by id.
 *   3. Run the PhysicsTagWalker (1 hop) from a source-diverse ordering of anchor ids.
 *   4. Assemble / serialize the graph-context package and format the capped result list.
 *
 * @param query - Search query string
 * @param buckets - Buckets to search (null/undefined is treated as none)
 * @param maxChars - Character budget for the response
 * @param provenance - Provenance filter forwarded to `findAnchors`
 * @param explicitTags - Explicit tags forwarded to `findAnchors` and used as scope tags
 * @param filters - Additional filters forwarded to `findAnchors`
 * @param useMaxRecall - Max-recall mode; silently downgraded when heap usage is above the pressure threshold
 * @param userContext - Optional `{ name, current_state }`; defaults to 'User' / 'active'
 * @param startTime - Epoch ms used for the "completed in" log line
 * @returns `{ context, results, toAgentString, metadata }`
 * @throws Error when the memory throttle refuses the search
 */
async function _executeSearchInternal(query, buckets, maxChars = config.SEARCH.max_chars_default, provenance = 'all', explicitTags = [], filters, useMaxRecall = false, userContext, startTime = Date.now()) {
    // Memory-aware throttling: slow down or reject searches based on memory pressure
    const throttleResult = await throttleSearchForMemory();
    if (!throttleResult.proceed) {
        throw new Error(`Search rejected: ${throttleResult.reason}. Please wait and try again.`);
    }
    // Memory pressure check: if heap is already near the limit, downgrade max-recall
    // to standard search to avoid OOM. Trades result depth for stability.
    const heapMB = heapUsedMB();
    const thresholds = getMemoryThresholds();
    if (useMaxRecall && heapMB > thresholds.HEAP_PRESSURE_MB) {
        console.warn(`[Search] Memory pressure detected (${heapMB}MB heap). Downgrading max-recall → standard search.`);
        useMaxRecall = false;
        maxChars = Math.min(maxChars, config.SEARCH.max_chars_default);
    }
    // Check if system is busy with ingestion
    const status = systemStatus.getStatus();
    if (status.isBusy) {
        // Wait for ingestion to finish before running search; the original author notes that
        // concurrent search + ingestion can push memory past the heap limit.
        // NOTE(review): executeSearch holds the search lock during this wait, so queued
        // searches are blocked for up to maxWaitMs as well.
        const maxWaitMs = 180_000; // 3 minutes
        const pollMs = 1_000;
        let waited = 0;
        console.log(`[Search] System busy (${status.state}), waiting for idle before proceeding...`);
        while (systemStatus.getStatus().isBusy && waited < maxWaitMs) {
            await new Promise(r => setTimeout(r, pollMs));
            waited += pollMs;
        }
        if (systemStatus.getStatus().isBusy) {
            console.warn(`[Search] System still busy after ${waited}ms, proceeding with risk.`);
        }
        else {
            console.log(`[Search] System became idle after ${waited}ms, proceeding with search.`);
        }
    }
    // 1. Parse & Prepare
    const cleanQuery = query; // No rewriting here; the query is passed through as-is
    const realBuckets = new Set(buckets || []);
    if (explicitTags.length > 0) {
        console.log(`[Search] Explicit tags: ${explicitTags.join(', ')}`);
    }
    // 2. Find Anchors (Planets)
    // Combine Engram Lookup + FTS + Molecule Search
    const engramIds = await lookupByEngram(cleanQuery);
    const engramResults = await hydrateEngrams(engramIds);
    const primaryAnchors = await findAnchors(cleanQuery, Array.from(realBuckets), explicitTags, maxChars, provenance, filters);
    // Tag-Aware Fallback (if low precision/recall on initial anchors)
    if (primaryAnchors.length < 5) {
        console.log(`[Search] Low recall (${primaryAnchors.length} anchors). Attempting Tag-Aware Fallback.`);
        const words = cleanQuery.split(/[\s,]+/);
        // Very naive tag extraction: lower-cased query words longer than 3 characters are
        // looked up verbatim as tags.
        const fallbackTags = words.filter(w => w.length > 3).map(w => w.toLowerCase());
        if (fallbackTags.length > 0) {
            try {
                for (const fbTag of fallbackTags) {
                    // PostgreSQL array search - check if tag exists in array.
                    // NOTE(review): this query is not scoped by buckets, provenance or filters,
                    // unlike findAnchors above — confirm that widening the scope is intended.
                    const tagRes = await db.run(`
                        SELECT id, content, source_path, timestamp, buckets, tags, provenance, simhash, embedding, compound_id, start_byte, end_byte
                        FROM atoms
                        WHERE $1 = ANY(tags)
                        LIMIT 20
                    `, [fbTag]);
                    if (tagRes.rows && tagRes.rows.length > 0) {
                        tagRes.rows.forEach((row) => {
                            primaryAnchors.push({
                                id: String(row.id),
                                content: row.content,
                                source: row.source_path,
                                timestamp: row.timestamp || Date.now(),
                                buckets: typeof row.buckets === 'string' ? JSON.parse(row.buckets) : (row.buckets || []),
                                tags: typeof row.tags === 'string' ? JSON.parse(row.tags) : (row.tags || []),
                                epochs: '',
                                provenance: row.provenance,
                                score: 0.8, // fallback constant score
                                compound_id: row.compound_id,
                                start_byte: row.start_byte,
                                end_byte: row.end_byte,
                                molecular_signature: String(row.simhash)
                            });
                        });
                    }
                }
                // Enrich fallback results with molecule tags
                if (primaryAnchors.length > 0) {
                    await enrichAtomsWithMoleculeTags(primaryAnchors);
                }
            }
            catch (e) {
                console.warn('[Search] Tag-aware fallback failed', e);
            }
        }
    }
    const allAnchors = [...engramResults, ...primaryAnchors];
    // Enrich engram results with molecule tags (findAnchors already does this internally)
    if (engramResults.length > 0) {
        await enrichAtomsWithMoleculeTags(engramResults);
    }
    // Deduplicate by id, keeping the first occurrence (engram results win over primary anchors)
    const seenIds = new Set();
    const uniqueAnchors = allAnchors.filter(r => {
        if (seenIds.has(r.id)) {
            return false;
        }
        seenIds.add(r.id);
        return true;
    });
    // 3. Physics Walker (Moons) - Use TypeScript PhysicsTagWalker
    let walkerResults = [];
    try {
        // Anchors whose id starts with 'virtual' are not looked up directly; instead their
        // compound_id is resolved to molecule ids below.
        const virtualCompoundIds = [...new Set(uniqueAnchors
                .filter(a => a.id && a.id.startsWith('virtual') && a.compound_id)
                .map(a => a.compound_id))];
        let resolvedMolIds = [];
        if (virtualCompoundIds.length > 0) {
            try {
                const res = await db.run(`SELECT id FROM molecules WHERE compound_id = ANY($1) ORDER BY timestamp DESC LIMIT 100`, [virtualCompoundIds]);
                if (res.rows) {
                    resolvedMolIds = res.rows.map((r) => String(r.id));
                }
            }
            catch (e) {
                console.warn('[Search] Failed to resolve virtual compound IDs:', e.message);
            }
        }
        // Round-robin by compound_id so the walker sees anchors from diverse source
        // documents rather than a long run of IDs all from the same file.
        const diverseAnchorIds = [];
        {
            const byCompound = new Map();
            for (const a of uniqueAnchors) {
                if (!a.id || a.id.startsWith('virtual')) {
                    continue;
                }
                const cid = a.compound_id || '__unknown__';
                if (!byCompound.has(cid)) {
                    byCompound.set(cid, []);
                }
                byCompound.get(cid).push(a.id);
            }
            // All resolved molecule ids share one synthetic '__virtual__' group.
            for (const molId of resolvedMolIds) {
                const cid = '__virtual__';
                if (!byCompound.has(cid)) {
                    byCompound.set(cid, []);
                }
                byCompound.get(cid).push(molId);
            }
            const groups = [...byCompound.values()];
            // With no groups Math.max() yields -Infinity and the loop below simply does not run.
            const maxRound = Math.max(...groups.map(g => g.length));
            for (let i = 0; i < maxRound; i++) {
                for (const group of groups) {
                    if (i < group.length) {
                        diverseAnchorIds.push(group[i]);
                    }
                }
            }
        }
        const dedupedAnchorIds = [...new Set(diverseAnchorIds)];
        if (dedupedAnchorIds.length > 0) {
            // Use TypeScript PhysicsTagWalker for radial inflation
            const walker = new PhysicsTagWalker();
            walkerResults = await walker.performRadialInflation(dedupedAnchorIds, 1, // radius (1 hop)
            useMaxRecall ? 300 : 150, // maxPerHop
            0.2, // temperature
            0.001 // gravityThreshold
            );
            console.log(`[Search] PhysicsTagWalker found ${walkerResults.length} associations`);
        }
        else {
            console.log(`[Search] No valid anchor IDs for Physics Walker`);
        }
    }
    catch (e) {
        // Walker failure is non-fatal: the search continues with anchors only.
        console.log(`[Search] Physics Walker failed, skipping: ${e.message}`);
        walkerResults = [];
    }
    // 4. Graph-Context Serialization (GCP)
    const finalUserContext = {
        name: userContext?.name || 'User',
        current_state: userContext?.current_state || 'active'
    };
    // Both assemble calls take the same input; a factory hands each one a fresh object.
    const buildGcpInput = () => ({
        user: finalUserContext,
        query: cleanQuery,
        keyTerms: cleanQuery.split(' '),
        scopeTags: explicitTags,
        anchors: uniqueAnchors,
        walkerResults: walkerResults,
        charBudget: maxChars
    });
    const contextPackage = assembleContextPackage(buildGcpInput());
    const serializedContext = assembleAndSerialize(buildGcpInput());
    console.log(`[Search] Search completed in ${Date.now() - startTime}ms`);
    // Map back to SearchResult[] for legacy API compatibility
    // Combine Anchors + Walker Results, sorted by score desc
    const combinedResults = [
        ...uniqueAnchors,
        ...walkerResults.map(w => ({
            ...w.result,
            physics: w.physics
        }))
    ];
    // Cap total results fed to formatResults: at most max(200, ceil(maxChars / 200)) items,
    // i.e. a rough upper bound on how many snippets the budget can usefully hold.
    const maxResultsForBudget = Math.min(combinedResults.length, Math.max(200, Math.ceil(maxChars / 200)));
    const cappedResults = combinedResults
        .sort((a, b) => (b.score || 0) - (a.score || 0))
        .slice(0, maxResultsForBudget);
    // Apply context provenance formatting with coalescing (Standard 108)
    // Enable coalescing for high-budget queries to improve coherence
    const enableCoalescing = maxChars > 16000; // Only coalesce for budgets > 16k chars
    const proximityThreshold = maxChars > 100000 ? 800 : 500; // Larger threshold for very large budgets
    console.log(`[Search] Coalescing: ${enableCoalescing ? 'enabled' : 'disabled'} (threshold: ${proximityThreshold}px)`);
    const formatted = await formatResults(cappedResults, maxChars, {
        enableCoalescing,
        proximityThreshold
    });
    return {
        context: serializedContext,
        results: formatted.results,
        toAgentString: () => serializedContext,
        metadata: { ...contextPackage.graphStats, ...formatted.metadata }
    };
}
|
|
896
|
+
/**
 * Execute molecule-based search: split the query into sentence-like chunks
 * ("molecules"), run `executeSearch` for each chunk in turn, and merge the results.
 *
 * Results are de-duplicated by id (first occurrence wins), sorted by score
 * descending, and passed through `formatResults` with the original budget.
 * A failing chunk is logged and skipped; it does not abort the remaining chunks.
 *
 * @param query - Full query text to split
 * @param bucket - Not read by this function (kept for signature compatibility)
 * @param buckets - Buckets forwarded to each sub-search
 * @param maxChars - Character budget, used for every sub-search and the final formatting
 * @param deep - Not read by this function (kept for signature compatibility)
 * @param provenance - Provenance filter forwarded to each sub-search
 * @param explicitTags - Explicit tags forwarded to each sub-search
 * @param userContext - User context forwarded to each sub-search
 * @returns The value resolved by `formatResults`
 * @throws Error when the memory throttle refuses the search
 */
export async function executeMoleculeSearch(query, bucket, buckets, maxChars = config.SEARCH.max_chars_default, deep = false, provenance = 'all', explicitTags = [], userContext) {
    // Memory-aware throttling
    const throttleResult = await throttleSearchForMemory();
    if (!throttleResult.proceed) {
        throw new Error(`Search rejected: ${throttleResult.reason}. Please wait and try again.`);
    }
    // Split the query into molecules (sentence-like chunks)
    const molecules = splitQueryIntoMolecules(query);
    console.log(`[MoleculeSearch] Split query into ${molecules.length} molecules:`, molecules);
    // Search each molecule separately
    const allResults = [];
    const includedIds = new Set();
    for (const [index, molecule] of molecules.entries()) {
        console.log(`[MoleculeSearch] Searching molecule ${index + 1}/${molecules.length}: "${molecule}"`);
        try {
            // Execute search for this specific molecule (never in max-recall mode)
            const result = await executeSearch(molecule, buckets, maxChars, provenance, explicitTags, undefined, false, userContext);
            // Add unique results to our collection
            for (const item of result.results) {
                if (!includedIds.has(item.id)) {
                    allResults.push(item);
                    includedIds.add(item.id);
                }
            }
        }
        catch (error) {
            console.error(`[MoleculeSearch] Error searching molecule:`, molecule, error);
            // Continue with other molecules even if one fails
        }
    }
    // Sort by score, highest first. Missing scores count as 0 so the comparator never
    // returns NaN (same convention as the sort in _executeSearchInternal).
    allResults.sort((a, b) => (b.score || 0) - (a.score || 0));
    console.log(`[MoleculeSearch] Combined results from ${molecules.length} molecules: ${allResults.length} total results`);
    return await formatResults(allResults, maxChars); // Use original maxChars to maintain token budget
}
|
|
934
|
+
/**
 * Traditional FTS fallback: plain PostgreSQL full-text search over `atoms.content`
 * ('simple' text-search configuration), ranked by `ts_rank`, optionally restricted
 * to atoms that share at least one bucket with `buckets`.
 *
 * Row content is then replaced, where possible, by the mirrored file content
 * via `hydrateFromMirror`.
 *
 * @param query - Raw query; sanitized with `sanitizeFtsQuery` before use
 * @param buckets - Bucket names to restrict to; empty, null or undefined means no restriction
 * @returns Array of result objects (id, score, content, source, timestamp, buckets,
 *          tags, epochs, provenance); empty when the sanitized query is empty or the
 *          database call fails
 */
export async function runTraditionalSearch(query, buckets) {
    const sanitizedQuery = sanitizeFtsQuery(query);
    if (!sanitizedQuery) {
        return [];
    }
    // Callers may omit `buckets`; reading `.length` of undefined used to throw here,
    // outside the try/catch below.
    const bucketFilter = buckets ?? [];
    const hasBucketFilter = bucketFilter.length > 0;
    // NOTE(review): the query has no LIMIT, so a broad term returns every matching atom.
    let querySql = `
        SELECT a.id,
        ts_rank(to_tsvector('simple', a.content), plainto_tsquery('simple', $1)) as score,
        a.content, a.source_path as source, a.timestamp,
        a.buckets, a.tags, 'epoch_placeholder' as epochs, a.provenance
        FROM atoms a
        WHERE to_tsvector('simple', a.content) @@ plainto_tsquery('simple', $1)
    `;
    if (hasBucketFilter) {
        querySql += ` AND EXISTS (
            SELECT 1 FROM unnest(a.buckets) as bucket WHERE bucket = ANY($2)
        )`;
    }
    querySql += ` ORDER BY score DESC`;
    try {
        const result = await db.run(querySql, hasBucketFilter ? [sanitizedQuery, bucketFilter] : [sanitizedQuery]);
        if (!result.rows) {
            return [];
        }
        const mappedResults = result.rows.map((row) => ({
            id: row.id,
            score: row.score,
            content: row.content,
            source: row.source,
            timestamp: row.timestamp,
            buckets: row.buckets,
            tags: row.tags,
            epochs: row.epochs,
            provenance: row.provenance
        }));
        await hydrateFromMirror(mappedResults);
        return mappedResults;
    }
    catch (e) {
        // Best-effort fallback: a failing FTS query yields no results rather than an error.
        console.error('[Search] FTS failed', e);
        return [];
    }
}
|
|
978
|
+
/**
 * Best-effort helper that overwrites `content` on each result with the text of
 * its mirror file, when such a file can be read.
 *
 * - Results without a source path keep their existing content and cause no disk
 *   access (same rule as the inline mirror read in findAnchors).
 * - A missing or unreadable mirror file leaves that result untouched.
 * - Never throws: if the mirror module cannot be loaded (or `results` is not an
 *   array) a warning is logged and every result keeps its existing content.
 *
 * @param results - Array of result objects with `source`, `provenance` and `content`; mutated in place
 * @returns Promise that resolves once every read has settled
 */
async function hydrateFromMirror(results) {
    try {
        const { getMirrorPath } = await import('../mirror/mirror.js');
        const fs = await import('fs');
        await Promise.all(results.map(async (res) => {
            try {
                // No source path -> nothing to look up on disk.
                if (!res.source || String(res.source).trim() === '') {
                    return;
                }
                const mirrorPath = getMirrorPath(res.source, res.provenance);
                const content = await fs.promises.readFile(mirrorPath, 'utf-8');
                if (content) {
                    res.content = content;
                }
            }
            catch (err) {
                // Expected for atoms whose mirror file does not exist; keep existing content.
            }
        }));
    }
    catch (e) {
        // Setup failure (e.g. the mirror module failed to load) is worth surfacing,
        // but must not break the search that called us.
        console.warn('[Search] Mirror hydration skipped:', e?.message ?? e);
    }
}
|
|
1002
|
+
/**
 * Iterative Search with Back-off Strategy.
 *
 * Tries progressively simpler versions of the query and returns the first
 * attempt that yields at least one result, tagged with `attempt`:
 *   1. the query as given (honours `useMaxRecall`);
 *   2. only NOUN / PROPN tokens plus temporal terms, with scope tags re-appended;
 *   3. only PROPN tokens plus temporal terms, with scope tags re-appended
 *      (skipped when empty or identical to the attempt-2 query);
 *   4. nothing matched — the last (empty) result is returned with `attempt: 4`.
 *
 * Scope tags are the `tags` argument plus every '#'-prefixed word of the query.
 *
 * @param query - Search query string
 * @param buckets - Buckets to search
 * @param maxChars - Character budget
 * @param tags - Explicit tags, forwarded unchanged to every executeSearch call
 * @param provenance - Provenance filter
 * @param useMaxRecall - Applied to attempt 1 only; fallback attempts run in standard mode
 * @param userContext - User context for personalization
 * @throws Error when the memory throttle refuses the search
 */
export async function iterativeSearch(query, buckets = [], maxChars = config.SEARCH.max_chars_default, tags = [], provenance = 'all', useMaxRecall = false, userContext) {
    // Memory-aware throttling
    const gate = await throttleSearchForMemory();
    if (!gate.proceed) {
        throw new Error(`Search rejected: ${gate.reason}. Please wait and try again.`);
    }
    // 0. Collect scope tags so hashtags such as "#work" survive the query simplification below.
    const scopeTags = [...tags];
    for (const piece of query.split(/\s+/)) {
        if (piece.startsWith('#')) {
            scopeTags.push(piece);
        }
    }
    const tagsString = scopeTags.join(' ');
    // Fallback attempts always run in standard (non max-recall) mode.
    const runFallback = (text) => executeSearch(text, buckets, maxChars, provenance, tags, undefined, false, userContext);
    // Strategy 1: the query exactly as given
    console.log(`[IterativeSearch] Strategy 1: Standard Execution`);
    let outcome = await executeSearch(query, buckets, maxChars, provenance, tags, undefined, useMaxRecall, userContext);
    if (outcome.results.length > 0) {
        return { ...outcome, attempt: 1 };
    }
    // Strategy 2: Strict "Subjects & Time" (keep nouns, proper nouns and temporal terms)
    console.log(`[IterativeSearch] Strategy 2: Strict Nouns/Dates`);
    const temporalContext = extractTemporalContext(query);
    const doc = nlp.readDoc(query);
    const nounTexts = doc.tokens().filter((token) => {
        const pos = token.out(nlp.its.pos);
        return pos === 'NOUN' || pos === 'PROPN';
    }).out(nlp.its.text);
    const strictTerms = [...new Set([...nounTexts, ...temporalContext])];
    const strictQuery = `${strictTerms.join(' ')} ${tagsString}`;
    if (strictTerms.length > 0) {
        console.log(`[IterativeSearch] Fallback Query 1: "${strictQuery.trim()}"`);
        outcome = await runFallback(strictQuery);
        if (outcome.results.length > 0) {
            return { ...outcome, attempt: 2 };
        }
    }
    // Strategy 3: proper nouns (entities) and temporal terms only
    const properNounTexts = doc.tokens().filter((token) => token.out(nlp.its.pos) === 'PROPN').out(nlp.its.text);
    const entityTerms = [...new Set([...properNounTexts, ...temporalContext])];
    const entityQuery = `${entityTerms.join(' ')} ${tagsString}`;
    const trimmedEntityQuery = entityQuery.trim();
    // Skip when there is nothing to search for, or when it would repeat the strategy-2 query.
    const worthTrying = trimmedEntityQuery.length > 0 && trimmedEntityQuery !== strictQuery.trim();
    if (worthTrying) {
        console.log(`[IterativeSearch] Fallback Query 2: "${trimmedEntityQuery}"`);
        outcome = await runFallback(entityQuery);
        if (outcome.results.length > 0) {
            return { ...outcome, attempt: 3 };
        }
    }
    // Every strategy came back empty: hand back the last (empty) result.
    return { ...outcome, attempt: 4 };
}
|
|
1059
|
+
/**
 * Smart Chat Search (the "Markovian" context gatherer).
 *
 * Logic:
 * 1. Run `iterativeSearch` first — unless the query is long (> 100 chars) AND
 *    max-recall is requested, in which case go straight to chunking.
 *    - Standard mode with >= 10 results: return them (strategy 'standard').
 *    - Max-recall mode with any results: return them (strategy 'max-recall').
 * 2. Otherwise split the query:
 *    - long max-recall query: groups of 4 words (words longer than 2 chars), first 5 groups;
 *    - else: up to 3 proper nouns from the query, or up to 3 nouns when there are none.
 *    With nothing to split on, return what step 1 produced (strategy 'shallow').
 * 3. Run one `executeSearch` per sub-query, sequentially.
 * 4. Merge with the initial results, de-duplicating by id (first occurrence wins).
 * 5. In max-recall mode, expand the merged atoms with `ContextInflator.inflate`.
 * 6. Serialize the merged set as a graph-context package (strategy 'split_merge').
 *
 * @param query - Search query string
 * @param buckets - Buckets to search
 * @param maxChars - Total character budget
 * @param tags - Explicit tags forwarded to every search
 * @param provenance - Provenance filter
 * @param useMaxRecall - Request the comprehensive (max-recall) retrieval mode
 * @param userContext - Optional `{ name, current_state }`
 */
export async function smartChatSearch(query, buckets = [], maxChars = 20000, tags = [], provenance = 'all', useMaxRecall = false, userContext) {
    const isLongQuery = query.length > 100;
    let initial = { results: [], context: '', toAgentString: () => '' };
    // 1. Initial Attempt (Skip if it's a massive max-recall query to force chunking)
    if (!isLongQuery || !useMaxRecall) {
        initial = await iterativeSearch(query, buckets, maxChars, tags, provenance, useMaxRecall, userContext);
        // If we have enough results, return immediately
        if (initial.results.length >= 10 && !useMaxRecall) {
            return { ...initial, strategy: 'standard' };
        }
        // A max-recall initial search already ran with the full budget; splitting would
        // only repeat full-budget searches, so stop here when it found anything.
        if (useMaxRecall && initial.results.length > 0) {
            return { ...initial, strategy: 'max-recall' };
        }
    }
    console.log(`[SmartSearch] Triggering Multi-Query Split...`);
    // 2. Extract Entities for Split Search
    let splitQueries = [];
    if (isLongQuery && useMaxRecall) {
        // Chunk the query into groups of 4 words for massive keyword lists
        const words = query.split(/\s+/).filter(w => w.length > 2);
        for (let i = 0; i < words.length; i += 4) {
            splitQueries.push(words.slice(i, i + 4).join(' '));
        }
        // Limit to the first 5 chunks to bound the number of sub-searches
        splitQueries = splitQueries.slice(0, 5);
    }
    else {
        const doc = nlp.readDoc(query);
        // Proper nouns (entities) are preferred; plain nouns are the fallback.
        const entities = doc.tokens()
            .filter((t) => t.out(nlp.its.pos) === 'PROPN')
            .out(nlp.its.normal, nlp.as.freqTable)
            .map((e) => e[0])
            .slice(0, 3); // Top 3 Entities
        // If no entities, try Nouns
        if (entities.length === 0) {
            const nouns = doc.tokens()
                .filter((t) => t.out(nlp.its.pos) === 'NOUN')
                .out(nlp.its.normal, nlp.as.freqTable)
                .map((e) => e[0])
                .slice(0, 3);
            entities.push(...nouns);
        }
        splitQueries = entities;
    }
    if (splitQueries.length === 0) {
        // No entities to split on, return what we have
        return { ...initial, strategy: 'shallow', splitQueries: [] };
    }
    console.log(`[SmartSearch] Split Entities/Chunks: ${JSON.stringify(splitQueries)}`);
    // 3. Sequential Execution
    // Run each split sub-query one at a time so their memory use does not stack up.
    // Max-recall sub-queries each get the full budget; otherwise the budget is shared evenly.
    const budgetPerQuery = useMaxRecall ? maxChars : Math.floor(maxChars / splitQueries.length);
    const subQueryResults = [];
    for (const entity of splitQueries) {
        subQueryResults.push(await executeSearch(entity, buckets, budgetPerQuery, provenance, tags, undefined, useMaxRecall, userContext));
    }
    // 4. Merge & Deduplicate
    const mergedMap = new Map();
    // Add initial results first
    initial.results.forEach(r => mergedMap.set(r.id, r));
    // Add split results; an id that is already present keeps its first entry.
    subQueryResults.forEach((res) => {
        res.results.forEach(r => {
            if (!mergedMap.has(r.id)) {
                mergedMap.set(r.id, r);
            }
        });
    });
    let mergedResults = Array.from(mergedMap.values());
    console.log(`[SmartSearch] Merged Total: ${mergedResults.length} atoms.`);
    // 4.5. Context Inflation — expand each atom with surrounding context.
    // Only done for max-recall searches, to fill the budget.
    if (useMaxRecall && mergedResults.length > 0) {
        // Calculate per-atom budget to fill ~90% of total budget
        const budgetPerAtom = Math.floor(maxChars * 0.9 / mergedResults.length);
        console.log(`[SmartSearch] Inflating ${mergedResults.length} atoms with ${budgetPerAtom} chars each (total budget: ${maxChars})...`);
        const inflatedResults = await ContextInflator.inflate(mergedResults, maxChars, budgetPerAtom // Dynamic radius based on available budget
        );
        // Replace merged results with inflated versions (copied, not spread into push(),
        // so a very large result set cannot exceed the call-argument limit).
        mergedResults = [...inflatedResults];
        // Logging only: tolerate items without content and an empty inflation result,
        // so a statistics line can never fail the search or print NaN.
        const totalChars = inflatedResults.reduce((sum, a) => sum + (a.content?.length ?? 0), 0);
        const avgChars = inflatedResults.length > 0 ? Math.round(totalChars / inflatedResults.length) : 0;
        console.log(`[SmartSearch] Inflation complete: ${inflatedResults.length} atoms with avg ${avgChars} chars each`);
    }
    // 5. Re-Format using GCP (Standard 086)
    const finalUserContext = {
        name: userContext?.name || 'User',
        current_state: userContext?.current_state || 'active'
    };
    const serializedContext = assembleAndSerialize({
        user: finalUserContext,
        query: query,
        keyTerms: splitQueries,
        scopeTags: tags,
        anchors: mergedResults, // Treat all merged results as anchors for now in this aggregate view
        walkerResults: [],
        charBudget: maxChars * 1.5
    });
    return {
        context: serializedContext,
        results: mergedResults,
        toAgentString: () => serializedContext,
        strategy: 'split_merge',
        splitQueries: splitQueries,
        metadata: { strategy: 'split_merge' }
    };
}
|
|
1186
|
+
/**
 * Cluster SearchResults into KnowledgeClusters for high-density JSON.
 *
 * Results are grouped by `source` ('unknown' when absent). Within each source
 * they are sorted by ascending `timestamp` and cut into a new cluster wherever
 * two neighbouring results are more than one hour apart. Each run is handed to
 * `createCluster`.
 *
 * @param results - Iterable of search results (`source`, `timestamp`, ...)
 * @returns Array of clusters, grouped source by source in first-seen order
 */
export function clusterMolecules(results) {
    const MAX_GAP_MS = 60 * 60 * 1000; // neighbours further apart than 1 hour start a new cluster
    // Pass 1: bucket the results per source file, preserving first-seen source order.
    const groupedBySource = new Map();
    for (const item of results) {
        const key = item.source || 'unknown';
        const bucket = groupedBySource.get(key);
        if (bucket) {
            bucket.push(item);
        }
        else {
            groupedBySource.set(key, [item]);
        }
    }
    // Pass 2: walk each bucket chronologically and cut it at large time gaps.
    const output = [];
    groupedBySource.forEach((items, key) => {
        items.sort((earlier, later) => earlier.timestamp - later.timestamp);
        let run = [];
        let previous = null;
        for (const item of items) {
            const startsNewRun = previous !== null
                && Math.abs(item.timestamp - previous.timestamp) > MAX_GAP_MS;
            if (startsNewRun) {
                output.push(createCluster(run, key));
                run = [];
            }
            run.push(item);
            previous = item;
        }
        if (run.length > 0) {
            output.push(createCluster(run, key));
        }
    });
    return output;
}
|
|
1225
|
+
/**
 * Build one KnowledgeCluster from a non-empty, chronologically ordered run of results.
 *
 * - `start_time` / `end_time` come from the first / last result's timestamp.
 * - `topic` is the three most frequent tags in the run, joined by spaces.
 * - `id` is `cluster_<source basename>_<digits of start_time>`.
 * - Each result is mapped to a molecule with its tags sorted into people / projects /
 *   concepts by the substring rules below.
 *
 * Timestamps may be anything `new Date()` accepts. In addition, numeric strings
 * that `Date` cannot parse on its own (e.g. epoch milliseconds delivered as text)
 * are converted to numbers, so this function accepts the same timestamps that
 * clusterMolecules can already sort by subtraction.
 *
 * @param mols - Non-empty array of results (`id`, `timestamp`, `tags`, `provenance`,
 *               `content`, `start_byte`, `end_byte`, `source`)
 * @param source - Source path shared by the run; only its basename is used for the id
 * @returns `{ id, start_time, end_time, topic, molecules }`
 * @throws RangeError when a timestamp cannot be interpreted as a date at all
 * @throws TypeError when `mols` is empty
 */
function createCluster(mols, source) {
    // Same result as `new Date(value).toISOString()` for every value Date understands;
    // only numeric strings that Date rejects are retried as numbers.
    const toIso = (value) => {
        let date = new Date(value);
        const retryAsNumber = Number.isNaN(date.getTime())
            && typeof value === 'string'
            && value.trim() !== ''
            && Number.isFinite(Number(value));
        if (retryAsNumber) {
            date = new Date(Number(value));
        }
        return date.toISOString();
    };
    const startTs = toIso(mols[0].timestamp);
    const endTs = toIso(mols[mols.length - 1].timestamp);
    // Topic extraction based on tag frequency
    const tagCounts = new Map();
    mols.forEach(m => {
        (m.tags || []).forEach(t => tagCounts.set(t, (tagCounts.get(t) || 0) + 1));
    });
    const topTags = Array.from(tagCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(e => e[0]);
    const topic = topTags.join(' ');
    // Transform SearchResult to KnowledgeMolecule
    const mappedMolecules = mols.map(m => {
        const people = [];
        const concepts = [];
        const projects = [];
        if (m.tags) {
            m.tags.forEach(t => {
                const lower = t.toLowerCase();
                // NOTE(review): these are hard-coded substring matches, so e.g. a tag
                // containing "problem" is classified as a person because it contains "rob".
                if (lower.includes('rob') || lower.includes('coda') || lower.includes('oliver')) {
                    people.push(t);
                }
                else if (lower.includes('agent') || lower.includes('engine') || lower.includes('project') || lower.includes('anchor')) {
                    projects.push(t);
                }
                else if (t.startsWith('#')) {
                    concepts.push(t);
                }
                // Tags matching none of the rules appear only in `tags`.
            });
        }
        return {
            id: m.id,
            timestamp: toIso(m.timestamp),
            speaker: m.provenance || 'unknown',
            tags: m.tags || [],
            entities: {
                people,
                concepts,
                projects
            },
            content: m.content || '',
            byte_range: {
                start: m.start_byte || 0,
                end: m.end_byte || 0,
                source: m.source || 'unknown'
            }
        };
    });
    // Keep only the digits of the ISO start time, e.g. 20231114221320000.
    const safeId = startTs.replace(/[^0-9]/g, '');
    const basename = source.split(/[/\\]/).pop() || 'unknown';
    const clusterId = `cluster_${basename}_${safeId}`;
    return {
        id: clusterId,
        start_time: startTs,
        end_time: endTs,
        topic: topic,
        molecules: mappedMolecules
    };
}
|
|
1286
|
+
//# sourceMappingURL=search.js.map
|