@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Search Integration for ECE (Semantic Shift Architecture)
|
|
3
|
+
*
|
|
4
|
+
* Provides a bridge between the new semantic search functionality and the existing search API,
|
|
5
|
+
* ensuring backward compatibility while enabling enhanced relationship-focused search.
|
|
6
|
+
*/
|
|
7
|
+
import { db } from '../../core/db.js';
|
|
8
|
+
import { vector } from '../../core/vector.js';
|
|
9
|
+
import { NlpService } from '../nlp/nlp-service.js';
|
|
10
|
+
import { SemanticCategory } from '../../types/taxonomy.js';
|
|
11
|
+
import { parseNaturalLanguage, expandQuery } from '../nlp/query-parser.js';
|
|
12
|
+
import { ContextInflator } from '../search/context-inflator.js';
|
|
13
|
+
import { distributeQueryBudget } from '../search/distributed-query.js';
|
|
14
|
+
/** Cue words that suggest "[entity] [cue] [entity]" when found between two search terms. */
const SEMANTIC_SEARCH_RELATIONSHIP_CUES = new Set(['and', 'with', 'met', 'told', 'said', 'visited', 'called', 'texted', 'about', 'relationship']);

/** Terms that are never radially inflated on their own (function words and filler verbs). */
const SEMANTIC_SEARCH_STOPWORDS = new Set(['and', 'or', 'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'about', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'shall', 'should', 'can', 'could', 'may', 'might', 'must', 'am', 'working', 'talking', 'thinking', 'using', 'making']);

/** Tags (lowercased, first '#' removed) that mark a row as code/log-like for `codeWeight`. */
const SEMANTIC_SEARCH_CODE_TAGS = new Set(['code', 'log', 'json', 'config', 'test']);

/**
 * Read one column of a result row by name, falling back to its position in the SELECT list.
 * NOTE(review): the previous code mixed `row.id` with `row[6]`; accepting both shapes keeps
 * either driver row format working - confirm which one db.run actually returns.
 */
function readSemanticRowColumn(row, name, index) {
    const named = row[name];
    return named !== undefined ? named : row[index];
}

/** Normalize an array-or-string column into an array (anything else becomes []). */
function toSemanticStringArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return typeof value === 'string' ? [value] : [];
}

/** Find "[term] [cue] [term]" triples and emit both orderings as "a_b" pair strings. */
function collectSemanticEntityPairs(searchTerms) {
    const pairs = [];
    // A cue needs a neighbour on both sides, so the first and last terms are never cues.
    for (let i = 1; i < searchTerms.length - 1; i++) {
        if (!SEMANTIC_SEARCH_RELATIONSHIP_CUES.has(searchTerms[i].toLowerCase())) {
            continue;
        }
        const before = searchTerms[i - 1];
        const after = searchTerms[i + 1];
        pairs.push(`${before}_${after}`);
        pairs.push(`${after}_${before}`); // Bidirectional
    }
    return pairs;
}

/**
 * Build the OR-ed tsquery text. tsquery operator/quote characters are stripped and
 * multi-word terms are split so a stray "&" or a two-word expansion tag cannot turn the
 * whole query into a to_tsquery syntax error.
 */
function toSemanticTsQuery(searchTerms) {
    const tokens = new Set();
    for (const term of searchTerms) {
        for (const word of term.split(/\s+/)) {
            const cleaned = word.replace(/[&|!():*<>'\\]/g, '');
            if (cleaned.length > 0) {
                tokens.add(cleaned);
            }
        }
    }
    return [...tokens].join(' | ');
}

/**
 * Embed the query and look up the nearest vectors.
 * Returns the raw hit ids (passed to SQL as-is) and a Map of numeric id -> similarity (0..1).
 * Any failure is logged and yields an empty result so the caller falls back to pure FTS.
 */
async function runSemanticVectorSearch(query) {
    try {
        // Lazy init vector if needed
        if (!vector.isInitialized) {
            await vector.init();
        }
        const nlpService = new NlpService();
        // The original (unparsed) query is embedded on purpose.
        const embedding = await nlpService.getEmbedding(query);
        const hits = vector.search(embedding, 50); // top 50 vector matches
        const ids = hits && hits.ids ? hits.ids : [];
        const distances = hits && hits.distances ? hits.distances : [];
        const scores = new Map();
        for (let i = 0; i < ids.length; i++) {
            const key = Number(ids[i]);
            if (!Number.isFinite(key)) {
                continue;
            }
            // Approximate similarity = 1 - distance, floored at 0; a missing distance scores 0.
            const distance = Number(distances[i]);
            scores.set(key, Number.isFinite(distance) ? Math.max(0, 1.0 - distance) : 0);
        }
        if (ids.length > 0) {
            console.log(`[SemanticSearch] Vector Index returned ${ids.length} hits.`);
        }
        return { ids, scores };
    }
    catch (e) {
        console.warn(`[SemanticSearch] Vector search failed, falling back to pure FTS.`, e);
        return { ids: [], scores: new Map() };
    }
}

/**
 * Build the candidate query and its positional parameters in lock-step, so every
 * `$N` placeholder always matches `params[N - 1]`.
 * `a.content` is deliberately not selected; content is read from disk during inflation.
 */
function buildSemanticSearchSql({ tsQuery, vectorIds, provenance, buckets, scopeTags }) {
    const params = [tsQuery];
    const bind = (value) => {
        params.push(value);
        return `$${params.length}`;
    };
    let matchClause = `to_tsvector('simple', a.content) @@ to_tsquery('simple', $1)`;
    if (vectorIds.length > 0) {
        matchClause = `(${matchClause} OR a.vector_id = ANY(${bind(vectorIds)}))`;
    }
    let whereStr = `WHERE ${matchClause}`;
    if (provenance !== 'all') {
        whereStr += ` AND a.provenance = ${bind(provenance)}`;
    }
    if (buckets && buckets.length > 0) {
        whereStr += ` AND EXISTS(SELECT 1 FROM unnest(a.buckets) as bucket WHERE bucket = ANY(${bind(buckets)}))`;
    }
    if (scopeTags.length > 0) {
        whereStr += ` AND EXISTS(SELECT 1 FROM unnest(a.tags) as tag WHERE tag = ANY(${bind(scopeTags)}))`;
    }
    // Rows come back newest-first; relevance is re-scored in memory afterwards.
    const sql = `SELECT a.id, a.source_path as source, a.timestamp, a.buckets, a.tags, a.epochs, a.provenance, a.simhash,
        0 as score,
        COALESCE(m.compound_id, a.compound_id) as compound_id,
        COALESCE(m.start_byte, a.start_byte) as start_byte,
        COALESCE(m.end_byte, a.end_byte) as end_byte,
        a.vector_id
        FROM atoms a
        LEFT JOIN molecules m ON a.id = m.id
        ${whereStr}
        ORDER BY timestamp DESC LIMIT 50`;
    return { sql, params };
}

/** Turn one database row into a scored search result (content is hydrated later). */
function buildSemanticRowResult(row, { queryEntities, searchTerms, entityPairs, vectorScores, provenance, codeWeight }) {
    const toNumber = (value) => (typeof value === 'number' ? value : Number(value));
    // Content is not selected from the DB, so the text-based helpers below see an empty
    // string and contribute nothing at this stage; the vector score drives the ranking.
    const content = '';
    const rowTags = toSemanticStringArray(readSemanticRowColumn(row, 'tags', 4));
    const rowProvenance = String(readSemanticRowColumn(row, 'provenance', 6) || '');
    const semanticScore = calculateSemanticScore(content, queryEntities, searchTerms, entityPairs);
    let vectorScore = 0;
    const rawVectorId = readSemanticRowColumn(row, 'vector_id', 12);
    if (rawVectorId !== null && rawVectorId !== undefined) {
        const vectorKey = Number(rawVectorId);
        if (Number.isFinite(vectorKey) && vectorScores.has(vectorKey)) {
            vectorScore = (vectorScores.get(vectorKey) || 0) * 100; // Scale 0..1 to 0..100
        }
    }
    // Hybrid merge: max(semantic, vector), plus half of the weaker signal when both agree.
    let score = Math.max(semanticScore, vectorScore);
    if (semanticScore > 0 && vectorScore > 0) {
        score += Math.min(semanticScore, vectorScore) * 0.5;
    }
    // Provenance boost
    if (provenance === 'internal' && rowProvenance === 'internal') {
        score *= 2.0;
    }
    else if (provenance === 'external' && rowProvenance !== 'internal') {
        score *= 1.5;
    }
    // Code/log weighting: a codeWeight below 1.0 penalizes rows tagged as code-like.
    if (codeWeight < 1.0 && rowTags.some(tag => SEMANTIC_SEARCH_CODE_TAGS.has(String(tag).toLowerCase().replace('#', '')))) {
        score *= codeWeight;
    }
    const relationshipEntities = findEntityPairs(content, queryEntities);
    const semanticCategories = determineSemanticCategories(content, relationshipEntities);
    const rawTimestamp = readSemanticRowColumn(row, 'timestamp', 2);
    const timestamp = rawTimestamp === null || rawTimestamp === undefined || rawTimestamp === '' ? NaN : Number(rawTimestamp);
    return {
        id: String(readSemanticRowColumn(row, 'id', 0) || ''),
        content,
        source: String(readSemanticRowColumn(row, 'source', 1) || ''),
        timestamp: Number.isFinite(timestamp) ? timestamp : Date.now(),
        buckets: toSemanticStringArray(readSemanticRowColumn(row, 'buckets', 3)),
        tags: rowTags,
        epochs: String(readSemanticRowColumn(row, 'epochs', 5) || ''),
        provenance: rowProvenance,
        molecular_signature: String(readSemanticRowColumn(row, 'simhash', 7) || ''),
        // The SQL "score" column is a constant 0; expose the computed hybrid score instead.
        score,
        semanticCategories,
        relatedEntities: relationshipEntities.length > 0 ? relationshipEntities : undefined,
        // Inflation Metadata
        compound_id: String(readSemanticRowColumn(row, 'compound_id', 9) || ''),
        start_byte: toNumber(readSemanticRowColumn(row, 'start_byte', 10)),
        end_byte: toNumber(readSemanticRowColumn(row, 'end_byte', 11))
    };
}

/** ISO timestamp for the context header; 'unknown' instead of throwing on an invalid date. */
function formatSemanticTimestamp(value) {
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? 'unknown' : date.toISOString();
}

/** Concatenate inflated windows into the context string until the character budget is used. */
function assembleSemanticContext(inflatedResults, maxChars) {
    let totalChars = 0;
    let context = '';
    const finalResults = [];
    for (const res of inflatedResults) {
        let contentToUse = (res.content || '').trim();
        // Clean up "......" artifacts from empty inflation
        if (contentToUse === '......' || contentToUse === '...') {
            contentToUse = '';
        }
        if (contentToUse.length <= 10) {
            continue; // Require more than 10 meaningful chars
        }
        const remainingBudget = maxChars - totalChars;
        if (remainingBudget <= 0) {
            break; // Budget full
        }
        // Truncate if too large for remaining budget
        const finalContent = contentToUse.length > remainingBudget
            ? contentToUse.substring(0, remainingBudget) + '...'
            : contentToUse;
        context += `[Source: ${res.source || 'unknown'}](Timestamp: ${formatSemanticTimestamp(res.timestamp)}) \n${finalContent} \n\n`;
        totalChars += finalContent.length;
        finalResults.push({
            ...res,
            content: finalContent
        });
    }
    return { context, finalResults, totalChars };
}

/**
 * Hybrid (full-text + vector) search followed by radial context inflation.
 *
 * Steps: parse/expand the query into terms, detect "[a] [cue] [b]" entity pairs, fetch up to
 * 50 candidate rows (FTS match OR vector hit, filtered by provenance/buckets/tags), score them,
 * inflate each significant term via ContextInflator.inflateFromAtomPositions (falling back to
 * ContextInflator.inflate over the scored rows when that yields nothing), then pack the
 * inflated windows into `maxChars`.
 *
 * @param {string} query - Raw user query.
 * @param {string[]} [buckets] - Restrict to atoms in any of these buckets.
 * @param {number} [maxChars=5242] - Character budget for the assembled context.
 * @param {string} [provenance='all'] - 'all', or a provenance value to filter on.
 * @param {string[]} [explicitTags=[]] - Restrict to atoms carrying any of these tags.
 * @param {number} [codeWeight=1.0] - 1.0 = normal, 0.1 = heavily penalize code/log-tagged rows.
 * @returns {Promise<object>} `{ context, results, toAgentString, strategy, splitQueries, metadata }`.
 *   Database/inflation errors are logged and produce the same shape with empty values.
 */
export async function executeSemanticSearch(query, buckets, maxChars = 5242, provenance = 'all', explicitTags = [], codeWeight = 1.0) {
    console.log(`[SemanticSearch] executeSemanticSearch called with query: "${query}", provenance: ${provenance}`);
    const queryEntities = extractEntitiesFromQuery(query);
    const scopeTags = Array.isArray(explicitTags) ? [...explicitTags] : [];
    // Sanitize for FTS: remove punctuation that causes syntax errors (like ?)
    const parsedQuery = parseNaturalLanguage(query).replace(/[?*:|!<>(){}[\]^"~]/g, ' ');
    const expansionTags = await expandQuery(parsedQuery);
    // Splitting on whitespace leaves '' for leading/trailing blanks; drop those so they
    // cannot become search terms or entity-pair halves.
    const searchTerms = [...new Set([...parsedQuery.split(/\s+/), ...(expansionTags || [])])]
        .filter(term => typeof term === 'string' && term.trim().length > 0);
    const entityPairs = collectSemanticEntityPairs(searchTerms);
    // 0. Vector search (hybrid retrieval)
    const { ids: vectorIds, scores: vectorScores } = await runSemanticVectorSearch(query);
    // OR-based tsquery on the terms allows conversational ("fuzzy") queries.
    const { sql: searchQuery, params: sqlParams } = buildSemanticSearchSql({
        tsQuery: toSemanticTsQuery(searchTerms),
        vectorIds,
        provenance,
        buckets,
        scopeTags
    });
    try {
        const result = await db.run(searchQuery, sqlParams);
        const rows = result.rows || [];
        const scoringContext = { queryEntities, searchTerms, entityPairs, vectorScores, provenance, codeWeight };
        const processedResults = rows.map(row => buildSemanticRowResult(row, scoringContext));
        // Sort by score descending (before inflation merge)
        processedResults.sort((a, b) => (b.score || 0) - (a.score || 0));
        // --- CONTEXT INFLATION (Lazy Molecule Radial Inflation) ---
        console.log(`[SemanticSearch] Radially inflating from atom positions for ${searchTerms.length} terms...`);
        // Skip tiny terms and stopwords
        const termsToInflate = searchTerms.filter(t => t.trim().length > 2 && !SEMANTIC_SEARCH_STOPWORDS.has(t.toLowerCase()));
        console.log(`[SemanticSearch] Inflating terms: ${termsToInflate.join(', ')}`);
        // Each term gets an equal share of the budget; aim for ~8 windows per term,
        // with the radius clamped to [150, 32000] characters.
        const termBudget = maxChars / Math.max(1, termsToInflate.length);
        const radiusPerTerm = Math.max(150, Math.min(32000, Math.floor(termBudget / 8)));
        // How many windows of that diameter fit in the term's share (at least 3).
        const maxResultsPerTerm = Math.max(3, Math.floor(termBudget / (radiusPerTerm * 2)));
        console.log(`[SemanticSearch] Inflation Strategy: Radius=${radiusPerTerm} chars, MaxResults=${maxResultsPerTerm}/term`);
        const maxWindowSize = radiusPerTerm * 4; // Allow merging of up to 4 consecutive windows
        const inflationResults = await Promise.all(termsToInflate.map(term => ContextInflator
            .inflateFromAtomPositions(term, radiusPerTerm, maxResultsPerTerm, maxWindowSize, { buckets, provenance })
            .then(results => ({ term, results }))));
        let inflatedResults = [];
        for (const { term, results: termResults } of inflationResults) {
            if (termResults.length > 0) {
                console.log(`[SemanticSearch] Term "${term}" inflated to ${termResults.length} results.`);
            }
            inflatedResults.push(...termResults);
        }
        // --- INTERSECTION SCORING ---
        // Prefer windows containing several distinct query terms: +50 * matches^2.
        if (termsToInflate.length > 1) {
            const termsLower = termsToInflate.map(t => t.toLowerCase());
            for (const inflated of inflatedResults) {
                const contentLower = (inflated.content || '').toLowerCase();
                const termMatches = termsLower.filter(term => contentLower.includes(term)).length;
                if (termMatches > 0) {
                    inflated.score = (inflated.score || 0) + (Math.pow(termMatches, 2) * 50);
                }
            }
            inflatedResults.sort((a, b) => (b.score || 0) - (a.score || 0));
        }
        // If no radial results, fall back to compound inflation of the scored rows
        if (inflatedResults.length === 0 && processedResults.length > 0) {
            console.log(`[SemanticSearch] No atom positions found, falling back to compound inflation (Radius: ${radiusPerTerm})`);
            inflatedResults = await ContextInflator.inflate(processedResults, maxChars, radiusPerTerm);
        }
        console.log(`[SemanticSearch] Inflated into ${inflatedResults.length} windows.`);
        const { context, finalResults, totalChars } = assembleSemanticContext(inflatedResults, maxChars);
        console.log(`[SemanticSearch] Found ${finalResults.length} results with total ${totalChars} characters`);
        return {
            context,
            results: finalResults,
            toAgentString: () => {
                return finalResults.map(r => `[${r.source}] ${r.content} `).join('\n\n');
            },
            strategy: 'semantic_relationship',
            splitQueries: entityPairs,
            metadata: {
                query,
                queryEntities,
                entityPairs,
                resultsCount: finalResults.length,
                totalCharacters: totalChars,
                semanticCategories: [...new Set(finalResults.flatMap(r => r.semanticCategories || []))]
            }
        };
    }
    catch (error) {
        console.error('[SemanticSearch] Search error:', error);
        // Return empty results instead of throwing to prevent frontend crashes
        return {
            context: '',
            results: [],
            toAgentString: () => '',
            strategy: 'semantic_relationship',
            splitQueries: [],
            metadata: {
                query,
                queryEntities: [],
                entityPairs: [],
                resultsCount: 0,
                totalCharacters: 0,
                semanticCategories: []
            }
        };
    }
}
|
|
401
|
+
/**
 * Extract potential entities (likely names) from a query string.
 *
 * A word is kept when, after surrounding punctuation is stripped, it is longer than one
 * character, starts with an ASCII capital letter, and is not rejected by
 * isCommonCapitalizedWord. Simple heuristic - could be enhanced with NER.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Unique candidate entities in order of first appearance;
 *   empty for a non-string or empty query.
 */
function extractEntitiesFromQuery(query) {
    if (typeof query !== 'string' || query.length === 0) {
        return [];
    }
    const entities = new Set();
    for (const rawWord of query.split(/\s+/)) {
        // Strip leading/trailing punctuation so "Bob?" yields "Bob" and "The," is still
        // recognized as a common word. Inner characters ("O'Brien") are left alone.
        const word = rawWord.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, '');
        if (word.length > 1 && /^[A-Z]/.test(word) && !isCommonCapitalizedWord(word)) {
            entities.add(word);
        }
    }
    return [...entities];
}
|
|
413
|
+
/** Capitalized function words that usually start a sentence rather than name an entity. */
const COMMON_CAPITALIZED_WORDS = new Set([
    'The', 'And', 'For', 'Are', 'Is', 'In', 'On', 'At', 'To', 'With', 'By', 'A', 'An', 'Of', 'As',
    'This', 'That', 'These', 'Those',
    'Have', 'Has', 'Had', 'Do', 'Does', 'Did',
    'Will', 'Would', 'Could', 'Should', 'May', 'Might', 'Must', 'Can', 'Shall'
]);

/**
 * Tell whether a word is a common capitalized word that is unlikely to be an entity.
 * The comparison is exact and case-sensitive ("The" matches, "the" and "THE" do not).
 */
function isCommonCapitalizedWord(word) {
    return COMMON_CAPITALIZED_WORDS.has(word);
}
|
|
420
|
+
/** Words whose presence in content earns the relationship bonus (+5 each). */
const SEMANTIC_SCORE_RELATIONSHIP_INDICATORS = ['relationship', 'with', 'and', 'met', 'told', 'said', 'visited', 'called', 'texted', 'about', 'love', 'knows', 'friend', 'partner', 'couple', 'together'];

/** Words whose presence in content earns the temporal/narrative bonus (+3 each). */
const SEMANTIC_SCORE_TEMPORAL_INDICATORS = ['when', 'then', 'later', 'before', 'after', 'during', 'while', 'yesterday', 'today', 'tomorrow', 'morning', 'afternoon', 'evening', 'night'];

/**
 * Calculate a semantic relevance score from term matches, entity co-occurrence and
 * relationship/temporal cue words. All matching is case-insensitive substring matching.
 *
 * Scoring: +10 per search term found, +100 per "a_b" pair whose two halves are both found,
 * +5 per relationship indicator found, +3 per temporal indicator found.
 *
 * @param {string} content - Text to score; empty or missing content scores 0.
 * @param {string[]} queryEntities - Accepted for signature compatibility; not used here.
 * @param {string[]} searchTerms - Query terms; empty or non-string entries are ignored.
 * @param {string[]} entityPairs - Pairs formatted as "entity1_entity2"; malformed pairs are ignored.
 * @returns {number} Non-negative score.
 */
function calculateSemanticScore(content, queryEntities, searchTerms, entityPairs) {
    const contentLower = String(content === null || content === undefined ? '' : content).toLowerCase();
    if (contentLower.length === 0) {
        return 0;
    }
    // An empty needle is "contained" in every string, so it must never count as a match.
    const found = (needle) => typeof needle === 'string' && needle.length > 0 && contentLower.includes(needle.toLowerCase());
    let score = 0;
    // Boost for query term matches
    for (const term of searchTerms || []) {
        if (found(term)) {
            score += 10;
        }
    }
    // Significant boost when both halves of an entity pair co-occur
    for (const pair of entityPairs || []) {
        const [entity1, entity2] = String(pair).split('_');
        if (found(entity1) && found(entity2)) {
            score += 100;
        }
    }
    // Additional boost if content contains relationship indicators
    for (const indicator of SEMANTIC_SCORE_RELATIONSHIP_INDICATORS) {
        if (contentLower.includes(indicator)) {
            score += 5;
        }
    }
    // Boost for temporal indicators (narrative content)
    for (const indicator of SEMANTIC_SCORE_TEMPORAL_INDICATORS) {
        if (contentLower.includes(indicator)) {
            score += 3;
        }
    }
    return score;
}
|
|
455
|
+
/**
 * Find pairs of query entities that appear together in content.
 *
 * Matching is a case-insensitive substring test. Every unordered pair of found entities is
 * emitted in both directions as "first_second" (the same format used for query entity pairs).
 *
 * @param {string} content - Text to inspect; missing content yields no pairs.
 * @param {string[]} queryEntities - Candidate entities; empty or non-string entries are ignored.
 * @returns {string[]} Unique pair strings.
 */
function findEntityPairs(content, queryEntities) {
    const contentLower = String(content === null || content === undefined ? '' : content).toLowerCase();
    // Skip empty entities: '' is a substring of everything and would pair with anything.
    const present = (queryEntities || []).filter(entity => typeof entity === 'string'
        && entity.length > 0
        && contentLower.includes(entity.toLowerCase()));
    const pairs = new Set();
    for (let i = 0; i < present.length; i++) {
        for (let j = i + 1; j < present.length; j++) {
            pairs.add(`${present[i]}_${present[j]}`);
            pairs.add(`${present[j]}_${present[i]}`); // Bidirectional
        }
    }
    return [...pairs];
}
|
|
471
|
+
/**
 * Build a category matcher.
 * `stems` match at the start of a word ("friend" also matches "friends");
 * `wholeWords` must match a complete word ("at" does not match "cat").
 * Entries are regex fragments and must already be escaped.
 */
function buildSemanticCategoryPattern(stems, wholeWords) {
    return new RegExp(`\\b(?:(?:${stems.join('|')})|(?:${wholeWords.join('|')})\\b)`);
}

/** Category matchers, in the order categories are reported. Keys index SemanticCategory. */
const SEMANTIC_CATEGORY_MATCHERS = [
    {
        key: 'RELATIONSHIP',
        pattern: buildSemanticCategoryPattern(['relationship', 'friend', 'partner', 'love', 'visited', 'called', 'texted', 'together', 'dating', 'couple', 'family', 'wife', 'husband', 'girlfriend', 'boyfriend'], ['met', 'told', 'said', 'with', 'and'])
    },
    {
        key: 'NARRATIVE',
        pattern: buildSemanticCategoryPattern(['finally', 'meanwhile', 'suddenly', 'story', 'remember', 'recall', 'yesterday', 'today', 'tomorrow', 'morning', 'afternoon', 'evening', 'night'], ['when', 'then', 'later', 'before', 'after', 'during', 'while', 'first', 'next'])
    },
    {
        key: 'TECHNICAL',
        pattern: buildSemanticCategoryPattern(['function', 'class', 'method', 'variable', 'code', 'database', 'server', 'client', 'library', 'framework', 'module', 'component', 'system', 'architecture', 'node\\.js', 'typescript', 'javascript'], ['apis?'])
    },
    {
        key: 'LOCATION',
        pattern: buildSemanticCategoryPattern(['city', 'town', 'country', 'state', 'street', 'avenue', 'road', 'building', 'home', 'office', 'address', 'albuquerque', 'bernalillo', 'sandia', 'los alamos'], ['in', 'at', 'near', 'by', 'nm', 'tx', 'ca', 'ny', 'fl'])
    },
    {
        key: 'EMOTIONAL',
        pattern: buildSemanticCategoryPattern(['happy', 'sad', 'angry', 'excited', 'frustrated', 'anxious', 'joy', 'fear', 'love', 'hate', 'regret', 'hope', 'despair', 'grateful', 'emotions', 'feelings', 'heart', 'soul', 'spirit'], ['felt'])
    }
];

/**
 * Determine semantic categories based on content analysis.
 *
 * Each category is assigned when its cue words occur in the (lowercased) content.
 * Cue words are anchored at word boundaries so short cues such as "in", "at", "ca" or
 * "and" no longer fire inside unrelated words ("cat", "random").
 * RELATIONSHIP is also assigned whenever `entityPairs` is non-empty.
 *
 * @param {string} content - Text to classify; missing content is treated as empty.
 * @param {string[]} entityPairs - Entity pairs already found in the content.
 * @returns {Array} SemanticCategory values in the fixed order
 *   RELATIONSHIP, NARRATIVE, TECHNICAL, LOCATION, EMOTIONAL.
 */
function determineSemanticCategories(content, entityPairs) {
    const contentLower = String(content === null || content === undefined ? '' : content).toLowerCase();
    const hasEntityPairs = Array.isArray(entityPairs) && entityPairs.length > 0;
    const categories = [];
    for (const { key, pattern } of SEMANTIC_CATEGORY_MATCHERS) {
        if ((key === 'RELATIONSHIP' && hasEntityPairs) || pattern.test(contentLower)) {
            categories.push(SemanticCategory[key]);
        }
    }
    return categories;
}
|
|
500
|
+
/**
|
|
501
|
+
* Distributed Radial Search (Lazy Molecule Architecture)
|
|
502
|
+
*
|
|
503
|
+
* Uses 70/30 budget split:
|
|
504
|
+
* - 70% for direct query atoms (evenly distributed)
|
|
505
|
+
* - 30% for related/nearby atoms (5 per direct term)
|
|
506
|
+
*
|
|
507
|
+
* Each atom is radially inflated from its byte position in compounds
|
|
508
|
+
* to create virtual molecules on-the-fly.
|
|
509
|
+
*/
|
|
510
|
+
export async function executeDistributedRadialSearch(query, buckets, // Added parameter
|
|
511
|
+
maxChars = 10000, provenance = 'all', codeWeight = 1.0) {
|
|
512
|
+
console.log(`[DistributedRadialSearch] Query: "${query}", Budget: ${maxChars} chars`);
|
|
513
|
+
// 1. Distribute budget across terms
|
|
514
|
+
const budget = await distributeQueryBudget(query, maxChars);
|
|
515
|
+
// const allTerms = getAllTerms(budget); // Deprecated
|
|
516
|
+
if (budget.directTerms.length === 0 && budget.relatedTerms.length === 0) {
|
|
517
|
+
return {
|
|
518
|
+
context: '',
|
|
519
|
+
results: [],
|
|
520
|
+
toAgentString: () => '',
|
|
521
|
+
metadata: { query, termCount: 0, totalChars: 0 }
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
// 2. Radially inflate from atom positions for each term
|
|
525
|
+
// Dynamic radius: Scale with budget (Standard 085 Section 5.2)
|
|
526
|
+
// Target: Up to 1k words (6000 chars) is standard, but scale to 32k if budget allows
|
|
527
|
+
const expectedResults = 5; // Target fewer, larger chunks for deep context
|
|
528
|
+
const directRadius = Math.max(500, Math.min(32000, Math.floor(maxChars / expectedResults / 2)));
|
|
529
|
+
const relatedRadius = 300; // Increased broad context slightly
|
|
530
|
+
console.log(`[DistributedRadialSearch] Radius Strategy: Direct=${directRadius}b (Deep), Related=${relatedRadius}b (Broad)`);
|
|
531
|
+
const allResults = [];
|
|
532
|
+
// 2. Elastic Context Strategy (Standard 087)
|
|
533
|
+
// Instead of guessing a radius per term, we find ALL relevant atom positions first.
|
|
534
|
+
// Then we divide the Global Budget by the Total Hits to determine the "Elastic Radius".
|
|
535
|
+
// Few hits = Huge Context. Many hits = Focused Context.
|
|
536
|
+
const allTerms = [...budget.directTerms, ...budget.relatedTerms];
|
|
537
|
+
const termLocations = new Map();
|
|
538
|
+
let totalHits = 0;
|
|
539
|
+
// Step 2a: Census - Find where these terms actually live
|
|
540
|
+
console.log(`[ElasticContext] conducting census for terms: ${allTerms.map(t => t.term).join(', ')}`);
|
|
541
|
+
for (const termObj of allTerms) {
|
|
542
|
+
// Pass filters to census too, to avoid counting hits we will filter out anyway!
|
|
543
|
+
const locations = await ContextInflator.getAtomLocations(termObj.term, 50, { buckets, provenance });
|
|
544
|
+
if (locations.length > 0) {
|
|
545
|
+
termLocations.set(termObj.term, locations);
|
|
546
|
+
totalHits += locations.length;
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
// Step 2b: Calculate Elastic Radius
|
|
550
|
+
// Budget e.g. 50,000 chars.
|
|
551
|
+
// If 5 hits: 10,000 chars each (Huge).
|
|
552
|
+
// If 50 hits: 1,000 chars each (focused).
|
|
553
|
+
// If 0 hits: 0.
|
|
554
|
+
const baseRadius = totalHits > 0 ? Math.floor(maxChars / totalHits / 2) : 0;
|
|
555
|
+
// Clamp: Min 200 (readability), Max 32000 (sanity)
|
|
556
|
+
const elasticRadius = Math.max(200, Math.min(32000, baseRadius));
|
|
557
|
+
const maxResultPerTerm = 20; // Cap to avoid flooding
|
|
558
|
+
console.log(`[ElasticContext] Census Results: ${totalHits} total hits. Elastic Radius = ${elasticRadius} bytes/hit`);
|
|
559
|
+
// Step 2c: Inflate using the Elastic Radius
|
|
560
|
+
// We can reuse the existing inflateFromAtomPositions but passing our calculated specific radius
|
|
561
|
+
/**
 * Inflates context windows for every term in one group and appends the
 * resulting hits to the shared `allResults` array.
 *
 * Lookups for the group run in parallel, but hits are appended only after the
 * whole group has resolved, and in the order the terms were supplied. The
 * order within a group therefore does not depend on which lookup happens to
 * finish first. If any lookup rejects, the returned promise rejects and
 * nothing from this group is appended.
 *
 * @param {Array<Object>} terms - Term objects exposing a `term` property;
 *   terms without an entry in `termLocations` are skipped.
 * @param {boolean} isRelated - Accepted for call-site compatibility; not read here.
 * @returns {Promise<void>} Resolves once every hit of the group has been appended.
 */
const processTerms = async (terms, isRelated) => {
    // Promise.all keeps results in input order regardless of completion order.
    const perTermResults = await Promise.all(terms.map(async (termObj) => {
        // Skip if no locations found (save the DB call)
        if (!termLocations.has(termObj.term)) {
            return [];
        }
        const results = await ContextInflator.inflateFromAtomPositions(
            termObj.term,
            elasticRadius,
            maxResultPerTerm,
            elasticRadius * 4, // Allow merging up to 4x radius
            { buckets, provenance } // Pass filters
        );
        // Provenance is now handled in SQL, but we keep this as a safe backup or for consistency
        return provenance === 'all'
            ? results
            : results.filter((r) => r.provenance === provenance);
    }));
    // Append sequentially so the shared array is mutated in one deterministic pass.
    for (const termResults of perTermResults) {
        allResults.push(...termResults);
    }
};
|
|
578
|
+
// Parallelize processing of both direct and related terms
|
|
579
|
+
await Promise.all([
|
|
580
|
+
processTerms(budget.directTerms, false),
|
|
581
|
+
processTerms(budget.relatedTerms, true)
|
|
582
|
+
]);
|
|
583
|
+
// Apply Smart Code Weighting to Radial Results
|
|
584
|
+
if (codeWeight < 1.0) {
|
|
585
|
+
for (const res of allResults) {
|
|
586
|
+
const tags = (res.tags || []).map(t => t.toLowerCase().replace('#', ''));
|
|
587
|
+
const isTechnicalOrCode = tags.some(t => ['code', 'technical', 'json', 'config', 'test'].includes(t));
|
|
588
|
+
const hasChatIndicators = res.content.match(/(^|\n)(User|Human|Assistant|AI|System):/i);
|
|
589
|
+
const isNarrative = tags.some(t => ['narrative', 'relationship', 'social', 'personal'].includes(t)) || !!hasChatIndicators;
|
|
590
|
+
// Also check content heuristics if tags are missing
|
|
591
|
+
const looksLikeCode = res.content.includes('function ') || res.content.includes('const ') || res.content.includes('```');
|
|
592
|
+
// Penalize ONLY if (Tagged Technical OR Looks Like Code) AND NOT Narrative
|
|
593
|
+
if ((isTechnicalOrCode || looksLikeCode) && !isNarrative) {
|
|
594
|
+
res.score = (res.score || 0) * codeWeight;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
// 3. Deduplicate by compound+offset (aggregating frequency), then sort by score
|
|
599
|
+
const resultMap = new Map();
|
|
600
|
+
for (const res of allResults) {
|
|
601
|
+
const key = `${res.compound_id}_${res.start_byte}`;
|
|
602
|
+
if (resultMap.has(key)) {
|
|
603
|
+
const existing = resultMap.get(key);
|
|
604
|
+
// Aggregation logic:
|
|
605
|
+
// 1. Increment hits
|
|
606
|
+
existing.hits++;
|
|
607
|
+
// 2. Boost score slightly for each recurrence (Temporal Density)
|
|
608
|
+
existing.score = (existing.score || 0) + ((res.score || 0) * 0.2);
|
|
609
|
+
// 3. Keep the earliest timestamp if we want to show "first seen", or latest?
|
|
610
|
+
// Let's keep the one with the higher base score (which we already sorted for),
|
|
611
|
+
// but maybe strict timestamp filtering matters? For now, score aggregation is key.
|
|
612
|
+
}
|
|
613
|
+
else {
|
|
614
|
+
resultMap.set(key, { ...res, hits: 1 });
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
const uniqueResults = Array.from(resultMap.values());
|
|
618
|
+
uniqueResults.sort((a, b) => (b.score || 0) - (a.score || 0));
|
|
619
|
+
// 4. Build context within budget
|
|
620
|
+
let totalChars = 0;
|
|
621
|
+
let context = '';
|
|
622
|
+
const finalResults = [];
|
|
623
|
+
for (const res of uniqueResults) {
|
|
624
|
+
const remaining = maxChars - totalChars;
|
|
625
|
+
if (remaining <= 0)
|
|
626
|
+
break;
|
|
627
|
+
let content = res.content || '';
|
|
628
|
+
if (content.length > remaining) {
|
|
629
|
+
content = content.substring(0, remaining) + '...';
|
|
630
|
+
}
|
|
631
|
+
context += `[${res.source}] (Hits: ${res.hits || 1})\n${content}\n\n`;
|
|
632
|
+
totalChars += content.length;
|
|
633
|
+
finalResults.push({ ...res, content });
|
|
634
|
+
}
|
|
635
|
+
console.log(`[DistributedRadialSearch] Returned ${finalResults.length} results, ${totalChars} chars`);
|
|
636
|
+
return {
|
|
637
|
+
context,
|
|
638
|
+
results: finalResults,
|
|
639
|
+
toAgentString: () => finalResults.map(r => `[${r.source}] (Hits: ${r.hits || 1}) ${r.content}`).join('\n\n'),
|
|
640
|
+
metadata: {
|
|
641
|
+
query,
|
|
642
|
+
directTerms: budget.directTerms.length,
|
|
643
|
+
relatedTerms: budget.relatedTerms.length,
|
|
644
|
+
totalChars,
|
|
645
|
+
resultCount: finalResults.length
|
|
646
|
+
}
|
|
647
|
+
};
|
|
648
|
+
}
|
|
649
|
+
//# sourceMappingURL=semantic-search.js.map
|