@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Ingestion Service for ECE (Semantic Shift Refactor)
|
|
3
|
+
*
|
|
4
|
+
* Replaces the old atomizer with semantic molecule processing
|
|
5
|
+
* that creates high-level semantic tags and atomic entities.
|
|
6
|
+
*/
|
|
7
|
+
import { SemanticMoleculeProcessor } from './semantic-molecule-processor.js';
|
|
8
|
+
import { db } from '../../core/db.js';
|
|
9
|
+
import * as crypto from 'crypto';
|
|
10
|
+
import { Timer } from '../../utils/timer.js';
|
|
11
|
+
export class SemanticIngestionService {
|
|
12
|
+
// Processor that converts raw text chunks into semantic molecules
// (high-level semantic tags plus the atomic entities contained in each chunk).
moleculeProcessor;
constructor() {
    // Single shared processor instance, reused across all ingestion calls.
    this.moleculeProcessor = new SemanticMoleculeProcessor();
}
|
|
16
|
+
/**
|
|
17
|
+
* Ingest content using the new semantic architecture
|
|
18
|
+
* Creates molecules with high-level semantic tags and atomic entities
|
|
19
|
+
*/
|
|
20
|
+
async ingestContent(content, source, type = 'text', bucket = 'default', buckets = [], tags = [] // These will be high-level semantic categories
|
|
21
|
+
) {
|
|
22
|
+
const timer = new Timer('IngestionService');
|
|
23
|
+
try {
|
|
24
|
+
console.log(`[IngestionService] Starting ingestion for source: ${source}, type: ${type}, length: ${content.length} chars`);
|
|
25
|
+
// Handle legacy single-bucket param
|
|
26
|
+
const allBuckets = bucket ? [...buckets, bucket] : buckets;
|
|
27
|
+
console.log(`[IngestionService] Processing with buckets: [${allBuckets.join(', ')}], tags: [${tags.join(', ')}]`);
|
|
28
|
+
// Ensure explicit metadata tags exist (Fix for missing UI toggles when NER fails)
|
|
29
|
+
// This ensures 'indexTags' never receives an empty list, so buckets are always indexed.
|
|
30
|
+
const metadataTags = [`source:${source}`, `type:${type}`];
|
|
31
|
+
const effectiveTags = [...new Set([...tags, ...metadataTags])];
|
|
32
|
+
console.log(`[IngestionService] Effective tags after adding metadata: [${effectiveTags.join(', ')}]`);
|
|
33
|
+
// Validate content length to prevent oversized atoms
|
|
34
|
+
const MAX_CONTENT_LENGTH = 500 * 1024; // 500KB limit
|
|
35
|
+
if (content.length > MAX_CONTENT_LENGTH) {
|
|
36
|
+
console.warn(`[SemanticIngestionService] Content exceeds maximum length (${content.length} chars), performing automatic chunking...`);
|
|
37
|
+
// Split the content into smaller chunks and process each separately
|
|
38
|
+
timer.log('Starting large content ingestion');
|
|
39
|
+
const result = await this.ingestLargeContent(content, source, type, bucket, buckets, effectiveTags);
|
|
40
|
+
timer.logTotalAndReset(`Completed large content ingestion for ${source}`);
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
43
|
+
timer.log('Starting content splitting');
|
|
44
|
+
// Split content into text chunks (molecules)
|
|
45
|
+
const textChunks = this.splitIntoMolecules(content);
|
|
46
|
+
console.log(`[IngestionService] Content split into ${textChunks.length} chunks`);
|
|
47
|
+
timer.logLap(`Split content into ${textChunks.length} chunks`);
|
|
48
|
+
timer.log('Starting molecule processing');
|
|
49
|
+
// Process each chunk into semantic molecules - OPTIMIZED FOR PARALLEL PROCESSING
|
|
50
|
+
const chunksWithMetadata = textChunks.map((chunk, index) => ({
|
|
51
|
+
content: chunk,
|
|
52
|
+
source: `${source}_chunk_${index}`,
|
|
53
|
+
timestamp: Date.now() + index, // Slightly offset timestamps
|
|
54
|
+
provenance: 'external'
|
|
55
|
+
}));
|
|
56
|
+
console.log(`[IngestionService] Processing ${chunksWithMetadata.length} chunks through molecule processor...`);
|
|
57
|
+
// Process chunks in parallel to reduce serial processing time
|
|
58
|
+
const semanticMolecules = await Promise.all(chunksWithMetadata.map(chunk => this.moleculeProcessor.processTextChunk(chunk.content, chunk.source, chunk.timestamp, chunk.provenance)));
|
|
59
|
+
console.log(`[IngestionService] Processed ${semanticMolecules.length} semantic molecules with a total of ${semanticMolecules.reduce((sum, mol) => sum + mol.containedEntities.length, 0)} atomic entities`);
|
|
60
|
+
timer.logLap(`Processed ${semanticMolecules.length} semantic molecules`);
|
|
61
|
+
// Refactored to use the shared helper method
|
|
62
|
+
const result = await this.saveMoleculesBatched([semanticMolecules], source, type, allBuckets, effectiveTags);
|
|
63
|
+
// Construct the compatible return object
|
|
64
|
+
return {
|
|
65
|
+
status: result.status,
|
|
66
|
+
id: semanticMolecules[0]?.id || 'unknown',
|
|
67
|
+
message: result.message
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
catch (e) {
|
|
71
|
+
console.error('[SemanticIngestionService] Ingest Error:', e);
|
|
72
|
+
return { status: 'error', id: 'unknown', message: e.message };
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Helper to validate and save a batch of molecules to the database
|
|
77
|
+
* Handles the transaction, deduplication, and bulk insertion
|
|
78
|
+
*/
|
|
79
|
+
async saveMoleculesBatched(moleculeBatches, source, type, buckets, tags) {
|
|
80
|
+
const timer = new Timer('SaveMoleculesBatched');
|
|
81
|
+
// Flatten the batches for this transaction (or we could process per batch)
|
|
82
|
+
// For ingestContent (single file), it's one batch.
|
|
83
|
+
// For ingestLargeContent, we might call this iteratively.
|
|
84
|
+
const molecules = moleculeBatches.flat();
|
|
85
|
+
if (molecules.length === 0) {
|
|
86
|
+
return { status: 'success', message: 'No molecules to save' };
|
|
87
|
+
}
|
|
88
|
+
// SHARED ZERO VECTOR OPTIMIZATION
|
|
89
|
+
const ZERO_VECTOR_STR = JSON.stringify(new Array(768).fill(0.1));
|
|
90
|
+
const allAtomsToInsert = [];
|
|
91
|
+
// Prepare atoms
|
|
92
|
+
for (const molecule of molecules) {
|
|
93
|
+
// Use the ID from the molecule if it exists (it was generated by the processor)
|
|
94
|
+
// or generate a new one if strictly necessary.
|
|
95
|
+
// The processor should be the source of truth, but the original code overrode it.
|
|
96
|
+
// Let's respect the processor's ID to keep the object consistent.
|
|
97
|
+
const id = molecule.id || `mol_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
98
|
+
const timestamp = molecule.timestamp;
|
|
99
|
+
const hash = crypto.createHash('sha256').update(molecule.content).digest('hex');
|
|
100
|
+
// Prepare molecule atom
|
|
101
|
+
allAtomsToInsert.push({
|
|
102
|
+
id,
|
|
103
|
+
timestamp,
|
|
104
|
+
content: molecule.content,
|
|
105
|
+
source_path: source,
|
|
106
|
+
source_id: source,
|
|
107
|
+
sequence: 0,
|
|
108
|
+
type: type || 'semantic_molecule',
|
|
109
|
+
hash,
|
|
110
|
+
buckets: buckets,
|
|
111
|
+
tags: [...tags, ...molecule.semanticTags.map((tag) => tag.replace('#', ''))],
|
|
112
|
+
epochs: [],
|
|
113
|
+
provenance: molecule.provenance,
|
|
114
|
+
simhash: "0",
|
|
115
|
+
embedding: ZERO_VECTOR_STR
|
|
116
|
+
});
|
|
117
|
+
// Prepare atomic entities
|
|
118
|
+
for (const entity of molecule.containedEntities) {
|
|
119
|
+
const entityHash = crypto.createHash('sha256').update(entity).digest('hex').substring(0, 16);
|
|
120
|
+
const atomId = `atom_${id}_${entityHash}`;
|
|
121
|
+
const atomHash = crypto.createHash('sha256').update(entity).digest('hex');
|
|
122
|
+
// Truncate entity tag
|
|
123
|
+
const entityTagRaw = `entity:${entity.toLowerCase()}`;
|
|
124
|
+
const entityTag = entityTagRaw.length > 255 ? entityTagRaw.substring(0, 255) : entityTagRaw;
|
|
125
|
+
allAtomsToInsert.push({
|
|
126
|
+
id: atomId,
|
|
127
|
+
timestamp,
|
|
128
|
+
content: entity,
|
|
129
|
+
source_path: `${source}_entities`,
|
|
130
|
+
source_id: id,
|
|
131
|
+
sequence: 0,
|
|
132
|
+
type: 'atomic_entity',
|
|
133
|
+
hash: atomHash,
|
|
134
|
+
buckets: [...buckets, 'entities'],
|
|
135
|
+
tags: [entityTag, ...molecule.semanticTags.map((tag) => tag.replace('#', ''))],
|
|
136
|
+
epochs: [],
|
|
137
|
+
provenance: 'internal',
|
|
138
|
+
simhash: "0",
|
|
139
|
+
embedding: ZERO_VECTOR_STR
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
// Database Transaction
|
|
144
|
+
await db.run('BEGIN');
|
|
145
|
+
try {
|
|
146
|
+
// Bulk Insert Atoms
|
|
147
|
+
if (allAtomsToInsert.length > 0) {
|
|
148
|
+
// Deduplicate by ID
|
|
149
|
+
const uniqueAtomsMap = new Map();
|
|
150
|
+
for (const atom of allAtomsToInsert) {
|
|
151
|
+
if (!uniqueAtomsMap.has(atom.id)) {
|
|
152
|
+
uniqueAtomsMap.set(atom.id, atom);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
const uniqueAtoms = Array.from(uniqueAtomsMap.values());
|
|
156
|
+
const ATOM_BATCH_SIZE = 100; // Smaller batch size to be safe
|
|
157
|
+
for (let i = 0; i < uniqueAtoms.length; i += ATOM_BATCH_SIZE) {
|
|
158
|
+
const batch = uniqueAtoms.slice(i, i + ATOM_BATCH_SIZE);
|
|
159
|
+
const atomValues = [];
|
|
160
|
+
const atomPlaceholders = [];
|
|
161
|
+
let pIdx = 1;
|
|
162
|
+
for (const atom of batch) {
|
|
163
|
+
atomPlaceholders.push(`($${pIdx}, $${pIdx + 1}, $${pIdx + 2}, $${pIdx + 3}, $${pIdx + 4}, $${pIdx + 5}, $${pIdx + 6}, $${pIdx + 7}, $${pIdx + 8}, $${pIdx + 9}, $${pIdx + 10}, $${pIdx + 11}, $${pIdx + 12}, $${pIdx + 13})`);
|
|
164
|
+
atomValues.push(atom.id, atom.timestamp, atom.content, atom.source_path, atom.source_id, atom.sequence, atom.type, atom.hash, atom.buckets, atom.tags, atom.epochs, atom.provenance, atom.simhash, atom.embedding);
|
|
165
|
+
pIdx += 14;
|
|
166
|
+
}
|
|
167
|
+
const atomQuery = `
|
|
168
|
+
INSERT INTO atoms (id, timestamp, content, source_path, source_id, sequence, type, hash, buckets, tags, epochs, provenance, simhash, embedding)
|
|
169
|
+
VALUES ${atomPlaceholders.join(', ')}
|
|
170
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
171
|
+
content = EXCLUDED.content,
|
|
172
|
+
timestamp = EXCLUDED.timestamp,
|
|
173
|
+
source_path = EXCLUDED.source_path,
|
|
174
|
+
source_id = EXCLUDED.source_id,
|
|
175
|
+
sequence = EXCLUDED.sequence,
|
|
176
|
+
type = EXCLUDED.type,
|
|
177
|
+
hash = EXCLUDED.hash,
|
|
178
|
+
buckets = EXCLUDED.buckets,
|
|
179
|
+
tags = EXCLUDED.tags,
|
|
180
|
+
epochs = EXCLUDED.epochs,
|
|
181
|
+
provenance = EXCLUDED.provenance,
|
|
182
|
+
simhash = EXCLUDED.simhash,
|
|
183
|
+
embedding = EXCLUDED.embedding
|
|
184
|
+
`;
|
|
185
|
+
await db.run(atomQuery, atomValues);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
// Bulk Insert Tags
|
|
189
|
+
const allTagEntries = [];
|
|
190
|
+
const tagEntrySet = new Set();
|
|
191
|
+
for (const atom of allAtomsToInsert) {
|
|
192
|
+
for (const bucket of atom.buckets) {
|
|
193
|
+
for (const tag of atom.tags) {
|
|
194
|
+
if (!tag || tag.length > 255)
|
|
195
|
+
continue;
|
|
196
|
+
const entryKey = `${atom.id}-${tag}-${bucket}`;
|
|
197
|
+
if (!tagEntrySet.has(entryKey)) {
|
|
198
|
+
tagEntrySet.add(entryKey);
|
|
199
|
+
allTagEntries.push({ atomId: atom.id, tag, bucket });
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
if (allTagEntries.length > 0) {
|
|
205
|
+
const TAG_BATCH_SIZE = 500;
|
|
206
|
+
for (let i = 0; i < allTagEntries.length; i += TAG_BATCH_SIZE) {
|
|
207
|
+
const batch = allTagEntries.slice(i, i + TAG_BATCH_SIZE);
|
|
208
|
+
const tagValues = [];
|
|
209
|
+
const tagPlaceholders = [];
|
|
210
|
+
let pIdx = 1;
|
|
211
|
+
for (const entry of batch) {
|
|
212
|
+
tagPlaceholders.push(`($${pIdx}, $${pIdx + 1}, $${pIdx + 2})`);
|
|
213
|
+
tagValues.push(entry.atomId, entry.tag, entry.bucket);
|
|
214
|
+
pIdx += 3;
|
|
215
|
+
}
|
|
216
|
+
const tagQuery = `
|
|
217
|
+
INSERT INTO tags (atom_id, tag, bucket)
|
|
218
|
+
VALUES ${tagPlaceholders.join(', ')}
|
|
219
|
+
ON CONFLICT (atom_id, tag, bucket) DO NOTHING
|
|
220
|
+
`;
|
|
221
|
+
await db.run(tagQuery, tagValues);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
await db.run('COMMIT');
|
|
225
|
+
timer.logTotalAndReset(`Saved batch of ${molecules.length} molecules`);
|
|
226
|
+
return {
|
|
227
|
+
status: 'success',
|
|
228
|
+
message: `Saved ${molecules.length} molecules with ${molecules.reduce((sum, m) => sum + m.containedEntities.length, 0)} entities`
|
|
229
|
+
};
|
|
230
|
+
}
|
|
231
|
+
catch (error) {
|
|
232
|
+
console.error('[IngestionService] Database transaction error:', error);
|
|
233
|
+
await db.run('ROLLBACK');
|
|
234
|
+
throw error;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Split content into semantic molecules (text chunks)
|
|
239
|
+
* This replaces the old atomizer logic
|
|
240
|
+
*/
|
|
241
|
+
splitIntoMolecules(content) {
|
|
242
|
+
// Split by paragraphs or sentences, preserving semantic meaning
|
|
243
|
+
// This is a simplified version - could be enhanced with more sophisticated NLP
|
|
244
|
+
// First, try to split by paragraphs
|
|
245
|
+
const paragraphs = content.split(/\n\s*\n/).filter(p => p.trim().length > 0);
|
|
246
|
+
// If paragraphs are too long, split further by sentences
|
|
247
|
+
const chunks = [];
|
|
248
|
+
for (const paragraph of paragraphs) {
|
|
249
|
+
if (paragraph.length <= 500) { // Max length for a semantic molecule
|
|
250
|
+
chunks.push(paragraph.trim());
|
|
251
|
+
}
|
|
252
|
+
else {
|
|
253
|
+
// Split long paragraphs into sentences
|
|
254
|
+
const sentences = this.splitIntoSentences(paragraph);
|
|
255
|
+
let currentChunk = '';
|
|
256
|
+
for (const sentence of sentences) {
|
|
257
|
+
if ((currentChunk + ' ' + sentence).length > 500) {
|
|
258
|
+
if (currentChunk) {
|
|
259
|
+
chunks.push(currentChunk.trim());
|
|
260
|
+
}
|
|
261
|
+
currentChunk = sentence;
|
|
262
|
+
}
|
|
263
|
+
else {
|
|
264
|
+
currentChunk += (currentChunk ? ' ' : '') + sentence;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
if (currentChunk) {
|
|
268
|
+
chunks.push(currentChunk.trim());
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
return chunks.filter(chunk => chunk.length > 10); // Filter out very short chunks
|
|
273
|
+
}
|
|
274
|
+
/**
|
|
275
|
+
* Split text into sentences
|
|
276
|
+
*/
|
|
277
|
+
splitIntoSentences(text) {
|
|
278
|
+
// Simple sentence splitting - could be enhanced with NLP
|
|
279
|
+
return text
|
|
280
|
+
.split(/(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s+/g)
|
|
281
|
+
.map(s => s.trim())
|
|
282
|
+
.filter(s => s.length > 0);
|
|
283
|
+
}
|
|
284
|
+
/**
 * Process a single text chunk into a semantic molecule via the shared
 * molecule processor.
 *
 * NOTE(review): unlike the batch ingestion paths, no provenance argument is
 * passed here, so the processor's own default applies — confirm this is
 * intended rather than an omission.
 *
 * @param {string} content - Text chunk to process.
 * @param {string} source - Origin identifier for the chunk.
 * @param {number} [timestamp=Date.now()] - Timestamp attached to the molecule.
 * @returns {Promise<object>} The processed semantic molecule.
 */
async processSingleChunk(content, source, timestamp = Date.now()) {
    return await this.moleculeProcessor.processTextChunk(content, source, timestamp);
}
|
|
290
|
+
/**
|
|
291
|
+
* Ingest large content by automatically chunking it into smaller pieces
|
|
292
|
+
* HEAVILY OPTIMIZED: Process all chunks in parallel with maximum concurrency and use single bulk database operation
|
|
293
|
+
*/
|
|
294
|
+
async ingestLargeContent(content, source, type = 'text', bucket = 'default', buckets = [], tags = []) {
|
|
295
|
+
const allBuckets = bucket ? [...buckets, bucket] : buckets;
|
|
296
|
+
const chunkSize = 100 * 1024; // Reduced to 100KB to prevent memory issues with PGlite while maintaining reasonable performance
|
|
297
|
+
const overlapSize = 1 * 1024; // Reduced overlap to 1KB to minimize redundancy
|
|
298
|
+
const chunks = [];
|
|
299
|
+
let start = 0;
|
|
300
|
+
while (start < content.length) {
|
|
301
|
+
let end = start + chunkSize;
|
|
302
|
+
// If we're near the end, just take the remainder
|
|
303
|
+
if (end >= content.length) {
|
|
304
|
+
end = content.length;
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
// Try to find a good break point (sentence or paragraph boundary)
|
|
308
|
+
let breakPoint = end;
|
|
309
|
+
const searchWindow = content.substring(end, Math.min(end + 5000, content.length));
|
|
310
|
+
// Look for a good break point
|
|
311
|
+
const paragraphBreak = searchWindow.lastIndexOf('\n\n');
|
|
312
|
+
const sentenceBreak = searchWindow.lastIndexOf('. ');
|
|
313
|
+
const newlineBreak = searchWindow.lastIndexOf('\n');
|
|
314
|
+
// Choose the closest appropriate break point
|
|
315
|
+
if (paragraphBreak !== -1) {
|
|
316
|
+
breakPoint = end + paragraphBreak + 2; // +2 for \n\n
|
|
317
|
+
}
|
|
318
|
+
else if (sentenceBreak !== -1) {
|
|
319
|
+
breakPoint = end + sentenceBreak + 2; // +2 for '. '
|
|
320
|
+
}
|
|
321
|
+
else if (newlineBreak !== -1) {
|
|
322
|
+
breakPoint = end + newlineBreak + 1; // +1 for '\n'
|
|
323
|
+
}
|
|
324
|
+
else {
|
|
325
|
+
// If no good break point found, just break at chunkSize
|
|
326
|
+
breakPoint = end;
|
|
327
|
+
}
|
|
328
|
+
// Ensure we don't go beyond the content length
|
|
329
|
+
breakPoint = Math.min(breakPoint, content.length);
|
|
330
|
+
// If the break point is too close to start, just break at chunkSize
|
|
331
|
+
if (breakPoint - start < chunkSize * 0.5) {
|
|
332
|
+
breakPoint = Math.min(start + chunkSize, content.length);
|
|
333
|
+
}
|
|
334
|
+
end = breakPoint;
|
|
335
|
+
}
|
|
336
|
+
// Add overlap from previous chunk if not the first chunk
|
|
337
|
+
const overlapStart = start > 0 ? Math.max(0, start - overlapSize) : start;
|
|
338
|
+
const chunk = content.substring(overlapStart, end);
|
|
339
|
+
chunks.push(chunk);
|
|
340
|
+
start = end;
|
|
341
|
+
}
|
|
342
|
+
console.log(`[IngestionService] Split large content (${content.length} chars) into ${chunks.length} chunks of ~${Math.round(chunkSize / 1024)}KB each`);
|
|
343
|
+
// STREAMING BATCH IMPLEMENTATION
|
|
344
|
+
// We process chunks in groups (Strides) to avoid OOM and CPU starvation
|
|
345
|
+
// Since NLP is CPU-bound, parallel processing of batches doesn't help throughput and only hurts RAM/GC.
|
|
346
|
+
// Process 1 chunk (100KB) at a time to ensure maximum stability and lowest memory footprint.
|
|
347
|
+
const BATCH_SIZE = 1; // Reduced from 50 to 1 for serial processing of large chunks
|
|
348
|
+
let totalMolecules = 0;
|
|
349
|
+
let totalEntities = 0;
|
|
350
|
+
console.log(`[IngestionService] Split large content (${content.length} chars) into ${chunks.length} chunks. Processing in batches of ${BATCH_SIZE}...`);
|
|
351
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
352
|
+
const batchChunks = chunks.slice(i, i + BATCH_SIZE);
|
|
353
|
+
console.log(`[IngestionService] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(chunks.length / BATCH_SIZE)} (${batchChunks.length} chunks)...`);
|
|
354
|
+
// 1. Process text chunks into molecules (Parallel within the batch)
|
|
355
|
+
const batchPromptResults = await Promise.all(batchChunks.map(async (chunk, batchIndex) => {
|
|
356
|
+
const globalIndex = i + batchIndex;
|
|
357
|
+
const chunkSource = `${source}_chunk_${globalIndex + 1}_of_${chunks.length}`;
|
|
358
|
+
const textChunks = this.splitIntoMolecules(chunk);
|
|
359
|
+
const chunksWithMetadata = textChunks.map((textChunk, idx) => ({
|
|
360
|
+
content: textChunk,
|
|
361
|
+
source: `${chunkSource}_molecule_${idx}`,
|
|
362
|
+
timestamp: Date.now() + globalIndex * 1000 + idx,
|
|
363
|
+
provenance: 'external'
|
|
364
|
+
}));
|
|
365
|
+
return await this.moleculeProcessor.processTextChunks(chunksWithMetadata);
|
|
366
|
+
}));
|
|
367
|
+
// Flatten the batch results
|
|
368
|
+
const batchMolecules = batchPromptResults.flat();
|
|
369
|
+
if (batchMolecules.length > 0) {
|
|
370
|
+
// 2. Save this batch immediately to releasing memory
|
|
371
|
+
await this.saveMoleculesBatched([batchMolecules], source, type, allBuckets, tags);
|
|
372
|
+
totalMolecules += batchMolecules.length;
|
|
373
|
+
totalEntities += batchMolecules.reduce((sum, m) => sum + m.containedEntities.length, 0);
|
|
374
|
+
// Optional: Hint at GC (not available in standard JS, but ensuring scope clear helps)
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
return {
|
|
378
|
+
status: 'success',
|
|
379
|
+
id: `multi_chunk_${Date.now()}`,
|
|
380
|
+
message: `Processed large content in ${chunks.length} chunks (streaming), ingested ${totalMolecules} semantic molecules with ${totalEntities} atomic entities`
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
/**
 * Internal method to ingest a single chunk without length validation
 * (bypasses the size check in ingestContent to avoid recursion).
 *
 * Splits the chunk into molecules, processes them in parallel, then performs
 * one batched transaction: bulk-upsert atoms, bulk-insert tag index rows,
 * and bulk-insert edges.
 *
 * NOTE(review): this largely duplicates saveMoleculesBatched's persistence
 * logic and, unlike it, always generates fresh molecule IDs instead of
 * reusing processor-assigned ones — consider consolidating.
 *
 * @param {string} content - Text chunk to ingest.
 * @param {string} source - Identifier for the content's origin.
 * @param {string} [type='text'] - Atom type stored with each molecule row.
 * @param {string} [bucket='default'] - Legacy single-bucket parameter.
 * @param {string[]} [buckets=[]] - Buckets to index the atoms under.
 * @param {string[]} [tags=[]] - Tags applied to every atom.
 * @returns {Promise<{status: string, id: string, message: string}>}
 */
async ingestSingleChunk(content, source, type = 'text', bucket = 'default', buckets = [], tags = []) {
    // This method bypasses the length validation to avoid recursion.
    try {
        // Handle legacy single-bucket param.
        const allBuckets = bucket ? [...buckets, bucket] : buckets;
        // Split content into text chunks (molecules).
        const textChunks = this.splitIntoMolecules(content);
        const chunksWithMetadata = textChunks.map((chunk, index) => ({
            content: chunk,
            source: `${source}_chunk_${index}`,
            timestamp: Date.now() + index, // Slightly offset timestamps
            provenance: 'external'
        }));
        // Process chunks in parallel to reduce serial processing time.
        const semanticMolecules = await Promise.all(chunksWithMetadata.map(chunk => this.moleculeProcessor.processTextChunk(chunk.content, chunk.source, chunk.timestamp, chunk.provenance)));
        // Batched ingestion: a Map keyed by atom ID deduplicates rows so a
        // single INSERT never touches the same row twice
        // (fixes "ON CONFLICT ... cannot affect row a second time").
        const atomsToInsert = new Map();
        const tagsToInsert = [];
        // For variant relationships. NOTE(review): nothing in this method ever
        // pushes into edgesToInsert, so the edge-insert branch below is
        // currently dead code — confirm whether edges are still planned.
        const edgesToInsert = [];
        // Shared placeholder embedding string (768 dims of 0.1) to save RAM.
        const ZERO_VECTOR_STR = JSON.stringify(new Array(768).fill(0.1));
        for (const molecule of semanticMolecules) {
            const id = `mol_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;
            const timestamp = molecule.timestamp;
            const hash = crypto.createHash('sha256').update(molecule.content).digest('hex');
            // Prepare payload (always happens regardless of vector processing).
            const atomType = type || 'semantic_molecule';
            const embeddingStr = ZERO_VECTOR_STR; // Use pre-computed zero vector string
            atomsToInsert.set(id, {
                id,
                timestamp,
                content: molecule.content,
                source_path: source,
                source_id: source,
                sequence: 0,
                type: atomType,
                hash,
                buckets: allBuckets,
                tags: [...tags, ...molecule.semanticTags.map((tag) => tag.replace('#', ''))],
                epochs: [],
                provenance: molecule.provenance,
                simhash: "0",
                embedding: embeddingStr,
                vector_id: null // No vector ID when not using vectors
            });
            // Prepare tag index rows for the molecule.
            tagsToInsert.push({
                atomId: id,
                tags: [...tags, ...molecule.semanticTags.map((tag) => tag.replace('#', ''))],
                buckets: allBuckets
            });
            // Also store the atomic entities separately, linked via source_id.
            for (const entity of molecule.containedEntities) {
                // Fix for index size limit: hash the entity for the ID.
                const entityHash = crypto.createHash('sha256').update(entity).digest('hex').substring(0, 16);
                const atomId = `atom_${id}_${entityHash}`;
                const atomHash = crypto.createHash('sha256').update(entity).digest('hex');
                // Truncate entity tag to the 255-char index limit.
                const entityTagRaw = `entity:${entity.toLowerCase()}`;
                const entityTag = entityTagRaw.length > 255 ? entityTagRaw.substring(0, 255) : entityTagRaw;
                const entityTags = [entityTag, ...molecule.semanticTags.map((tag) => tag.replace('#', ''))];
                const entityBuckets = [...allBuckets, 'entities'];
                // DEDUP CHECK: if this entity already exists in the map (from
                // another sentence), ignore the duplicate.
                if (!atomsToInsert.has(atomId)) {
                    atomsToInsert.set(atomId, {
                        id: atomId,
                        timestamp,
                        content: entity,
                        source_path: `${source}_entities`,
                        source_id: id,
                        sequence: 0,
                        type: 'atomic_entity',
                        hash: atomHash,
                        buckets: entityBuckets,
                        tags: entityTags,
                        epochs: [],
                        provenance: 'internal',
                        simhash: "0",
                        embedding: ZERO_VECTOR_STR, // Use shared zero vector string
                        vector_id: null
                    });
                    // Prepare tag index rows for the entity.
                    tagsToInsert.push({
                        atomId: atomId,
                        tags: entityTags,
                        buckets: entityBuckets
                    });
                }
            }
        }
        // Execute the batch transaction: atoms, then tags, then edges.
        if (atomsToInsert.size > 0) {
            await db.run('BEGIN');
            try {
                // 1. Bulk insert atoms in sub-batches.
                const atomList = Array.from(atomsToInsert.values());
                const ATOM_BATCH_SIZE = 500; // Increased batch size for better performance
                for (let i = 0; i < atomList.length; i += ATOM_BATCH_SIZE) {
                    const batch = atomList.slice(i, i + ATOM_BATCH_SIZE);
                    const atomValues = [];
                    const atomPlaceholders = [];
                    let pIdx = 1;
                    for (const atom of batch) {
                        atomPlaceholders.push(`($${pIdx}, $${pIdx + 1}, $${pIdx + 2}, $${pIdx + 3}, $${pIdx + 4}, $${pIdx + 5}, $${pIdx + 6}, $${pIdx + 7}, $${pIdx + 8}, $${pIdx + 9}, $${pIdx + 10}, $${pIdx + 11}, $${pIdx + 12}, $${pIdx + 13})`);
                        atomValues.push(atom.id, atom.timestamp, atom.content, atom.source_path, atom.source_id, atom.sequence, atom.type, atom.hash, atom.buckets, atom.tags, atom.epochs, atom.provenance, atom.simhash, atom.embedding);
                        pIdx += 14;
                    }
                    const atomQuery = `
                        INSERT INTO atoms (id, timestamp, content, source_path, source_id, sequence, type, hash, buckets, tags, epochs, provenance, simhash, embedding)
                        VALUES ${atomPlaceholders.join(', ')}
                        ON CONFLICT (id) DO UPDATE SET
                            content = EXCLUDED.content,
                            timestamp = EXCLUDED.timestamp,
                            source_path = EXCLUDED.source_path,
                            source_id = EXCLUDED.source_id,
                            sequence = EXCLUDED.sequence,
                            type = EXCLUDED.type,
                            hash = EXCLUDED.hash,
                            buckets = EXCLUDED.buckets,
                            tags = EXCLUDED.tags,
                            epochs = EXCLUDED.epochs,
                            provenance = EXCLUDED.provenance,
                            simhash = EXCLUDED.simhash,
                            embedding = EXCLUDED.embedding
                    `;
                    await db.run(atomQuery, atomValues);
                }
                // 2. Bulk insert tag index rows in sub-batches.
                const allTagEntries = [];
                for (const item of tagsToInsert) {
                    for (const bucket of item.buckets) {
                        for (const tag of item.tags) {
                            // Skip empty tags and tags beyond the index limit.
                            if (!tag || tag.length > 255)
                                continue;
                            allTagEntries.push({ atomId: item.atomId, tag, bucket });
                        }
                    }
                }
                const TAG_BATCH_SIZE = 1000; // Increased batch size for better performance
                for (let i = 0; i < allTagEntries.length; i += TAG_BATCH_SIZE) {
                    const batch = allTagEntries.slice(i, i + TAG_BATCH_SIZE);
                    const batchValues = [];
                    const placeholders = [];
                    let pIdx = 1;
                    for (const entry of batch) {
                        placeholders.push(`($${pIdx}, $${pIdx + 1}, $${pIdx + 2})`);
                        batchValues.push(entry.atomId, entry.tag, entry.bucket);
                        pIdx += 3;
                    }
                    if (batchValues.length > 0) {
                        const tagQuery = `
                            INSERT INTO tags (atom_id, tag, bucket)
                            VALUES ${placeholders.join(', ')}
                            ON CONFLICT (atom_id, tag, bucket) DO NOTHING
                        `;
                        await db.run(tagQuery, batchValues);
                    }
                }
                // 3. Bulk insert edges in sub-batches (currently never reached;
                // see NOTE on edgesToInsert above).
                if (edgesToInsert.length > 0) {
                    const EDGE_BATCH_SIZE = 100; // Increased batch size for better performance
                    for (let i = 0; i < edgesToInsert.length; i += EDGE_BATCH_SIZE) {
                        const batch = edgesToInsert.slice(i, i + EDGE_BATCH_SIZE);
                        const batchValues = [];
                        const placeholders = [];
                        let pIdx = 1;
                        for (const edge of batch) {
                            placeholders.push(`($${pIdx}, $${pIdx + 1}, $${pIdx + 2}, $${pIdx + 3})`);
                            batchValues.push(edge.source, edge.target, edge.relation, edge.weight);
                            pIdx += 4;
                        }
                        const edgeQuery = `
                            INSERT INTO edges (source_id, target_id, relation, weight)
                            VALUES ${placeholders.join(', ')}
                            ON CONFLICT (source_id, target_id, relation) DO NOTHING
                        `;
                        await db.run(edgeQuery, batchValues);
                    }
                }
                await db.run('COMMIT');
            }
            catch (error) {
                await db.run('ROLLBACK');
                throw error;
            }
        }
        return {
            status: 'success',
            id: semanticMolecules[0]?.id || 'unknown',
            message: `Ingested ${semanticMolecules.length} semantic molecules with ${semanticMolecules.reduce((sum, mol) => sum + mol.containedEntities.length, 0)} atomic entities`
        };
    }
    catch (e) {
        console.error('[SemanticIngestionService] Single Chunk Ingest Error:', e);
        return { status: 'error', id: 'unknown', message: e.message };
    }
}
|
|
587
|
+
/**
|
|
588
|
+
* Index tags in the separate tags table for efficient retrieval/filtering
|
|
589
|
+
*/
|
|
590
|
+
async indexTags(atomId, tags, buckets) {
|
|
591
|
+
if (!tags.length || !buckets.length)
|
|
592
|
+
return;
|
|
593
|
+
// Use a simple Set to deduplicate quickly
|
|
594
|
+
const uniqueEntries = new Set();
|
|
595
|
+
const values = [];
|
|
596
|
+
const placeholders = [];
|
|
597
|
+
let i = 1;
|
|
598
|
+
for (const bucket of buckets) {
|
|
599
|
+
for (const tag of tags) {
|
|
600
|
+
if (!tag)
|
|
601
|
+
continue;
|
|
602
|
+
if (tag.length > 255)
|
|
603
|
+
continue; // Skip tags that are too long for the index
|
|
604
|
+
const key = `${atomId}:${tag}:${bucket}`;
|
|
605
|
+
if (uniqueEntries.has(key))
|
|
606
|
+
continue;
|
|
607
|
+
uniqueEntries.add(key);
|
|
608
|
+
placeholders.push(`($${i}, $${i + 1}, $${i + 2})`);
|
|
609
|
+
values.push(atomId, tag, bucket);
|
|
610
|
+
i += 3;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
if (values.length === 0)
|
|
614
|
+
return;
|
|
615
|
+
try {
|
|
616
|
+
await db.run(`INSERT INTO tags (atom_id, tag, bucket) VALUES ${placeholders.join(', ')}
|
|
617
|
+
ON CONFLICT (atom_id, tag, bucket) DO NOTHING`, values);
|
|
618
|
+
}
|
|
619
|
+
catch (e) {
|
|
620
|
+
// Warn but don't fail ingestion
|
|
621
|
+
console.warn(`[SemanticIngestionService] Failed to index tags`, e);
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
//# sourceMappingURL=semantic-ingestion-service.js.map
|