@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ingest Service - Memory Ingestion with Provenance Tracking
|
|
3
|
+
*
|
|
4
|
+
* Implements the Data Provenance feature by adding a 'provenance' column
|
|
5
|
+
* to distinguish between "Sovereign" (User-Created) and "Ancillary" (External) data.
|
|
6
|
+
*/
|
|
7
|
+
import { db } from '../../core/db.js';
|
|
8
|
+
import crypto from 'crypto';
|
|
9
|
+
import { config } from '../../config/index.js';
|
|
10
|
+
import { cleanContent, getCleaningStats } from './content-cleaner.js';
|
|
11
|
+
/**
 * Determines the provenance of content based on its source.
 *
 * Content is 'internal' when its type is 'user' or when its source path lies
 * in an `inbox/`, `internal-inbox/` or `sovereign/` folder. Everything else,
 * including `external-inbox/`, `web_scrape` and `news_agent` sources and
 * type 'external', is 'external'.
 *
 * @param {string} source - Source path or identifier; backslashes are treated as '/'.
 * @param {string} [type] - Content type; 'user' forces 'internal'.
 * @returns {'internal'|'external'} The provenance label.
 */
function determineProvenance(source, type) {
    // A missing or non-string source used to throw on `.replace`; treat it as
    // an empty path so that `type` alone can still decide.
    const normalizedSource = String(source ?? '').replace(/\\/g, '/');
    // Matches "folder/..." at the start or ".../folder/..." anywhere.
    const isInsideFolder = (folder) => normalizedSource.startsWith(`${folder}/`) ||
        normalizedSource.includes(`/${folder}/`);
    const isInternal = type === 'user' ||
        isInsideFolder('inbox') ||
        isInsideFolder('internal-inbox') ||
        normalizedSource.includes('/sovereign/');
    if (isInternal) {
        return 'internal';
    }
    // The internal check runs first on purpose: "external-inbox/" does not
    // contain "/inbox/", so external folders never match it, and every source
    // that is not explicitly internal defaults to external.
    return 'external';
}
|
|
36
|
+
/**
 * Ingest content into the memory database with provenance tracking.
 *
 * Steps: optional cleaning, provenance detection, duplicate lookup by content
 * fingerprint, atomization, then persistence through AtomicIngestService.
 *
 * @param {string} content - Raw content; must be non-empty.
 * @param {string} source - Source path or identifier, used for provenance.
 * @param {string} [type='text'] - Content type; 'html' and 'web_page' enable HTML stripping.
 * @param {string[]} [buckets=['core']] - Buckets passed on to the atomic ingest.
 * @param {string[]} [tags=[]] - Accepted for interface compatibility; not used by this pipeline.
 * @param {{skipCleaning?: boolean}} [options={}] - Set `skipCleaning` to bypass the cleaner.
 * @returns {Promise<{status: string, id?: *, message: string}>} `status` is
 *   'success' or 'skipped'; `id` is absent when transient data was skipped.
 * @throws {Error} When `content` is empty.
 */
export async function ingestContent(content, source, type = 'text', buckets = ['core'], tags = [], options = {}) {
    if (!content) {
        throw new Error('Content is required for ingestion');
    }
    // --- DATA REFINERY: Clean content ---
    let processedContent = content;
    if (!options.skipCleaning) {
        const startTime = Date.now();
        // Configure cleaning based on content type
        const cleanOptions = {
            stripHtml: type === 'html' || type === 'web_page',
            decodeHtml: true,
            normalizeUnicode: true,
            removeControlChars: true,
            normalizeWhitespace: true,
            removeBoilerplate: type === 'web_page',
            normalizeLineEndings: true,
            collapseBlankLines: true
        };
        processedContent = cleanContent(content, cleanOptions);
        const stats = getCleaningStats(content, processedContent);
        if (stats.charsRemoved > 0) {
            console.log(`[Ingest] 🏭 Content cleaned: -${stats.charsRemoved} chars (${stats.reductionPercent}% reduction) in ${Date.now() - startTime}ms`);
        }
    }
    // Auto-assign provenance based on source
    const provenance = determineProvenance(source, type);
    // Generate hash for content deduplication (using processed content)
    const hash = crypto.createHash('md5').update(processedContent).digest('hex');
    // BigInt() only parses hex with a "0x" prefix; the bare digest used to throw
    // SyntaxError for practically every input. Keep the leading 64 bits of the
    // digest and wrap them into the signed 64-bit range.
    // NOTE(review): the lookup compares against atoms.simhash; confirm that the
    // writers of that column store a comparable fingerprint.
    const contentFingerprint = BigInt.asIntN(64, BigInt(`0x${hash.slice(0, 16)}`));
    // Check if content with same hash already exists
    const existingQuery = `SELECT id FROM atoms WHERE simhash = $1`;
    const existingResult = await db.run(existingQuery, [contentFingerprint]);
    if (existingResult.rows && existingResult.rows.length > 0) {
        const existingRow = existingResult.rows[0];
        return {
            status: 'skipped',
            // Rows may come back as arrays or as objects keyed by column name.
            id: Array.isArray(existingRow) ? existingRow[0] : existingRow.id,
            message: 'Content with same hash already exists'
        };
    }
    const timestamp = Date.now();
    // Process content into atomic structure using AtomizerService (Legacy Pipeline)
    const { AtomizerService } = await import('./atomizer-service.js');
    const { AtomicIngestService } = await import('./ingest-atomic.js');
    const atomizer = new AtomizerService();
    const atomicIngest = new AtomicIngestService();
    // Ensure provenance matches expected type for atomizer
    const atomizerProvenance = (provenance === 'system') ? 'internal' : provenance;
    const atomizeResult = await atomizer.atomize(processedContent, source, atomizerProvenance, timestamp);
    // Skip ingestion if transient data was detected
    if (!atomizeResult) {
        console.log(`[Ingest] ⚠️ SKIP: ${source} - Transient data detected, skipping ingestion`);
        return {
            status: 'skipped',
            message: 'Content skipped (transient data)'
        };
    }
    const { compound, molecules, atoms } = atomizeResult;
    // Ingest result using AtomicIngestService
    await atomicIngest.ingestResult(compound, molecules, atoms, buckets);
    // Return success (ID is compound ID)
    return {
        status: 'success',
        id: compound.id,
        message: 'Content ingested successfully with provenance tracking'
    };
}
|
|
108
|
+
/**
 * Ingest pre-processed atoms (Batched).
 *
 * Atoms are upserted in chunks of 50 rows. After each successful chunk, one
 * tag row is upserted per (atom, tag) pair, where an atom's tags are the
 * batch-level `tags` merged with its own `tags` and de-duplicated. A chunk
 * whose atom insert fails is logged, its tags are skipped and it is not
 * counted. A failed tag insert is only logged.
 *
 * @param {Array<object>} atoms - Atoms to persist (id, content, sourcePath,
 *   timestamp, simhash, embedding, provenance, payload, tags are read).
 * @param {string} source - Not read by this function.
 * @param {string[]} [buckets=['core']] - Only the first bucket is written to the tag rows.
 * @param {string[]} [tags=[]] - Batch-level tags (e.g., "inbox").
 * @param {number} [fileTimestamp] - Fallback timestamp for atoms without a usable one.
 * @returns {Promise<number>} Number of atoms in the chunks whose atom insert succeeded.
 */
export async function ingestAtoms(atoms, source, buckets = ['core'], tags = [], fileTimestamp) {
    if (atoms.length === 0) {
        return 0;
    }
    const BATCH_SIZE = 50;
    const ATOM_COLUMNS = 8;
    const TAG_COLUMNS = 3;
    // Renders "($k, $k+1, ...)" for the zero-based row `rowIndex` of a multi-row VALUES list.
    const placeholderGroup = (rowIndex, width) => {
        const firstSlot = rowIndex * width + 1;
        const slots = [];
        for (let step = 0; step < width; step += 1) {
            slots.push(`$${firstSlot + step}`);
        }
        return `(${slots.join(', ')})`;
    };
    // Standard 096: Timestamp Assignment
    const resolveTimestamp = (candidate) => {
        if (!candidate || candidate <= 0 || isNaN(candidate)) {
            return (fileTimestamp != null) ? fileTimestamp : Date.now();
        }
        return candidate;
    };
    // Simhash to BigInt; values that cannot be converted become null.
    const toSimhash = (raw) => {
        if (!raw) {
            return null;
        }
        try {
            return BigInt(raw);
        }
        catch (e) {
            return null;
        }
    };
    // Use the atom's embedding only when it has the configured dimension.
    const resolveEmbedding = (atom) => {
        if (atom.embedding && atom.embedding.length === config.MODELS.EMBEDDING_DIM) {
            return atom.embedding;
        }
        return new Array(config.MODELS.EMBEDDING_DIM).fill(0.1);
    };
    let insertedCount = 0;
    for (let offset = 0; offset < atoms.length; offset += BATCH_SIZE) {
        const batch = atoms.slice(offset, offset + BATCH_SIZE);
        // --- 1. Atoms ---
        const atomGroups = [];
        const atomValues = [];
        batch.forEach((atom, rowIndex) => {
            const timestampValue = resolveTimestamp(atom.timestamp);
            const simhashValue = toSimhash(atom.simhash);
            const embeddingValue = resolveEmbedding(atom);
            const payloadValue = atom.payload ? JSON.stringify(atom.payload) : '{}';
            atomGroups.push(placeholderGroup(rowIndex, ATOM_COLUMNS));
            atomValues.push(atom.id, atom.content, atom.sourcePath, timestampValue, simhashValue || 0n, embeddingValue, atom.provenance, payloadValue);
        });
        const upsertAtomsSql = `
            INSERT INTO atoms (id, content, source_path, timestamp, simhash, embedding, provenance, payload)
            VALUES ${atomGroups.join(', ')}
            ON CONFLICT (id) DO UPDATE SET
            content = EXCLUDED.content,
            source_path = EXCLUDED.source_path,
            timestamp = EXCLUDED.timestamp,
            simhash = EXCLUDED.simhash,
            embedding = EXCLUDED.embedding,
            provenance = EXCLUDED.provenance,
            payload = EXCLUDED.payload
        `;
        try {
            await db.run(upsertAtomsSql, atomValues);
        }
        catch (e) {
            console.error(`[Ingest] Batch insert failed for chunk starting at index ${offset}:`, e.message);
            continue; // Skip tags if atoms fail
        }
        // --- 2. Tags ---
        const tagGroups = [];
        const tagValues = [];
        for (const atom of batch) {
            const mergedTags = new Set([...tags, ...(atom.tags || [])]);
            for (const tag of mergedTags) {
                tagGroups.push(placeholderGroup(tagGroups.length, TAG_COLUMNS));
                tagValues.push(atom.id, tag, buckets[0]);
            }
        }
        if (tagValues.length > 0) {
            const upsertTagsSql = `
                INSERT INTO tags (atom_id, tag, bucket)
                VALUES ${tagGroups.join(', ')}
                ON CONFLICT (atom_id, tag, bucket) DO UPDATE SET
                bucket = EXCLUDED.bucket
            `;
            try {
                await db.run(upsertTagsSql, tagValues);
            }
            catch (e) {
                console.warn(`[Ingest] Batch tag insert failed for chunk ${offset}:`, e.message);
            }
        }
        insertedCount += batch.length;
    }
    return insertedCount;
}
|
|
202
|
+
/**
 * Bulk import YAML content with provenance tracking.
 *
 * Each record is passed to `ingestContent`, with defaults of 'yaml_import'
 * for the source, 'text' for the type, ['imported'] for the buckets and []
 * for the tags. Records without content, and records whose ingestion throws,
 * are counted as errors; the loop always continues with the next record.
 *
 * @param {Iterable<object>} yamlContent - Parsed records to import.
 * @returns {Promise<{imported: number, skipped: number, errors: number}>} Outcome counters.
 */
export async function importYamlContent(yamlContent) {
    const tally = { imported: 0, skipped: 0, errors: 0 };
    for (const record of yamlContent) {
        try {
            if (!record.content) {
                tally.errors += 1;
                continue;
            }
            const outcome = await ingestContent(record.content, record.source || 'yaml_import', record.type || 'text', record.buckets || ['imported'], record.tags || []);
            switch (outcome.status) {
                case 'success':
                    tally.imported += 1;
                    break;
                case 'skipped':
                    tally.skipped += 1;
                    break;
                default:
                    // Any other status is not counted.
                    break;
            }
        }
        catch (error) {
            console.error('YAML import error for record:', record, error);
            tally.errors += 1;
        }
    }
    return tally;
}
|
|
230
|
+
//# sourceMappingURL=ingest.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../../src/services/ingest/ingest.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACtC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAOtE;;GAEG;AACH,SAAS,mBAAmB,CAAC,MAAc,EAAE,IAAa;IACxD,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAEpD,wDAAwD;IACxD,yCAAyC;IACzC,IAAI,gBAAgB,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,gBAAgB,CAAC,UAAU,CAAC,QAAQ,CAAC;QAC/E,gBAAgB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,gBAAgB,CAAC,UAAU,CAAC,iBAAiB,CAAC;QAC/F,gBAAgB,CAAC,QAAQ,CAAC,aAAa,CAAC;QACxC,IAAI,KAAK,MAAM,EAAE,CAAC;QAClB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,6BAA6B;IAC7B,2DAA2D;IAC3D,IAAI,gBAAgB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,gBAAgB,CAAC,UAAU,CAAC,iBAAiB,CAAC;QACjG,gBAAgB,CAAC,QAAQ,CAAC,YAAY,CAAC;QACvC,gBAAgB,CAAC,QAAQ,CAAC,YAAY,CAAC;QACvC,IAAI,KAAK,UAAU,EAAE,CAAC;QACtB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,oFAAoF;IACpF,yHAAyH;IACzH,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAAe,EACf,MAAc,EACd,OAAe,MAAM,EACrB,UAAoB,CAAC,MAAM,CAAC,EAC5B,OAAiB,EAAE,EACnB,UAAyB,EAAE;IAG3B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,uCAAuC;IACvC,IAAI,gBAAgB,GAAG,OAAO,CAAC;IAC/B,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IAEhC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;QAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,2CAA2C;QAC3C,MAAM,YAAY,GAAG;YACnB,SAAS,EAAE,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,UAAU;YACjD,UAAU,EAAE,IAAI;YAChB,gBAAgB,EAAE,IAAI;YACtB,kBAAkB,EAAE,IAAI;YACxB,mBAAmB,EAAE,IAAI;YACzB,iBAAiB,EAAE,IAAI,KAAK,UAAU;YACtC,oBAAoB,EAAE,IAAI;YAC1B,kBAAkB,EAAE,IAAI;SACzB,CAAC;QAEF,gBAAgB,GAAG,YAAY,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAE1D,IAAI,KAAK,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,iCAAiC,KAAK,CAAC,YAAY,WAAW,KAAK,CAAC,gBAAgB,mBAAmB,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,IAAI,CAAC,CAAC;QACjJ,CAAC;IACH,CAAC;IAED,yCAAyC;IACzC
,MAAM,UAAU,GAAG,mBAAmB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAErD,oEAAoE;IACpE,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAE7E,iDAAiD;IACjD,MAAM,aAAa,GAAG,yCAAyC,CAAC;IAChE,MAAM,cAAc,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAEnE,IAAI,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1D,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,EAAE,EAAE,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7B,OAAO,EAAE,uCAAuC;SACjD,CAAC;IACJ,CAAC;IAED,qBAAqB;IACrB,MAAM,EAAE,GAAG,OAAO,IAAI,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IACzF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,gFAAgF;IAChF,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;IAClE,MAAM,EAAE,mBAAmB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAEnE,MAAM,QAAQ,GAAG,IAAI,eAAe,EAAE,CAAC;IACvC,MAAM,YAAY,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAE/C,uDAAuD;IACvD,MAAM,kBAAkB,GAAG,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC;IAE/E,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,OAAO,CAC1C,gBAAgB,EAChB,MAAM,EACN,kBAAkB,EAClB,SAAS,CACV,CAAC;IAEF,gDAAgD;IAChD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,qBAAqB,MAAM,gDAAgD,CAAC,CAAC;QACzF,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,kCAAkC;SAC5C,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC;IAErD,0CAA0C;IAC1C,MAAM,YAAY,CAAC,YAAY,CAAC,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAErE,qCAAqC;IACrC,OAAO;QACL,MAAM,EAAE,SAAS;QACjB,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,OAAO,EAAE,wDAAwD;KAClE,CAAC;AACJ,CAAC;AAiBD;;GAEG;AACH;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAmB,EACnB,MAAc,EACd,UAAoB,CAAC,MAAM,CAAC,EAC5B,OAAiB,EAAE,EAAE,mCAAmC;AACxD,aAAsB;IAGtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,EAAE,CAAC;IACtB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,oBAAoB;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,KAAK,CAA
C,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QAE7C,iCAAiC;QACjC,MAAM,qBAAqB,GAAa,EAAE,CAAC;QAC3C,MAAM,UAAU,GAAU,EAAE,CAAC;QAC7B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,qCAAqC;YACrC,IAAI,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC;YACpC,IAAI,CAAC,cAAc,IAAI,cAAc,IAAI,CAAC,IAAI,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;gBACpE,cAAc,GAAG,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;YACxE,CAAC;YAED,oBAAoB;YACpB,IAAI,aAAa,GAAkB,IAAI,CAAC;YACxC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBACjB,IAAI,CAAC;oBAAC,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAAC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC;YAC1E,CAAC;YAED,YAAY;YACZ,IAAI,cAAc,GAAa,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChF,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;gBAC5E,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC;YAClC,CAAC;YAED,UAAU;YACV,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvE,qBAAqB,CAAC,IAAI,CAAC,KAAK,UAAU,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3L,UAAU,CAAC,IAAI,CACb,IAAI,CAAC,EAAE,EACP,IAAI,CAAC,OAAO,EACZ,IAAI,CAAC,UAAU,EACf,cAAc,EACd,aAAa,IAAI,EAAE,EACnB,cAAc,EACd,IAAI,CAAC,UAAU,EACf,WAAW,CACZ,CAAC;YACF,UAAU,IAAI,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,eAAe,GAAG;;eAEb,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC;;;;;;;;;KAS1C,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,4DAA4D,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;YAC3F,SAAS,CAAC,0BAA0B;QACtC,CAAC;QAED,gCAAgC;QAChC,MAAM,oBAAoB,GAAa,EAAE,CAAC;QAC1C,MAAM,SAAS,GAAU,EAAE,CAAC;QAC5B,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;YACzC,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,gBAAgB,CAAC,CAAC,C
AAC,CAAC;YAE/D,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,oBAAoB,CAAC,IAAI,CAAC,KAAK,aAAa,MAAM,aAAa,GAAG,CAAC,MAAM,aAAa,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC/F,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,aAAa,IAAI,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,cAAc,GAAG;;iBAEZ,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;;;OAGzC,CAAC;YACF,IAAI,CAAC;gBACH,MAAM,EAAE,CAAC,GAAG,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;YAC1C,CAAC;YAAC,OAAO,CAAM,EAAE,CAAC;gBAChB,OAAO,CAAC,IAAI,CAAC,8CAA8C,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;YAC9E,CAAC;QACH,CAAC;QAED,QAAQ,IAAI,KAAK,CAAC,MAAM,CAAC;IAC3B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,WAAkB;IACxD,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QACjC,IAAI,CAAC;YACH,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,EAAE,CAAC;gBACT,SAAS;YACX,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,MAAM,CAAC,OAAO,EACd,MAAM,CAAC,MAAM,IAAI,aAAa,EAC9B,MAAM,CAAC,IAAI,IAAI,MAAM,EACrB,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,EAC9B,MAAM,CAAC,IAAI,IAAI,EAAE,CAClB,CAAC;YAEF,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,QAAQ,EAAE,CAAC;YACb,CAAC;iBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBACvC,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,+BAA+B,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YAC9D,MAAM,EAAE,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Watchdog Service
|
|
3
|
+
*
|
|
4
|
+
* Scans the Notebook directory for changes and ingests new content.
|
|
5
|
+
* Uses 'chokidar' for efficient file watching.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Start watching the project's `inbox` and `external-inbox` directories plus
 * any configured extra paths that exist. Does nothing if a watcher is already
 * running or the notebook directory is missing.
 */
export declare function startWatchdog(): Promise<void>;

/**
 * List the watch roots: `inbox`, `external-inbox` and the configured extra
 * paths. Returns an empty array while the watchdog is not running.
 */
export declare function getWatchedPaths(): string[];

/**
 * Add a path to the watch list and persist it to `user_settings.json` when
 * that file exists. Rejects if the path does not exist.
 */
export declare function addWatchPath(newPath: string): Promise<boolean>;

/**
 * Remove a path from the watch list and from `user_settings.json` when it
 * was one of the configured extra paths.
 */
export declare function removeWatchPath(pathToRemove: string): Promise<boolean>;

/**
 * Stop the watchdog service
 */
export declare function stopWatchdog(): Promise<void>;

/**
 * Get watchdog status
 */
export declare function getWatcherStatus(): {
    isRunning: boolean;
    watchedPaths: string[];
};

/**
 * Trigger manual ingestion scan of `inbox` and, when present,
 * `external-inbox`. `status` is 'error' when the inbox is missing or the
 * scan fails; the counters are only set on success.
 */
export declare function triggerManualIngest(): Promise<{
    status: string;
    message: string;
    filesProcessed?: number;
    filesIngested?: number;
}>;
|
|
31
|
+
//# sourceMappingURL=watchdog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"watchdog.d.ts","sourceRoot":"","sources":["../../../src/services/ingest/watchdog.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA8CH,wBAAsB,aAAa,kBAkDlC;AAGD,wBAAgB,eAAe,IAAI,MAAM,EAAE,CAY1C;AAED,wBAAsB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAwCpE;AAED,wBAAsB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAgC5E;AAED;;GAEG;AACH,wBAAsB,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAMlD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI;IAAE,SAAS,EAAE,OAAO,CAAC;IAAC,YAAY,EAAE,MAAM,EAAE,CAAA;CAAE,CASjF;AAED;;GAEG;AACH,wBAAsB,mBAAmB,IAAI,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAkEzI"}
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Watchdog Service
|
|
3
|
+
*
|
|
4
|
+
* Scans the Notebook directory for changes and ingests new content.
|
|
5
|
+
* Uses 'chokidar' for efficient file watching.
|
|
6
|
+
*/
|
|
7
|
+
import * as chokidar from 'chokidar';
|
|
8
|
+
import * as fs from 'fs';
|
|
9
|
+
import * as path from 'path';
|
|
10
|
+
import * as crypto from 'crypto';
|
|
11
|
+
import { db } from '../../core/db.js';
|
|
12
|
+
import { NOTEBOOK_DIR, PROJECT_ROOT } from '../../config/paths.js';
|
|
13
|
+
import { config } from '../../config/index.js';
|
|
14
|
+
import { pathManager } from '../../utils/path-manager.js';
|
|
15
|
+
import { systemStatus } from '../system-status.js';
|
|
16
|
+
let watcher = null;
|
|
17
|
+
const IGNORE_PATTERNS = /(^|[\/\\])\../; // Ignore dotfiles
|
|
18
|
+
// Post-ingestion synonym generation
|
|
19
|
+
let ingestionTimeout = null;
|
|
20
|
+
const INGESTION_DEBOUNCE_MS = 30000; // Wait 30 seconds after last ingestion
|
|
21
|
+
/**
 * Schedules synonym-ring generation to run once ingestion has been quiet for
 * INGESTION_DEBOUNCE_MS. Each call cancels the previously scheduled run, so
 * only the last call in a burst leads to a generation pass.
 *
 * The generated rings are written to `synonyms/synonym-ring-auto.json` under
 * the database path. Failures are logged as warnings and never thrown.
 */
async function triggerPostIngestionSynonyms() {
    // Loads the generator on demand, builds the rings and saves them.
    const generateAndSave = async () => {
        console.log('[Watchdog] Post-ingestion synonym generation starting...');
        try {
            const generatorModule = await import('../synonyms/auto-synonym-generator.js');
            const generator = new generatorModule.AutoSynonymGenerator();
            const rings = await generator.generateSynonymRings();
            const outputDir = path.join(pathManager.getDatabasePath(), 'synonyms');
            const outputDirExists = fs.existsSync(outputDir);
            if (!outputDirExists) {
                fs.mkdirSync(outputDir, { recursive: true });
            }
            const outputFile = path.join(outputDir, 'synonym-ring-auto.json');
            await generator.saveSynonymRings(rings, outputFile);
            console.log(`[Watchdog] ✅ Post-ingestion synonym rings saved to ${outputFile}`);
        }
        catch (error) {
            console.warn('[Watchdog] Post-ingestion synonym generation failed:', error.message);
        }
    };
    // Restart the debounce window: drop the pending run, then schedule a new one.
    if (ingestionTimeout) {
        clearTimeout(ingestionTimeout);
    }
    ingestionTimeout = setTimeout(generateAndSave, INGESTION_DEBOUNCE_MS);
}
|
|
46
|
+
/**
 * Starts the file watcher.
 *
 * Does nothing when a watcher already exists or when NOTEBOOK_DIR is missing.
 * Otherwise it creates the `inbox` and `external-inbox` directories under
 * PROJECT_ROOT if needed and watches them together with every existing path
 * from `config.WATCHER_EXTRA_PATHS`. Files already present are reported as
 * 'add' events because `ignoreInitial` is false.
 *
 * @returns {Promise<void>}
 */
export async function startWatchdog() {
    if (watcher)
        return;
    if (!fs.existsSync(NOTEBOOK_DIR)) {
        console.warn(`[Watchdog] Notebook directory not found: ${NOTEBOOK_DIR}. Skipping watch.`);
        return;
    }
    const inbox = path.join(PROJECT_ROOT, 'inbox');
    const externalInbox = path.join(PROJECT_ROOT, 'external-inbox');
    // Auto-create inbox directories if missing (Standard 051: Ephemeral Index)
    // These are gitignored and should be created on-demand
    if (!fs.existsSync(inbox)) {
        fs.mkdirSync(inbox, { recursive: true });
        console.log(`[Watchdog] Created inbox directory: ${inbox}`);
    }
    if (!fs.existsSync(externalInbox)) {
        fs.mkdirSync(externalInbox, { recursive: true });
        console.log(`[Watchdog] Created external-inbox directory: ${externalInbox}`);
    }
    console.log(`[Watchdog] Starting watch on: ${inbox} and ${externalInbox}`);
    // Load extra paths from config
    const extraPaths = config.WATCHER_EXTRA_PATHS || [];
    const validExtraPaths = extraPaths.filter((extraPath) => {
        if (fs.existsSync(extraPath))
            return true;
        console.warn(`[Watchdog] Extra path not found: ${extraPath}`);
        return false;
    });
    const pathsToWatch = [inbox, externalInbox, ...validExtraPaths];
    watcher = chokidar.watch(pathsToWatch, {
        ignored: IGNORE_PATTERNS,
        persistent: true,
        ignoreInitial: false, // Force scan on start to ingest existing files
        awaitWriteFinish: {
            stabilityThreshold: config.WATCHER_STABILITY_THRESHOLD_MS,
            pollInterval: 100
        }
    });
    // processFile is async and nothing awaits an event handler, so a rejection
    // must be handled here or it surfaces as an unhandled rejection.
    const ingestOn = (event) => (filePath) => {
        Promise.resolve(processFile(filePath, event)).catch((error) => {
            console.error(`[Watchdog] Failed to process ${filePath}:`, error.message);
        });
    };
    watcher
        .on('add', ingestOn('add'))
        .on('change', ingestOn('change'))
        .on('addDir', (dirPath) => console.log(`[Watchdog] Detected new directory: ${dirPath}`))
        // An emitter without an 'error' listener throws when 'error' is emitted;
        // log watcher failures instead of letting them take the process down.
        .on('error', (error) => console.error('[Watchdog] Watcher error:', error && error.message ? error.message : error));
    // .on('unlink', (path) => deleteFile(path)); // Implement delete logic later
}
|
|
90
|
+
// Dynamic Path Management
|
|
91
|
+
/**
 * Lists the root paths the watchdog was asked to watch.
 *
 * The list is rebuilt from PROJECT_ROOT and `config.WATCHER_EXTRA_PATHS`
 * rather than read back from the watcher, whose own listing would also
 * contain every subdirectory.
 *
 * @returns {string[]} `inbox`, `external-inbox` and the configured extra
 *   paths, or an empty array when no watcher is running.
 */
export function getWatchedPaths() {
    if (watcher) {
        const builtInRoots = ['inbox', 'external-inbox'].map((folder) => path.join(PROJECT_ROOT, folder));
        const configuredRoots = config.WATCHER_EXTRA_PATHS || [];
        return [...builtInRoots, ...configuredRoots];
    }
    return [];
}
|
|
103
|
+
/**
 * Registers an additional directory or file to watch.
 *
 * The path is handed to the running watcher (if any), recorded in the
 * in-memory `config.WATCHER_EXTRA_PATHS`, and, when it was not already
 * recorded there, appended to `watcher.extra_paths` in the
 * `user_settings.json` found in the current working directory. A missing
 * settings file is left alone; persistence errors are logged, not thrown.
 *
 * @param {string} newPath - Path to watch; must exist.
 * @returns {Promise<boolean>} Always resolves to true.
 * @throws {Error} When `newPath` does not exist.
 */
export async function addWatchPath(newPath) {
    if (!fs.existsSync(newPath)) {
        throw new Error(`Path does not exist: ${newPath}`);
    }
    if (!watcher) {
        console.log(`[Watchdog] Path saved for later (watchdog not running): ${newPath}`);
    }
    else {
        // Add to watcher if it's running
        watcher.add(newPath);
        console.log(`[Watchdog] Added dynamic watch path: ${newPath}`);
    }
    // Update Config (In-Memory)
    if (!config.WATCHER_EXTRA_PATHS) {
        config.WATCHER_EXTRA_PATHS = [];
    }
    const knownPaths = config.WATCHER_EXTRA_PATHS;
    if (knownPaths.includes(newPath)) {
        // Already recorded in memory: the settings file is not touched.
        return true;
    }
    knownPaths.push(newPath);
    // Persist to user_settings.json (even if the watchdog isn't running)
    try {
        const settingsFile = path.join(process.cwd(), 'user_settings.json');
        if (!fs.existsSync(settingsFile)) {
            return true;
        }
        const rawSettings = await fs.promises.readFile(settingsFile, 'utf8');
        const settings = JSON.parse(rawSettings);
        if (!settings.watcher) {
            settings.watcher = {};
        }
        if (!settings.watcher.extra_paths) {
            settings.watcher.extra_paths = [];
        }
        const persistedPaths = settings.watcher.extra_paths;
        if (!persistedPaths.includes(newPath)) {
            persistedPaths.push(newPath);
            await fs.promises.writeFile(settingsFile, JSON.stringify(settings, null, 4));
            console.log(`[Watchdog] Persisted path to user_settings.json`);
        }
    }
    catch (e) {
        console.error(`[Watchdog] Failed to persist settings: ${e.message}`);
    }
    return true;
}
|
|
143
|
+
/**
 * Stops watching a path and forgets it.
 *
 * The running watcher (if any) is told to unwatch the path. If the path is in
 * the in-memory `config.WATCHER_EXTRA_PATHS`, it is removed from there and
 * from `watcher.extra_paths` in the `user_settings.json` found in the current
 * working directory. Persistence errors are logged, not thrown.
 *
 * @param {string} pathToRemove - Path to stop watching.
 * @returns {Promise<boolean>} Always resolves to true.
 */
export async function removeWatchPath(pathToRemove) {
    if (!watcher) {
        console.log(`[Watchdog] Path marked for removal (watchdog not running): ${pathToRemove}`);
    }
    else {
        // Remove from chokidar watcher if it exists (watchdog is running)
        watcher.unwatch(pathToRemove);
        console.log(`[Watchdog] Removed watch path: ${pathToRemove}`);
    }
    const trackedPaths = config.WATCHER_EXTRA_PATHS;
    if (!trackedPaths || !trackedPaths.includes(pathToRemove)) {
        // Not a configured extra path: nothing to update or persist.
        return true;
    }
    const isOtherPath = (candidate) => candidate !== pathToRemove;
    // Update Config (In-Memory)
    config.WATCHER_EXTRA_PATHS = trackedPaths.filter(isOtherPath);
    // Persist to user_settings.json
    try {
        const settingsFile = path.join(process.cwd(), 'user_settings.json');
        if (fs.existsSync(settingsFile)) {
            const rawSettings = await fs.promises.readFile(settingsFile, 'utf8');
            const settings = JSON.parse(rawSettings);
            if (settings.watcher && settings.watcher.extra_paths) {
                settings.watcher.extra_paths = settings.watcher.extra_paths.filter(isOtherPath);
                await fs.promises.writeFile(settingsFile, JSON.stringify(settings, null, 4));
                console.log(`[Watchdog] Persisted path removal to user_settings.json`);
            }
        }
    }
    catch (e) {
        console.error(`[Watchdog] Failed to persist settings removal: ${e.message}`);
    }
    return true;
}
|
|
174
|
+
/**
 * Stop the watchdog service.
 *
 * Closes the active watcher and clears the module-level reference so that a
 * later start creates a fresh one. Does nothing when no watcher is running.
 *
 * @returns {Promise<void>}
 */
export async function stopWatchdog() {
    if (!watcher) {
        return;
    }
    await watcher.close();
    watcher = null;
    console.log('[Watchdog] Stopped watching files');
}
|
|
184
|
+
/**
 * Get watchdog status.
 *
 * Unlike `getWatchedPaths`, the path list is reported even while the
 * watchdog is stopped.
 *
 * @returns {{isRunning: boolean, watchedPaths: string[]}} Whether a watcher
 *   exists, plus `inbox`, `external-inbox` and the configured extra paths.
 */
export function getWatcherStatus() {
    const builtInRoots = ['inbox', 'external-inbox'].map((folder) => path.join(PROJECT_ROOT, folder));
    const configuredRoots = config.WATCHER_EXTRA_PATHS || [];
    const isRunning = watcher !== null;
    return {
        isRunning,
        watchedPaths: [...builtInRoots, ...configuredRoots]
    };
}
|
|
196
|
+
/**
 * Trigger manual ingestion scan.
 *
 * Recursively walks `inbox` and, when it exists, `external-inbox` under
 * PROJECT_ROOT and passes every file to `processFile` with the event
 * 'manual'. Directories and entries matching IGNORE_PATTERNS are skipped.
 * An entry that cannot be stat'ed (for example a dangling symlink or a file
 * removed during the scan) is skipped with a warning instead of aborting the
 * whole scan.
 *
 * @returns {Promise<{status: string, message: string, filesProcessed?: number, filesIngested?: number}>}
 *   `status` is 'success' with both counters, or 'error' when the inbox is
 *   missing or the scan itself fails. `filesIngested` counts the
 *   `processFile` calls that did not throw.
 */
export async function triggerManualIngest() {
    try {
        const inbox = path.join(PROJECT_ROOT, 'inbox');
        const externalInbox = path.join(PROJECT_ROOT, 'external-inbox');
        if (!fs.existsSync(inbox)) {
            return { status: 'error', message: 'Inbox directory not found' };
        }
        let filesProcessed = 0;
        let filesIngested = 0;
        // Walks one root directory and feeds each eligible file to processFile.
        const scanDirectory = async (rootDir) => {
            const entries = fs.readdirSync(rootDir, { recursive: true });
            for (const file of entries) {
                const filePath = path.join(rootDir, file);
                let isDirectory;
                try {
                    isDirectory = fs.statSync(filePath).isDirectory();
                }
                catch (error) {
                    console.warn(`[ManualIngest] Skipping unreadable entry ${file}:`, error.message);
                    continue;
                }
                // Skip directories and ignored patterns
                if (isDirectory || IGNORE_PATTERNS.test(file))
                    continue;
                filesProcessed++;
                // Trigger actual ingestion by calling processFile
                try {
                    await processFile(filePath, 'manual');
                    filesIngested++;
                }
                catch (error) {
                    console.error(`[ManualIngest] Failed to process ${file}:`, error.message);
                }
            }
        };
        await scanDirectory(inbox);
        // Also scan external-inbox if it exists
        if (fs.existsSync(externalInbox)) {
            await scanDirectory(externalInbox);
        }
        return {
            status: 'success',
            message: `Manual ingest complete: ${filesIngested}/${filesProcessed} files processed`,
            filesProcessed,
            filesIngested
        };
    }
    catch (error) {
        return {
            status: 'error',
            message: `Manual ingest failed: ${error.message}`
        };
    }
}
|
|
260
|
+
// Revert to AtomizerService for performance
|
|
261
|
+
// import { SemanticIngestionService } from '../semantic/semantic-ingestion-service.js';
|
|
262
|
+
import { AtomizerService } from './atomizer-service.js';
|
|
263
|
+
import { AtomicIngestService } from './ingest-atomic.js';
|
|
264
|
+
// import { ingestAtoms } from './ingest.js'; // Already imported at top of file
|
|
265
|
+
// Singleton Services
|
|
266
|
+
// const semanticIngest = new SemanticIngestionService();
|
|
267
|
+
// Module-level AtomizerService instance; processFile calls its atomize() to split file content.
const atomizer = new AtomizerService();
|
|
268
|
+
// Module-level AtomicIngestService instance; processFile calls its ingestResult() to store atomizer output.
const atomicIngest = new AtomicIngestService();
|
|
269
|
+
/**
 * Run the ingestion pipeline for a single file.
 *
 * Steps: filter by file suffix, read the file, hash it, compare the hash with
 * the `sources` table, atomize the content, ingest the result, upsert the
 * `sources` row, write the mirrored copy, and trigger synonym generation.
 *
 * The system status is set to 'ingesting' once the file passes the suffix and
 * path filters and is always reset to 'idle' (with progress cleared) when the
 * function exits, whether it finishes, returns early, or throws.
 *
 * @param {string} filePath - Path of the file to process.
 * @param {string} event - Label of the triggering event; only used in the log line.
 * @returns {Promise<void>} Resolves when the file was ingested or skipped.
 * @throws Re-throws any error raised while reading, atomizing, ingesting, or
 *   updating the sources table. Mirror-write failures are logged, not thrown.
 */
async function processFile(filePath, event) {
    // Accept markdown, text, YAML, CSV, JSON, and HTML files
    const acceptedSuffixes = ['.md', '.txt', '.yaml', '.csv', '.json', '.html', '.htm'];
    if (!acceptedSuffixes.some((suffix) => filePath.endsWith(suffix)))
        return;
    // Skip anything under a path containing 'mirrored_brain'
    // (presumably the mirror output directory — verify against mirror.js).
    if (filePath.includes('mirrored_brain'))
        return;
    console.log(`[Watchdog] Detected ${event}: ${filePath}`);
    // Set system status to ingesting; the finally block below resets it on every exit path.
    systemStatus.setState('ingesting', `Processing: ${path.basename(filePath)}`);
    try {
        const buffer = await fs.promises.readFile(filePath);
        // Nothing to ingest for an empty file.
        if (buffer.length === 0)
            return;
        // 1. Calculate File Hash (Raw)
        const fileHash = crypto.createHash('sha256').update(buffer).digest('hex');
        const relativePath = path.relative(NOTEBOOK_DIR, filePath);
        const content = buffer.toString('utf8');
        // 2. Check Source Table (Change Detection)
        const sourceResult = await db.run(`SELECT path, hash FROM sources WHERE path = $1`, [relativePath]);
        const rows = sourceResult?.rows;
        if (!rows) {
            // Handle potential null result
            console.log(`[Watchdog] No existing record for path: ${relativePath}`);
        }
        else if (rows.length > 0) {
            const row = rows[0];
            // Handle both array ([path, hash]) and object ({path, hash}) row formats
            const existingHash = Array.isArray(row) ? row[1] : row.hash;
            if (existingHash === fileHash) {
                console.log(`[Watchdog] File unchanged (hash match): ${relativePath}`);
                return;
            }
        }
        console.log(`[Watchdog] Processing Pipeline: ${relativePath}`);
        systemStatus.setProgress(0, 100, 'Starting ingestion...');
        // 3. DETERMINE METADATA
        // Bucket: for files nested below inbox/ or external-inbox/ use the first
        // subfolder; otherwise use the root folder. Files directly in the
        // notebook root keep the default 'notebook' bucket.
        const parts = relativePath.split(path.sep);
        let bucket = 'notebook';
        if (parts.length >= 2) {
            const root = parts[0];
            const isInboxRoot = root === 'inbox' || root === 'external-inbox';
            bucket = isInboxRoot && parts.length > 2 ? parts[1] : root;
        }
        // NOTE(review): the previous version also derived a `type` value
        // ('web_page' for .html/.htm) that was never passed anywhere; it has been
        // removed as dead code. Confirm whether the atomizer should receive it.
        // Determine Provenance
        const isExternal = relativePath.includes('external-inbox') || relativePath.includes('web_scrape');
        const provenance = isExternal ? 'external' : 'internal';
        // 4. ATOMIZE (Legacy Pipeline)
        const atomizeResult = await atomizer.atomize(content, relativePath, provenance);
        // Skip ingestion if the atomizer returned nothing (logged as transient data)
        if (!atomizeResult) {
            console.log(`[Watchdog] ⚠️ SKIP: ${relativePath} - Transient data, skipping ingestion`);
            return; // Exit early, no ingestion
        }
        const { compound, molecules, atoms } = atomizeResult;
        // 5. INGEST (Atomic)
        await atomicIngest.ingestResult(compound, molecules, atoms, [bucket]);
        // 6. Update Source Table (upsert keyed on path)
        await db.run(`INSERT INTO sources (path, hash, total_atoms, last_ingest)
VALUES ($1, $2, $3, $4)
ON CONFLICT (path) DO UPDATE SET
hash = EXCLUDED.hash,
total_atoms = EXCLUDED.total_atoms,
last_ingest = EXCLUDED.last_ingest`, [
            relativePath,
            fileHash,
            atoms.length,
            Date.now()
        ]);
        console.log(`[Watchdog] Sync Complete: ${relativePath}`);
        // Trigger Mirror: write cleaned content directly instead of a full rebuild
        console.log(`[Watchdog] Preparing mirror write...`);
        console.log(`[Watchdog] compound exists: ${!!compound}`);
        console.log(`[Watchdog] compound.compound_body exists: ${!!compound?.compound_body}`);
        console.log(`[Watchdog] compound.compound_body length: ${compound?.compound_body?.length || 0}`);
        console.log(`[Watchdog] provenance: ${provenance}`);
        try {
            console.log(`[Watchdog] Importing mirror module...`);
            const { writeMirroredFile } = await import('../mirror/mirror.js');
            console.log(`[Watchdog] Mirror module imported, calling writeMirroredFile...`);
            await writeMirroredFile(relativePath, compound.compound_body, provenance);
            console.log(`[Watchdog] ✓ Mirror write completed successfully`);
        }
        catch (e) {
            // Mirror failures are best-effort: log and continue.
            console.error(`[Watchdog] ✗ Mirror write failed:`, e.message);
            console.error(`[Watchdog] Stack trace:`, e.stack);
        }
        // Trigger post-ingestion synonym generation (debounced)
        triggerPostIngestionSynonyms();
        // Request a GC pass when the runtime exposes one (e.g. started with --expose-gc)
        if (typeof global.gc === 'function')
            global.gc();
        console.log(`[SystemStatus] Ingestion complete, system ready for search`);
    }
    catch (error) {
        console.error(`[Watchdog] Error processing ${filePath}:`, error.message);
        throw error;
    }
    finally {
        // Always return the system to idle, including on early returns and errors,
        // so the status can never stay stuck at 'ingesting'.
        systemStatus.setState('idle');
        systemStatus.clearProgress();
    }
}
|
|
400
|
+
//# sourceMappingURL=watchdog.js.map
|