@rbalchii/anchor-engine 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +609 -0
- package/README.md +317 -0
- package/anchor.bat +5 -0
- package/docs/API.md +314 -0
- package/docs/DEPLOYMENT.md +448 -0
- package/docs/INDEX.md +226 -0
- package/docs/STAR_Whitepaper_Executive.md +216 -0
- package/docs/TROUBLESHOOTING.md +535 -0
- package/docs/archive/GIT_BACKUP_VERIFICATION.md +297 -0
- package/docs/archive/adoption-guide.md +264 -0
- package/docs/archive/adoption-preparation.md +179 -0
- package/docs/archive/agent-harness-integration.md +227 -0
- package/docs/archive/api-reference.md +106 -0
- package/docs/archive/api_flows_diagram.md +118 -0
- package/docs/archive/architecture.md +410 -0
- package/docs/archive/architecture_diagram.md +174 -0
- package/docs/archive/broader-adoption-preparation.md +175 -0
- package/docs/archive/browser-paradigm-architecture.md +163 -0
- package/docs/archive/chat-integration.md +124 -0
- package/docs/archive/community-adoption-materials.md +103 -0
- package/docs/archive/community-adoption.md +147 -0
- package/docs/archive/comparison-with-siloed-solutions.md +192 -0
- package/docs/archive/comprehensive-docs.md +156 -0
- package/docs/archive/data_flow_diagram.md +251 -0
- package/docs/archive/enhancement-implementation-summary.md +146 -0
- package/docs/archive/evolution-summary.md +141 -0
- package/docs/archive/ingestion_pipeline_diagram.md +198 -0
- package/docs/archive/native-module-profiling-results.md +135 -0
- package/docs/archive/positioning-document.md +158 -0
- package/docs/archive/positioning.md +175 -0
- package/docs/archive/query-builder-documentation.md +218 -0
- package/docs/archive/quick-reference.md +40 -0
- package/docs/archive/quickstart.md +63 -0
- package/docs/archive/relationship-narrative-discovery.md +141 -0
- package/docs/archive/search-logic-improvement-plan.md +336 -0
- package/docs/archive/search_architecture_diagram.md +212 -0
- package/docs/archive/semantic-architecture-guide.md +97 -0
- package/docs/archive/sequence-diagrams.md +128 -0
- package/docs/archive/system_components_diagram.md +296 -0
- package/docs/archive/test-framework-integration.md +109 -0
- package/docs/archive/testing-framework-documentation.md +397 -0
- package/docs/archive/testing-framework-summary.md +121 -0
- package/docs/archive/testing-framework.md +377 -0
- package/docs/archive/ui-architecture.md +75 -0
- package/docs/arxiv/BIBLIOGRAPHY.bib +145 -0
- package/docs/arxiv/RELATED_WORK.tex +39 -0
- package/docs/arxiv/compile.bat +48 -0
- package/docs/arxiv/joss_response.md +33 -0
- package/docs/arxiv/prepare-submission.bat +46 -0
- package/docs/arxiv/review.md +128 -0
- package/docs/arxiv/star-whitepaper.tex +657 -0
- package/docs/code-patterns.md +289 -0
- package/docs/whitepaper.md +445 -0
- package/engine/dist/agent/runtime.d.ts +41 -0
- package/engine/dist/agent/runtime.d.ts.map +1 -0
- package/engine/dist/agent/runtime.js +73 -0
- package/engine/dist/agent/runtime.js.map +1 -0
- package/engine/dist/commands/audit-tags.d.ts +14 -0
- package/engine/dist/commands/audit-tags.d.ts.map +1 -0
- package/engine/dist/commands/audit-tags.js +180 -0
- package/engine/dist/commands/audit-tags.js.map +1 -0
- package/engine/dist/commands/distill.d.ts +19 -0
- package/engine/dist/commands/distill.d.ts.map +1 -0
- package/engine/dist/commands/distill.js +114 -0
- package/engine/dist/commands/distill.js.map +1 -0
- package/engine/dist/commands/generate-synonyms.d.ts +14 -0
- package/engine/dist/commands/generate-synonyms.d.ts.map +1 -0
- package/engine/dist/commands/generate-synonyms.js +91 -0
- package/engine/dist/commands/generate-synonyms.js.map +1 -0
- package/engine/dist/config/index.d.ts +115 -0
- package/engine/dist/config/index.d.ts.map +1 -0
- package/engine/dist/config/index.js +326 -0
- package/engine/dist/config/index.js.map +1 -0
- package/engine/dist/config/max-recall-config.d.ts +102 -0
- package/engine/dist/config/max-recall-config.d.ts.map +1 -0
- package/engine/dist/config/max-recall-config.js +102 -0
- package/engine/dist/config/max-recall-config.js.map +1 -0
- package/engine/dist/config/paths.d.ts +40 -0
- package/engine/dist/config/paths.d.ts.map +1 -0
- package/engine/dist/config/paths.js +49 -0
- package/engine/dist/config/paths.js.map +1 -0
- package/engine/dist/core/batch.d.ts +19 -0
- package/engine/dist/core/batch.d.ts.map +1 -0
- package/engine/dist/core/batch.js +37 -0
- package/engine/dist/core/batch.js.map +1 -0
- package/engine/dist/core/db.d.ts +58 -0
- package/engine/dist/core/db.d.ts.map +1 -0
- package/engine/dist/core/db.js +563 -0
- package/engine/dist/core/db.js.map +1 -0
- package/engine/dist/core/inference/ChatWorker.d.ts +2 -0
- package/engine/dist/core/inference/ChatWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/ChatWorker.js +28 -0
- package/engine/dist/core/inference/ChatWorker.js.map +1 -0
- package/engine/dist/core/inference/context_manager.d.ts +49 -0
- package/engine/dist/core/inference/context_manager.d.ts.map +1 -0
- package/engine/dist/core/inference/context_manager.js +199 -0
- package/engine/dist/core/inference/context_manager.js.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts +2 -0
- package/engine/dist/core/inference/llamaLoaderWorker.d.ts.map +1 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js +23 -0
- package/engine/dist/core/inference/llamaLoaderWorker.js.map +1 -0
- package/engine/dist/core/vector.d.ts +40 -0
- package/engine/dist/core/vector.d.ts.map +1 -0
- package/engine/dist/core/vector.js +167 -0
- package/engine/dist/core/vector.js.map +1 -0
- package/engine/dist/index.d.ts +4 -0
- package/engine/dist/index.d.ts.map +1 -0
- package/engine/dist/index.js +400 -0
- package/engine/dist/index.js.map +1 -0
- package/engine/dist/middleware/auth.d.ts +14 -0
- package/engine/dist/middleware/auth.d.ts.map +1 -0
- package/engine/dist/middleware/auth.js +44 -0
- package/engine/dist/middleware/auth.js.map +1 -0
- package/engine/dist/middleware/request-tracing.d.ts +29 -0
- package/engine/dist/middleware/request-tracing.d.ts.map +1 -0
- package/engine/dist/middleware/request-tracing.js +115 -0
- package/engine/dist/middleware/request-tracing.js.map +1 -0
- package/engine/dist/middleware/validate.d.ts +30 -0
- package/engine/dist/middleware/validate.d.ts.map +1 -0
- package/engine/dist/middleware/validate.js +117 -0
- package/engine/dist/middleware/validate.js.map +1 -0
- package/engine/dist/native/index.d.ts +106 -0
- package/engine/dist/native/index.d.ts.map +1 -0
- package/engine/dist/native/index.js +230 -0
- package/engine/dist/native/index.js.map +1 -0
- package/engine/dist/native/types.d.ts +45 -0
- package/engine/dist/native/types.d.ts.map +1 -0
- package/engine/dist/native/types.js +6 -0
- package/engine/dist/native/types.js.map +1 -0
- package/engine/dist/profiling/atomization-profiling.d.ts +8 -0
- package/engine/dist/profiling/atomization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/atomization-profiling.js +108 -0
- package/engine/dist/profiling/atomization-profiling.js.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts +8 -0
- package/engine/dist/profiling/bottleneck-identification.d.ts.map +1 -0
- package/engine/dist/profiling/bottleneck-identification.js +249 -0
- package/engine/dist/profiling/bottleneck-identification.js.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts +12 -0
- package/engine/dist/profiling/content-sanitization-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/content-sanitization-profiling.js +266 -0
- package/engine/dist/profiling/content-sanitization-profiling.js.map +1 -0
- package/engine/dist/profiling/simhash-profiling.d.ts +11 -0
- package/engine/dist/profiling/simhash-profiling.d.ts.map +1 -0
- package/engine/dist/profiling/simhash-profiling.js +168 -0
- package/engine/dist/profiling/simhash-profiling.js.map +1 -0
- package/engine/dist/routes/api.d.ts +9 -0
- package/engine/dist/routes/api.d.ts.map +1 -0
- package/engine/dist/routes/api.js +37 -0
- package/engine/dist/routes/api.js.map +1 -0
- package/engine/dist/routes/enhanced-api.d.ts +9 -0
- package/engine/dist/routes/enhanced-api.d.ts.map +1 -0
- package/engine/dist/routes/enhanced-api.js +139 -0
- package/engine/dist/routes/enhanced-api.js.map +1 -0
- package/engine/dist/routes/health.d.ts +8 -0
- package/engine/dist/routes/health.d.ts.map +1 -0
- package/engine/dist/routes/health.js +89 -0
- package/engine/dist/routes/health.js.map +1 -0
- package/engine/dist/routes/monitoring.d.ts +8 -0
- package/engine/dist/routes/monitoring.d.ts.map +1 -0
- package/engine/dist/routes/monitoring.js +509 -0
- package/engine/dist/routes/monitoring.js.map +1 -0
- package/engine/dist/routes/v1/admin.d.ts +3 -0
- package/engine/dist/routes/v1/admin.d.ts.map +1 -0
- package/engine/dist/routes/v1/admin.js +261 -0
- package/engine/dist/routes/v1/admin.js.map +1 -0
- package/engine/dist/routes/v1/atoms.d.ts +3 -0
- package/engine/dist/routes/v1/atoms.d.ts.map +1 -0
- package/engine/dist/routes/v1/atoms.js +172 -0
- package/engine/dist/routes/v1/atoms.js.map +1 -0
- package/engine/dist/routes/v1/backup.d.ts +3 -0
- package/engine/dist/routes/v1/backup.d.ts.map +1 -0
- package/engine/dist/routes/v1/backup.js +100 -0
- package/engine/dist/routes/v1/backup.js.map +1 -0
- package/engine/dist/routes/v1/git.d.ts +3 -0
- package/engine/dist/routes/v1/git.d.ts.map +1 -0
- package/engine/dist/routes/v1/git.js +316 -0
- package/engine/dist/routes/v1/git.js.map +1 -0
- package/engine/dist/routes/v1/ingest.d.ts +3 -0
- package/engine/dist/routes/v1/ingest.d.ts.map +1 -0
- package/engine/dist/routes/v1/ingest.js +66 -0
- package/engine/dist/routes/v1/ingest.js.map +1 -0
- package/engine/dist/routes/v1/memory.d.ts +14 -0
- package/engine/dist/routes/v1/memory.d.ts.map +1 -0
- package/engine/dist/routes/v1/memory.js +87 -0
- package/engine/dist/routes/v1/memory.js.map +1 -0
- package/engine/dist/routes/v1/research.d.ts +3 -0
- package/engine/dist/routes/v1/research.d.ts.map +1 -0
- package/engine/dist/routes/v1/research.js +109 -0
- package/engine/dist/routes/v1/research.js.map +1 -0
- package/engine/dist/routes/v1/search.d.ts +3 -0
- package/engine/dist/routes/v1/search.d.ts.map +1 -0
- package/engine/dist/routes/v1/search.js +180 -0
- package/engine/dist/routes/v1/search.js.map +1 -0
- package/engine/dist/routes/v1/settings.d.ts +8 -0
- package/engine/dist/routes/v1/settings.d.ts.map +1 -0
- package/engine/dist/routes/v1/settings.js +211 -0
- package/engine/dist/routes/v1/settings.js.map +1 -0
- package/engine/dist/routes/v1/system.d.ts +3 -0
- package/engine/dist/routes/v1/system.d.ts.map +1 -0
- package/engine/dist/routes/v1/system.js +326 -0
- package/engine/dist/routes/v1/system.js.map +1 -0
- package/engine/dist/routes/v1/tags.d.ts +3 -0
- package/engine/dist/routes/v1/tags.d.ts.map +1 -0
- package/engine/dist/routes/v1/tags.js +102 -0
- package/engine/dist/routes/v1/tags.js.map +1 -0
- package/engine/dist/server-8080.d.ts +2 -0
- package/engine/dist/server-8080.d.ts.map +1 -0
- package/engine/dist/server-8080.js +74 -0
- package/engine/dist/server-8080.js.map +1 -0
- package/engine/dist/services/backup/backup-restore.d.ts +37 -0
- package/engine/dist/services/backup/backup-restore.d.ts.map +1 -0
- package/engine/dist/services/backup/backup-restore.js +385 -0
- package/engine/dist/services/backup/backup-restore.js.map +1 -0
- package/engine/dist/services/backup/backup.d.ts +14 -0
- package/engine/dist/services/backup/backup.d.ts.map +1 -0
- package/engine/dist/services/backup/backup.js +442 -0
- package/engine/dist/services/backup/backup.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts +127 -0
- package/engine/dist/services/distillation/radial-distiller-v2.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js +503 -0
- package/engine/dist/services/distillation/radial-distiller-v2.js.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts +63 -0
- package/engine/dist/services/distillation/radial-distiller.d.ts.map +1 -0
- package/engine/dist/services/distillation/radial-distiller.js +394 -0
- package/engine/dist/services/distillation/radial-distiller.js.map +1 -0
- package/engine/dist/services/health-check-enhanced.d.ts +89 -0
- package/engine/dist/services/health-check-enhanced.d.ts.map +1 -0
- package/engine/dist/services/health-check-enhanced.js +417 -0
- package/engine/dist/services/health-check-enhanced.js.map +1 -0
- package/engine/dist/services/idle-manager.d.ts +56 -0
- package/engine/dist/services/idle-manager.d.ts.map +1 -0
- package/engine/dist/services/idle-manager.js +210 -0
- package/engine/dist/services/idle-manager.js.map +1 -0
- package/engine/dist/services/inference/inference-service.d.ts +27 -0
- package/engine/dist/services/inference/inference-service.d.ts.map +1 -0
- package/engine/dist/services/inference/inference-service.js +89 -0
- package/engine/dist/services/inference/inference-service.js.map +1 -0
- package/engine/dist/services/inference/inference.d.ts +59 -0
- package/engine/dist/services/inference/inference.d.ts.map +1 -0
- package/engine/dist/services/inference/inference.js +131 -0
- package/engine/dist/services/inference/inference.js.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts +74 -0
- package/engine/dist/services/ingest/atomizer-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/atomizer-service.js +982 -0
- package/engine/dist/services/ingest/atomizer-service.js.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts +43 -0
- package/engine/dist/services/ingest/content-cleaner.d.ts.map +1 -0
- package/engine/dist/services/ingest/content-cleaner.js +166 -0
- package/engine/dist/services/ingest/content-cleaner.js.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts +103 -0
- package/engine/dist/services/ingest/github-ingest-service.d.ts.map +1 -0
- package/engine/dist/services/ingest/github-ingest-service.js +537 -0
- package/engine/dist/services/ingest/github-ingest-service.js.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts +16 -0
- package/engine/dist/services/ingest/ingest-atomic.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest-atomic.js +437 -0
- package/engine/dist/services/ingest/ingest-atomic.js.map +1 -0
- package/engine/dist/services/ingest/ingest.d.ts +50 -0
- package/engine/dist/services/ingest/ingest.d.ts.map +1 -0
- package/engine/dist/services/ingest/ingest.js +230 -0
- package/engine/dist/services/ingest/ingest.js.map +1 -0
- package/engine/dist/services/ingest/watchdog.d.ts +31 -0
- package/engine/dist/services/ingest/watchdog.d.ts.map +1 -0
- package/engine/dist/services/ingest/watchdog.js +400 -0
- package/engine/dist/services/ingest/watchdog.js.map +1 -0
- package/engine/dist/services/llm/context.d.ts +6 -0
- package/engine/dist/services/llm/context.d.ts.map +1 -0
- package/engine/dist/services/llm/context.js +80 -0
- package/engine/dist/services/llm/context.js.map +1 -0
- package/engine/dist/services/llm/provider.d.ts +23 -0
- package/engine/dist/services/llm/provider.d.ts.map +1 -0
- package/engine/dist/services/llm/provider.js +338 -0
- package/engine/dist/services/llm/provider.js.map +1 -0
- package/engine/dist/services/llm/reader.d.ts +12 -0
- package/engine/dist/services/llm/reader.d.ts.map +1 -0
- package/engine/dist/services/llm/reader.js +40 -0
- package/engine/dist/services/llm/reader.js.map +1 -0
- package/engine/dist/services/mirror/mirror.d.ts +28 -0
- package/engine/dist/services/mirror/mirror.d.ts.map +1 -0
- package/engine/dist/services/mirror/mirror.js +208 -0
- package/engine/dist/services/mirror/mirror.js.map +1 -0
- package/engine/dist/services/nlp/nlp-service.d.ts +70 -0
- package/engine/dist/services/nlp/nlp-service.d.ts.map +1 -0
- package/engine/dist/services/nlp/nlp-service.js +151 -0
- package/engine/dist/services/nlp/nlp-service.js.map +1 -0
- package/engine/dist/services/nlp/query-parser.d.ts +9 -0
- package/engine/dist/services/nlp/query-parser.d.ts.map +1 -0
- package/engine/dist/services/nlp/query-parser.js +29 -0
- package/engine/dist/services/nlp/query-parser.js.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts +95 -0
- package/engine/dist/services/query-builder/DataFrame.d.ts.map +1 -0
- package/engine/dist/services/query-builder/DataFrame.js +263 -0
- package/engine/dist/services/query-builder/DataFrame.js.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts +106 -0
- package/engine/dist/services/query-builder/QueryBuilder.d.ts.map +1 -0
- package/engine/dist/services/query-builder/QueryBuilder.js +235 -0
- package/engine/dist/services/query-builder/QueryBuilder.js.map +1 -0
- package/engine/dist/services/query-builder/utils/export.d.ts +11 -0
- package/engine/dist/services/query-builder/utils/export.d.ts.map +1 -0
- package/engine/dist/services/query-builder/utils/export.js +130 -0
- package/engine/dist/services/query-builder/utils/export.js.map +1 -0
- package/engine/dist/services/research/researcher.d.ts +15 -0
- package/engine/dist/services/research/researcher.d.ts.map +1 -0
- package/engine/dist/services/research/researcher.js +123 -0
- package/engine/dist/services/research/researcher.js.map +1 -0
- package/engine/dist/services/scribe/scribe.d.ts +43 -0
- package/engine/dist/services/scribe/scribe.d.ts.map +1 -0
- package/engine/dist/services/scribe/scribe.js +135 -0
- package/engine/dist/services/scribe/scribe.js.map +1 -0
- package/engine/dist/services/search/bright-nodes.d.ts +41 -0
- package/engine/dist/services/search/bright-nodes.d.ts.map +1 -0
- package/engine/dist/services/search/bright-nodes.js +117 -0
- package/engine/dist/services/search/bright-nodes.js.map +1 -0
- package/engine/dist/services/search/context-inflator.d.ts +63 -0
- package/engine/dist/services/search/context-inflator.d.ts.map +1 -0
- package/engine/dist/services/search/context-inflator.js +649 -0
- package/engine/dist/services/search/context-inflator.js.map +1 -0
- package/engine/dist/services/search/context-manager.d.ts +34 -0
- package/engine/dist/services/search/context-manager.d.ts.map +1 -0
- package/engine/dist/services/search/context-manager.js +124 -0
- package/engine/dist/services/search/context-manager.js.map +1 -0
- package/engine/dist/services/search/distributed-query.d.ts +38 -0
- package/engine/dist/services/search/distributed-query.d.ts.map +1 -0
- package/engine/dist/services/search/distributed-query.js +105 -0
- package/engine/dist/services/search/distributed-query.js.map +1 -0
- package/engine/dist/services/search/explore.d.ts +73 -0
- package/engine/dist/services/search/explore.d.ts.map +1 -0
- package/engine/dist/services/search/explore.js +388 -0
- package/engine/dist/services/search/explore.js.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts +76 -0
- package/engine/dist/services/search/graph-context-serializer.d.ts.map +1 -0
- package/engine/dist/services/search/graph-context-serializer.js +435 -0
- package/engine/dist/services/search/graph-context-serializer.js.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts +122 -0
- package/engine/dist/services/search/llm-context-formatter.d.ts.map +1 -0
- package/engine/dist/services/search/llm-context-formatter.js +394 -0
- package/engine/dist/services/search/llm-context-formatter.js.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts +115 -0
- package/engine/dist/services/search/physics-tag-walker.d.ts.map +1 -0
- package/engine/dist/services/search/physics-tag-walker.js +611 -0
- package/engine/dist/services/search/physics-tag-walker.js.map +1 -0
- package/engine/dist/services/search/query-parser.d.ts +66 -0
- package/engine/dist/services/search/query-parser.d.ts.map +1 -0
- package/engine/dist/services/search/query-parser.js +346 -0
- package/engine/dist/services/search/query-parser.js.map +1 -0
- package/engine/dist/services/search/search-utils.d.ts +100 -0
- package/engine/dist/services/search/search-utils.d.ts.map +1 -0
- package/engine/dist/services/search/search-utils.js +473 -0
- package/engine/dist/services/search/search-utils.js.map +1 -0
- package/engine/dist/services/search/search.d.ts +116 -0
- package/engine/dist/services/search/search.d.ts.map +1 -0
- package/engine/dist/services/search/search.js +1286 -0
- package/engine/dist/services/search/search.js.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts +48 -0
- package/engine/dist/services/search/sovereign-system-prompt.d.ts.map +1 -0
- package/engine/dist/services/search/sovereign-system-prompt.js +101 -0
- package/engine/dist/services/search/sovereign-system-prompt.js.map +1 -0
- package/engine/dist/services/search/streaming-search.d.ts +51 -0
- package/engine/dist/services/search/streaming-search.d.ts.map +1 -0
- package/engine/dist/services/search/streaming-search.js +94 -0
- package/engine/dist/services/search/streaming-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts +53 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js +625 -0
- package/engine/dist/services/semantic/semantic-ingestion-service.js.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts +68 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js +176 -0
- package/engine/dist/services/semantic/semantic-molecule-processor.js.map +1 -0
- package/engine/dist/services/semantic/semantic-search.d.ts +52 -0
- package/engine/dist/services/semantic/semantic-search.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-search.js +649 -0
- package/engine/dist/services/semantic/semantic-search.js.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts +64 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.d.ts.map +1 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js +191 -0
- package/engine/dist/services/semantic/semantic-tag-deriver.js.map +1 -0
- package/engine/dist/services/semantic/types/semantic.d.ts +26 -0
- package/engine/dist/services/semantic/types/semantic.d.ts.map +1 -0
- package/engine/dist/services/semantic/types/semantic.js +7 -0
- package/engine/dist/services/semantic/types/semantic.js.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts +79 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.d.ts.map +1 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js +415 -0
- package/engine/dist/services/synonyms/auto-synonym-generator.js.map +1 -0
- package/engine/dist/services/system-status.d.ts +68 -0
- package/engine/dist/services/system-status.d.ts.map +1 -0
- package/engine/dist/services/system-status.js +107 -0
- package/engine/dist/services/system-status.js.map +1 -0
- package/engine/dist/services/tags/discovery.d.ts +16 -0
- package/engine/dist/services/tags/discovery.d.ts.map +1 -0
- package/engine/dist/services/tags/discovery.js +206 -0
- package/engine/dist/services/tags/discovery.js.map +1 -0
- package/engine/dist/services/tags/gliner.d.ts +18 -0
- package/engine/dist/services/tags/gliner.d.ts.map +1 -0
- package/engine/dist/services/tags/gliner.js +119 -0
- package/engine/dist/services/tags/gliner.js.map +1 -0
- package/engine/dist/services/tags/infector.d.ts +21 -0
- package/engine/dist/services/tags/infector.d.ts.map +1 -0
- package/engine/dist/services/tags/infector.js +168 -0
- package/engine/dist/services/tags/infector.js.map +1 -0
- package/engine/dist/services/tags/tag-auditor.d.ts +77 -0
- package/engine/dist/services/tags/tag-auditor.d.ts.map +1 -0
- package/engine/dist/services/tags/tag-auditor.js +283 -0
- package/engine/dist/services/tags/tag-auditor.js.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts +50 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.d.ts.map +1 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js +291 -0
- package/engine/dist/services/taxonomy/taxonomy-manager.js.map +1 -0
- package/engine/dist/services/vision/vision_service.d.ts +4 -0
- package/engine/dist/services/vision/vision_service.d.ts.map +1 -0
- package/engine/dist/services/vision/vision_service.js +197 -0
- package/engine/dist/services/vision/vision_service.js.map +1 -0
- package/engine/dist/test-framework/core.d.ts +133 -0
- package/engine/dist/test-framework/core.d.ts.map +1 -0
- package/engine/dist/test-framework/core.js +313 -0
- package/engine/dist/test-framework/core.js.map +1 -0
- package/engine/dist/test-framework/dataset-runner.d.ts +78 -0
- package/engine/dist/test-framework/dataset-runner.d.ts.map +1 -0
- package/engine/dist/test-framework/dataset-runner.js +223 -0
- package/engine/dist/test-framework/dataset-runner.js.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts +38 -0
- package/engine/dist/test-framework/diagnostic-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/diagnostic-tests.js +283 -0
- package/engine/dist/test-framework/diagnostic-tests.js.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts +30 -0
- package/engine/dist/test-framework/performance-regression-tests.d.ts.map +1 -0
- package/engine/dist/test-framework/performance-regression-tests.js +331 -0
- package/engine/dist/test-framework/performance-regression-tests.js.map +1 -0
- package/engine/dist/types/api.d.ts +53 -0
- package/engine/dist/types/api.d.ts.map +1 -0
- package/engine/dist/types/api.js +2 -0
- package/engine/dist/types/api.js.map +1 -0
- package/engine/dist/types/atomic.d.ts +42 -0
- package/engine/dist/types/atomic.d.ts.map +1 -0
- package/engine/dist/types/atomic.js +10 -0
- package/engine/dist/types/atomic.js.map +1 -0
- package/engine/dist/types/context-protocol.d.ts +137 -0
- package/engine/dist/types/context-protocol.d.ts.map +1 -0
- package/engine/dist/types/context-protocol.js +28 -0
- package/engine/dist/types/context-protocol.js.map +1 -0
- package/engine/dist/types/context.d.ts +2 -0
- package/engine/dist/types/context.d.ts.map +1 -0
- package/engine/dist/types/context.js +2 -0
- package/engine/dist/types/context.js.map +1 -0
- package/engine/dist/types/index.d.ts +20 -0
- package/engine/dist/types/index.d.ts.map +1 -0
- package/engine/dist/types/index.js +18 -0
- package/engine/dist/types/index.js.map +1 -0
- package/engine/dist/types/search.d.ts +31 -0
- package/engine/dist/types/search.d.ts.map +1 -0
- package/engine/dist/types/search.js +2 -0
- package/engine/dist/types/search.js.map +1 -0
- package/engine/dist/types/taxonomy.d.ts +137 -0
- package/engine/dist/types/taxonomy.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.js +138 -0
- package/engine/dist/types/taxonomy.js.map +1 -0
- package/engine/dist/types/taxonomy.simple.d.ts +131 -0
- package/engine/dist/types/taxonomy.simple.d.ts.map +1 -0
- package/engine/dist/types/taxonomy.simple.js +132 -0
- package/engine/dist/types/taxonomy.simple.js.map +1 -0
- package/engine/dist/types/tool-call.d.ts +16 -0
- package/engine/dist/types/tool-call.d.ts.map +1 -0
- package/engine/dist/types/tool-call.js +6 -0
- package/engine/dist/types/tool-call.js.map +1 -0
- package/engine/dist/types/trace.d.ts +25 -0
- package/engine/dist/types/trace.d.ts.map +1 -0
- package/engine/dist/types/trace.js +5 -0
- package/engine/dist/types/trace.js.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts +81 -0
- package/engine/dist/utils/adaptive-concurrency.d.ts.map +1 -0
- package/engine/dist/utils/adaptive-concurrency.js +266 -0
- package/engine/dist/utils/adaptive-concurrency.js.map +1 -0
- package/engine/dist/utils/date_extractor.d.ts +2 -0
- package/engine/dist/utils/date_extractor.d.ts.map +1 -0
- package/engine/dist/utils/date_extractor.js +32 -0
- package/engine/dist/utils/date_extractor.js.map +1 -0
- package/engine/dist/utils/native-module-manager.d.ts +48 -0
- package/engine/dist/utils/native-module-manager.d.ts.map +1 -0
- package/engine/dist/utils/native-module-manager.js +265 -0
- package/engine/dist/utils/native-module-manager.js.map +1 -0
- package/engine/dist/utils/native-module-profiler.d.ts +66 -0
- package/engine/dist/utils/native-module-profiler.d.ts.map +1 -0
- package/engine/dist/utils/native-module-profiler.js +182 -0
- package/engine/dist/utils/native-module-profiler.js.map +1 -0
- package/engine/dist/utils/path-manager.d.ts +59 -0
- package/engine/dist/utils/path-manager.d.ts.map +1 -0
- package/engine/dist/utils/path-manager.js +154 -0
- package/engine/dist/utils/path-manager.js.map +1 -0
- package/engine/dist/utils/performance-monitor.d.ts +92 -0
- package/engine/dist/utils/performance-monitor.d.ts.map +1 -0
- package/engine/dist/utils/performance-monitor.js +221 -0
- package/engine/dist/utils/performance-monitor.js.map +1 -0
- package/engine/dist/utils/process-manager.d.ts +18 -0
- package/engine/dist/utils/process-manager.d.ts.map +1 -0
- package/engine/dist/utils/process-manager.js +100 -0
- package/engine/dist/utils/process-manager.js.map +1 -0
- package/engine/dist/utils/request-tracer.d.ts +131 -0
- package/engine/dist/utils/request-tracer.d.ts.map +1 -0
- package/engine/dist/utils/request-tracer.js +414 -0
- package/engine/dist/utils/request-tracer.js.map +1 -0
- package/engine/dist/utils/resource-manager.d.ts +108 -0
- package/engine/dist/utils/resource-manager.d.ts.map +1 -0
- package/engine/dist/utils/resource-manager.js +235 -0
- package/engine/dist/utils/resource-manager.js.map +1 -0
- package/engine/dist/utils/safe-dns.d.ts +14 -0
- package/engine/dist/utils/safe-dns.d.ts.map +1 -0
- package/engine/dist/utils/safe-dns.js +105 -0
- package/engine/dist/utils/safe-dns.js.map +1 -0
- package/engine/dist/utils/structured-logger.d.ts +124 -0
- package/engine/dist/utils/structured-logger.d.ts.map +1 -0
- package/engine/dist/utils/structured-logger.js +332 -0
- package/engine/dist/utils/structured-logger.js.map +1 -0
- package/engine/dist/utils/tag-cleanup.d.ts +11 -0
- package/engine/dist/utils/tag-cleanup.d.ts.map +1 -0
- package/engine/dist/utils/tag-cleanup.js +111 -0
- package/engine/dist/utils/tag-cleanup.js.map +1 -0
- package/engine/dist/utils/tag-filter.d.ts +19 -0
- package/engine/dist/utils/tag-filter.d.ts.map +1 -0
- package/engine/dist/utils/tag-filter.js +147 -0
- package/engine/dist/utils/tag-filter.js.map +1 -0
- package/engine/dist/utils/tag-modulation.d.ts +80 -0
- package/engine/dist/utils/tag-modulation.d.ts.map +1 -0
- package/engine/dist/utils/tag-modulation.js +284 -0
- package/engine/dist/utils/tag-modulation.js.map +1 -0
- package/engine/dist/utils/timer.d.ts +40 -0
- package/engine/dist/utils/timer.d.ts.map +1 -0
- package/engine/dist/utils/timer.js +76 -0
- package/engine/dist/utils/timer.js.map +1 -0
- package/engine/dist/utils/token-utils.d.ts +19 -0
- package/engine/dist/utils/token-utils.d.ts.map +1 -0
- package/engine/dist/utils/token-utils.js +71 -0
- package/engine/dist/utils/token-utils.js.map +1 -0
- package/engine/dist/utils/wasm-module-loader.d.ts +50 -0
- package/engine/dist/utils/wasm-module-loader.d.ts.map +1 -0
- package/engine/dist/utils/wasm-module-loader.js +136 -0
- package/engine/dist/utils/wasm-module-loader.js.map +1 -0
- package/engine/package.json +105 -0
- package/package.json +106 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { db } from '../../core/db.js';
|
|
2
|
+
import { extractEntitiesWithGLiNER } from './gliner.js';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
// ES modules have no built-in __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
const __dirname = path.dirname(__filename);
|
|
8
|
+
// Root directory: three levels above the directory that contains this module.
const PROJECT_ROOT = path.resolve(__dirname, '..', '..', '..');
|
|
9
|
+
// Location of the JSON master tag list that this module reads and writes.
// NOTE(review): if this module sits in engine/dist/services/tags, PROJECT_ROOT is already
// the "engine" directory and this path resolves to engine/engine/context - confirm intended.
const MASTER_TAGS_PATH = path.join(PROJECT_ROOT, 'engine', 'context', 'internal_tags.json');
|
|
10
|
+
/**
 * Discovery Service (The Teacher)
 *
 * Implements the "Tag Walker" strategy (Standard 068 Phase B):
 * 1. Pick a seed tag from the master list.
 * 2. Find atoms whose content contains this tag ("Walking the Graph").
 * 3. Run the NER extractor over those atoms to find NEW entities.
 * 4. Add new entities to the master list (Expansion).
 *
 * When the master list is empty (or on a ~20% random draw) the "Explorer"
 * strategy is used instead: atoms are sampled without a seed filter.
 *
 * @param sampleSize Maximum number of atoms to sample (default 30). Fractions
 *   are truncated; values that are not finite, non-negative numbers fall back to 30.
 * @returns The discovered tags with the seed tag removed, or an empty array when
 *   no atoms were sampled, no entities were found, or extraction failed.
 */
export async function runDiscovery(sampleSize = 30) {
    const DEFAULT_SAMPLE_SIZE = 30;
    const requested = Math.trunc(Number(sampleSize));
    const limit = Number.isFinite(requested) && requested >= 0 ? requested : DEFAULT_SAMPLE_SIZE;
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    // The limit is always passed as a bound parameter, never spliced into the SQL text.
    const explorerQuery = `SELECT content FROM atoms LIMIT $1`;
    const masterTags = getMasterTags();
    let query = explorerQuery;
    let params = [limit];
    let strategy = 'explorer';
    let seedTag = '';
    // Strategy 1: The Walker (80% chance if we have tags)
    if (masterTags.length > 0 && Math.random() > 0.2) {
        const candidate = masterTags[Math.floor(Math.random() * masterTags.length)];
        seedTag = candidate == null ? '' : String(candidate);
    }
    if (seedTag) {
        // Case-insensitive "contains" match. LIKE wildcards inside the tag are escaped
        // (backslash is the default LIKE escape character) so that a tag such as
        // "node_modules" is matched literally.
        const literalSeed = seedTag.replace(/[\\%_]/g, '\\$&');
        query = `
            SELECT content
            FROM atoms
            WHERE content ILIKE $1
            LIMIT $2
        `;
        params = [`%${literalSeed}%`, limit];
        strategy = 'walker';
        console.log(`[Discovery] Teacher Mode (Walker): Expanding on seed tag '${seedTag}'...`);
    }
    // Strategy 2: The Explorer (Fallback / Initial Boot)
    else {
        // NOTE(review): the log says "Random Sampling" but the query has no ORDER BY,
        // so the database decides which rows come back.
        console.log(`[Discovery] Teacher Mode (Explorer): Random Sampling ${limit} atoms...`);
    }
    let result;
    try {
        result = await db.run(query, params);
    }
    catch (e) {
        console.warn(`[Discovery] Query failed for strategy '${strategy}' (Seed: ${seedTag}):`, describeError(e));
        console.warn(`[Discovery] Falling back to safe Explorer mode.`);
        // A failure of the fallback query is intentionally allowed to propagate.
        result = await db.run(explorerQuery, [limit]);
    }
    const rows = result && result.rows;
    if (!rows || rows.length === 0) {
        if (strategy === 'walker') {
            console.log(`[Discovery] Walker found no atoms for tag '${seedTag}'. It might be rare.`);
        }
        else {
            console.warn('[Discovery] No atoms found for learning.');
        }
        return [];
    }
    const sampledContent = rows.map((r) => {
        // Rows may arrive either as arrays ([content]) or as objects ({ content }).
        const content = String(Array.isArray(r) ? r[0] : r.content);
        // Truncate each atom to 500 characters to keep extraction fast.
        return content.length > 500 ? content.substring(0, 500) : content;
    }).join('\n---\n');
    console.log(`[Discovery] Teacher analyzing ${rows.length} atoms via BERT...`);
    try {
        // The context is seeded, so the extractor is more likely to surface
        // domain-specific co-occurrences of the seed tag.
        const discoveredTags = await extractEntitiesWithGLiNER(sampledContent, [
            'person', 'organization', 'technology', 'project', 'software', 'location', 'concept'
        ]);
        console.log(`[Discovery] BERT found ${discoveredTags.length} potential tags.`);
        if (discoveredTags.length === 0) {
            console.warn(`[Discovery] Teacher (BERT) passed. Error: BERT found no entities.`);
            return [];
        }
        // Filter out the seed tag so we don't just rediscover it
        const seedLower = seedTag.toLowerCase();
        const newTags = discoveredTags.filter(t => t.toLowerCase() !== seedLower);
        if (newTags.length > 0) {
            console.log(`[Discovery] Expansion Successful! '${seedTag}' led to: ${newTags.slice(0, 5).join(', ')}...`);
            await updateMasterTags(newTags);
        }
        return newTags;
    }
    catch (e) {
        console.warn(`[Discovery] Teacher (BERT) passed. Error: ${describeError(e)}`);
        // No slower fallback is attempted here; an empty result keeps this path fast.
        return [];
    }
}
|
|
111
|
+
/**
|
|
112
|
+
* Updates the JSON master list with new findings.
|
|
113
|
+
*/
|
|
114
|
+
async function updateMasterTags(newTags) {
|
|
115
|
+
try {
|
|
116
|
+
let currentTags = { keywords: [] };
|
|
117
|
+
// Ensure directory exists
|
|
118
|
+
const contextDir = path.dirname(MASTER_TAGS_PATH);
|
|
119
|
+
if (!fs.existsSync(contextDir)) {
|
|
120
|
+
fs.mkdirSync(contextDir, { recursive: true });
|
|
121
|
+
}
|
|
122
|
+
// Read existing
|
|
123
|
+
if (fs.existsSync(MASTER_TAGS_PATH)) {
|
|
124
|
+
const content = fs.readFileSync(MASTER_TAGS_PATH, 'utf8');
|
|
125
|
+
try {
|
|
126
|
+
currentTags = JSON.parse(content);
|
|
127
|
+
// Handle if it's just an array vs object
|
|
128
|
+
if (Array.isArray(currentTags)) {
|
|
129
|
+
currentTags = { keywords: currentTags };
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
catch (jsonErr) {
|
|
133
|
+
console.warn('[Discovery] Corrupt tags file, starting fresh.');
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// Merge
|
|
137
|
+
const existingSet = new Set(currentTags.keywords.map((t) => t.toLowerCase()));
|
|
138
|
+
const added = [];
|
|
139
|
+
newTags.forEach(tag => {
|
|
140
|
+
const normalized = tag.toLowerCase().trim();
|
|
141
|
+
if (normalized.length > 2 && !existingSet.has(normalized)) {
|
|
142
|
+
// Basic filtering
|
|
143
|
+
if (!['the', 'and', 'for', 'with'].includes(normalized)) {
|
|
144
|
+
currentTags.keywords.push(tag); // Keep original case
|
|
145
|
+
existingSet.add(normalized);
|
|
146
|
+
added.push(tag);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
});
|
|
150
|
+
if (added.length > 0) {
|
|
151
|
+
fs.writeFileSync(MASTER_TAGS_PATH, JSON.stringify(currentTags, null, 2));
|
|
152
|
+
console.log(`[Discovery] Learned ${added.length} new tags:`, added.join(', '));
|
|
153
|
+
// Explicitly invalidate cache just to be safe (though watcher usually catches it)
|
|
154
|
+
cachedMasterTags = null;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
catch (e) {
|
|
158
|
+
console.error('[Discovery] Failed to update master list:', e);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
let cachedMasterTags = null;
|
|
162
|
+
let watcherInitialized = false;
|
|
163
|
+
/**
|
|
164
|
+
* Reads the master list for the Infector (and Walker).
|
|
165
|
+
* Uses an in-memory cache updated via file watcher.
|
|
166
|
+
*/
|
|
167
|
+
export function getMasterTags() {
|
|
168
|
+
if (cachedMasterTags !== null) {
|
|
169
|
+
return cachedMasterTags;
|
|
170
|
+
}
|
|
171
|
+
try {
|
|
172
|
+
if (fs.existsSync(MASTER_TAGS_PATH)) {
|
|
173
|
+
const content = fs.readFileSync(MASTER_TAGS_PATH, 'utf8');
|
|
174
|
+
const data = JSON.parse(content);
|
|
175
|
+
let tags = [];
|
|
176
|
+
if (Array.isArray(data))
|
|
177
|
+
tags = data;
|
|
178
|
+
else if (data.keywords && Array.isArray(data.keywords))
|
|
179
|
+
tags = data.keywords;
|
|
180
|
+
cachedMasterTags = tags;
|
|
181
|
+
// Initialize watcher on first successful read
|
|
182
|
+
if (!watcherInitialized) {
|
|
183
|
+
watcherInitialized = true;
|
|
184
|
+
try {
|
|
185
|
+
fs.watch(MASTER_TAGS_PATH, (eventType) => {
|
|
186
|
+
if (eventType === 'change' || eventType === 'rename') {
|
|
187
|
+
// Invalidate cache
|
|
188
|
+
cachedMasterTags = null;
|
|
189
|
+
}
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
catch (watchErr) {
|
|
193
|
+
console.warn('[Discovery] Failed to set up file watcher for tags:', watchErr);
|
|
194
|
+
// If watch fails, we fallback to just reading it occasionally or next startup,
|
|
195
|
+
// but the error is logged. We still consider it "initialized" so we don't spam watch attempts.
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
return tags;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
catch (e) {
|
|
202
|
+
console.error('[Discovery] Failed to load internal_tags.json:', e);
|
|
203
|
+
}
|
|
204
|
+
return [];
|
|
205
|
+
}
|
|
206
|
+
//# sourceMappingURL=discovery.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"discovery.js","sourceRoot":"","sources":["../../../src/services/tags/discovery.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AACxD,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AAEpC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;AAC/D,MAAM,gBAAgB,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,QAAQ,EAAE,SAAS,EAAE,oBAAoB,CAAC,CAAC;AAE5F;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,aAAqB,EAAE;IACtD,MAAM,UAAU,GAAG,aAAa,EAAE,CAAC;IACnC,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,QAAQ,GAAG,QAAQ,CAAC;IACxB,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,sDAAsD;IACtD,mHAAmH;IACnH,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,EAAE,CAAC;QAC/C,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QAEpE,+DAA+D;QAC/D,kFAAkF;QAClF,0CAA0C;QAC1C,KAAK,GAAG;;;;oBAII,UAAU;SACrB,CAAC;QACF,QAAQ,GAAG,QAAQ,CAAC;QACpB,OAAO,CAAC,GAAG,CAAC,6DAA6D,OAAO,MAAM,CAAC,CAAC;IAC5F,CAAC;IACD,qDAAqD;SAChD,CAAC;QACF,KAAK,GAAG,mCAAmC,UAAU,EAAE,CAAC;QACxD,QAAQ,GAAG,UAAU,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,wDAAwD,UAAU,WAAW,CAAC,CAAC;IAC/F,CAAC;IAED,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACD,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YACnC,MAAM,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;aAAM,CAAC;YACJ,MAAM,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACjC,CAAC;IACL,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,0CAA0C,QAAQ,YAAY,OAAO,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;QACnG,OAAO,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QAChE,KAAK,GAAG,mCAAmC,UAAU,EAAE,CAAC;QACxD,MAAM,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3C,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CA
AC,8CAA8C,OAAO,sBAAsB,CAAC,CAAC;QAC7F,CAAC;aAAM,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,EAAE,CAAC;IACd,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE;QAC9C,gEAAgE;QAChE,IAAI,OAAO,CAAC;QACZ,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACnB,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,yCAAyC;QACrE,CAAC;aAAM,CAAC;YACJ,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,yCAAyC;QAC1E,CAAC;QACD,6BAA6B;QAC7B,OAAO,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IACtE,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAEnB,OAAO,CAAC,GAAG,CAAC,iCAAiC,MAAM,CAAC,IAAI,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAErF,IAAI,CAAC;QACD,wCAAwC;QACxC,yFAAyF;QACzF,4DAA4D;QAC5D,MAAM,cAAc,GAAG,MAAM,yBAAyB,CAAC,cAAc,EAAE;YACnE,QAAQ,EAAE,cAAc,EAAE,YAAY,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS;SACvF,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,0BAA0B,cAAc,CAAC,MAAM,kBAAkB,CAAC,CAAC;QAE/E,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,yDAAyD;YACzD,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,KAAK,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;YAEtF,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,CAAC,GAAG,CAAC,sCAAsC,OAAO,aAAa,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC3G,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YACD,OAAO,OAAO,CAAC;QACnB,CAAC;aAAM,CAAC;YACJ,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC/C,CAAC;IACL,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,gDAAgD;QAChD,mEAAmE;QACnE,OAAO,EAAE,CAAC;IACd,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAAC,OAAiB;IAC7C,IAAI,CAAC;QACD,IAAI,WAAW,GAAQ,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAExC,0BAA0B;QAC1B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;QAClD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC7B,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;QAED,gBAAgB;QAChB,IAAI,EAAE,CAAC,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAClC,MAA
M,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC;YAC1D,IAAI,CAAC;gBACD,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAClC,yCAAyC;gBACzC,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;oBAC7B,WAAW,GAAG,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;gBAC5C,CAAC;YACL,CAAC;YAAC,OAAO,OAAO,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;YACnE,CAAC;QACL,CAAC;QAED,QAAQ;QACR,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QACtF,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAClB,MAAM,UAAU,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACxD,kBAAkB;gBAClB,IAAI,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;oBACtD,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;oBACrD,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBAC5B,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpB,CAAC;YACL,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,EAAE,CAAC,aAAa,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACzE,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,CAAC,MAAM,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YAE/E,kFAAkF;YAClF,gBAAgB,GAAG,IAAI,CAAC;QAC5B,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,CAAC,CAAC,CAAC;IAClE,CAAC;AACL,CAAC;AAED,IAAI,gBAAgB,GAAoB,IAAI,CAAC;AAC7C,IAAI,kBAAkB,GAAG,KAAK,CAAC;AAE/B;;;GAGG;AACH,MAAM,UAAU,aAAa;IACzB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;QAC5B,OAAO,gBAAgB,CAAC;IAC5B,CAAC;IAED,IAAI,CAAC;QACD,IAAI,EAAE,CAAC,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAClC,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC;YAC1D,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEjC,IAAI,IAAI,GAAa,EAAE,CAAC;YACxB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;gBAAE,IAAI,GAAG,IAAI,CAAC;iBAChC,IAAI,IAAI,CAAC,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAAE,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC;YAE7E,gBAAgB,GAAG,IAAI,CAA
C;YAExB,8CAA8C;YAC9C,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBACtB,kBAAkB,GAAG,IAAI,CAAC;gBAC1B,IAAI,CAAC;oBACD,EAAE,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC,SAAS,EAAE,EAAE;wBACrC,IAAI,SAAS,KAAK,QAAQ,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;4BACnD,mBAAmB;4BACnB,gBAAgB,GAAG,IAAI,CAAC;wBAC5B,CAAC;oBACL,CAAC,CAAC,CAAC;gBACP,CAAC;gBAAC,OAAO,QAAQ,EAAE,CAAC;oBAChB,OAAO,CAAC,IAAI,CAAC,qDAAqD,EAAE,QAAQ,CAAC,CAAC;oBAC9E,+EAA+E;oBAC/E,+FAA+F;gBACnG,CAAC;YACL,CAAC;YAED,OAAO,IAAI,CAAC;QAChB,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,gDAAgD,EAAE,CAAC,CAAC,CAAC;IACvE,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NER Teacher Service (BERT-based)
|
|
3
|
+
*
|
|
4
|
+
* Uses an ONNX-optimized BERT model to perform Named Entity Recognition.
|
|
5
|
+
* Switched from GLiNER (unsupported architecture) to standard BERT NER.
|
|
6
|
+
* Implements lazy loading and automatic unloading to manage memory usage.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Unload the NER model to free memory.
 *
 * @returns A promise that resolves (with no value) once the unload attempt has finished.
 */
|
|
11
|
+
export declare function unloadModel(): Promise<void>;
|
|
12
|
+
/**
 * Check if the model is currently loaded.
 *
 * @returns `true` while a model is loaded, `false` otherwise.
 */
|
|
15
|
+
export declare function isModelLoadedStatus(): boolean;
|
|
16
|
+
/**
 * Extracts named entities from `text`.
 *
 * Despite the name, the file header states that this service uses a BERT NER
 * model rather than GLiNER.
 *
 * @param text - Text to analyse.
 * @param _entities - Optional list of entity labels. NOTE(review): the underscore
 *   prefix suggests the argument is unused - confirm against the implementation.
 * @returns A promise resolving to the extracted entity strings.
 */
export declare function extractEntitiesWithGLiNER(text: string, _entities?: string[]): Promise<string[]>;
|
|
17
|
+
/**
 * Manually triggers unloading of the NER model.
 *
 * NOTE(review): has the same signature as `unloadModel`; presumably an alias - confirm.
 *
 * @returns A promise that resolves (with no value) once the unload attempt has finished.
 */
export declare function unloadNerModel(): Promise<void>;
|
|
18
|
+
//# sourceMappingURL=gliner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../../src/services/tags/gliner.ts"],"names":[],"mappings":"AACA;;;;;;GAMG;AAuBH;;GAEG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAE7C;AAwDD,wBAAsB,yBAAyB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,GAAE,MAAM,EAAO,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAgCzG;AAGD,wBAAsB,cAAc,kBAEnC"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NER Teacher Service (BERT-based)
|
|
3
|
+
*
|
|
4
|
+
* Uses an ONNX-optimized BERT model to perform Named Entity Recognition.
|
|
5
|
+
* Switched from GLiNER (unsupported architecture) to standard BERT NER.
|
|
6
|
+
* Implements lazy loading and automatic unloading to manage memory usage.
|
|
7
|
+
*/
|
|
8
|
+
let nerPipeline = null;
|
|
9
|
+
let lastUsed = 0;
|
|
10
|
+
let idleTimeout = null;
|
|
11
|
+
const UNLOAD_TIMEOUT = 5 * 60 * 1000; // 5 minutes (consistent with NLP service)
|
|
12
|
+
const isModelLoaded = false;
|
|
13
|
+
/**
|
|
14
|
+
* Schedule model unload after idle period
|
|
15
|
+
*/
|
|
16
|
+
function scheduleModelUnload() {
|
|
17
|
+
if (idleTimeout) {
|
|
18
|
+
clearTimeout(idleTimeout);
|
|
19
|
+
}
|
|
20
|
+
idleTimeout = setTimeout(() => {
|
|
21
|
+
cleanupPipeline();
|
|
22
|
+
}, UNLOAD_TIMEOUT);
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Unload the NER model to free memory
|
|
26
|
+
*/
|
|
27
|
+
export async function unloadModel() {
|
|
28
|
+
await cleanupPipeline();
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Check if model is currently loaded
|
|
32
|
+
*/
|
|
33
|
+
export function isModelLoadedStatus() {
|
|
34
|
+
return nerPipeline !== null;
|
|
35
|
+
}
|
|
36
|
+
// Promise of a model load that is currently in progress, shared by concurrent callers.
let pipelineLoadInFlight = null;
/**
 * Returns the NER pipeline, loading it on first use.
 *
 * When the pipeline is already loaded, the idle-unload timer is reset and the
 * existing instance is returned. When a load is already running, the caller
 * joins it instead of starting a second model load.
 *
 * @returns The token-classification pipeline function.
 * @throws Whatever the dynamic import or the fallback model load throws.
 */
async function initializePipeline() {
    if (nerPipeline) {
        lastUsed = Date.now();
        scheduleModelUnload(); // Reset idle timer on use
        return nerPipeline;
    }
    if (pipelineLoadInFlight === null) {
        pipelineLoadInFlight = loadNerPipeline().finally(() => {
            // Clear the marker on success and on failure so a failed load can be retried.
            pipelineLoadInFlight = null;
        });
    }
    return pipelineLoadInFlight;
}
/**
 * Imports Transformers.js, applies the runtime settings, and loads the primary
 * model, falling back to the multilingual model when the primary one fails.
 */
async function loadNerPipeline() {
    console.log('[NER] Dynamically loading Transformers.js...');
    const { pipeline, env } = await import('@xenova/transformers');
    // Runtime settings are carried over unchanged. The original comments state the
    // intent is to avoid native dependencies (the native ONNX backend / sharp) that
    // might crash on Windows. NOTE(review): confirm each flag against the library docs.
    env.allowLocalModels = true;
    env.backends.onnx['native'] = false;
    env.backends.onnx.wasm.proxy = false;
    env.backends.onnx.wasm.numThreads = 1;
    env.useFS = false;
    env.useBrowserCache = false;
    console.log('[NER] Loading BERT NER model (Xenova/bert-base-NER)...');
    let loaded;
    try {
        loaded = await pipeline('token-classification', 'Xenova/bert-base-NER', {
            quantized: true
        });
    }
    catch (e) {
        // Include the reason so a failing primary model can be diagnosed.
        console.warn('[NER] Primary model failed. Trying fallback (Xenova/bert-base-multilingual-cased-ner-hrl)...', e instanceof Error ? e.message : e);
        loaded = await pipeline('token-classification', 'Xenova/bert-base-multilingual-cased-ner-hrl', {
            quantized: true
        });
    }
    nerPipeline = loaded;
    console.log('[NER] Model loaded successfully.');
    lastUsed = Date.now();
    scheduleModelUnload(); // Start idle timer
    return nerPipeline;
}
|
|
70
|
+
async function cleanupPipeline() {
|
|
71
|
+
if (nerPipeline) {
|
|
72
|
+
try {
|
|
73
|
+
// Attempt to clean up the pipeline if it has a dispose method
|
|
74
|
+
if (nerPipeline.dispose) {
|
|
75
|
+
await nerPipeline.dispose();
|
|
76
|
+
}
|
|
77
|
+
nerPipeline = null;
|
|
78
|
+
console.log('[NER] Model unloaded to free memory.');
|
|
79
|
+
}
|
|
80
|
+
catch (e) {
|
|
81
|
+
console.warn('[NER] Error during pipeline cleanup:', e);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
export async function extractEntitiesWithGLiNER(text, _entities = []) {
|
|
86
|
+
try {
|
|
87
|
+
const pipelineInstance = await initializePipeline();
|
|
88
|
+
// BERT NER returns entities with labels like B-PER, I-ORG, B-LOC, B-MISC
|
|
89
|
+
// We extract the actual text (word) from each recognized entity
|
|
90
|
+
const results = await pipelineInstance(text);
|
|
91
|
+
const discovered = new Set();
|
|
92
|
+
for (const res of results) {
|
|
93
|
+
// Filter by confidence score and entity type
|
|
94
|
+
// B- prefix means "Beginning of entity", I- means "Inside entity"
|
|
95
|
+
if (res.score > 0.7 && res.entity && res.word) {
|
|
96
|
+
// Clean up subword tokens (BERT uses ## prefix for subwords)
|
|
97
|
+
const word = res.word.replace(/^##/, '').trim();
|
|
98
|
+
if (word.length > 1) {
|
|
99
|
+
discovered.add(word);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
console.log(`[NER] Discovered ${discovered.size} entities.`);
|
|
104
|
+
lastUsed = Date.now();
|
|
105
|
+
return Array.from(discovered);
|
|
106
|
+
}
|
|
107
|
+
catch (e) {
|
|
108
|
+
console.warn('[NER] Service Initialization Failed:', e.message);
|
|
109
|
+
console.log('[NER] Falling back gracefully to LLM...');
|
|
110
|
+
// Unload the pipeline on error to free memory
|
|
111
|
+
await cleanupPipeline();
|
|
112
|
+
return [];
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// Export a function to manually trigger cleanup if needed
|
|
116
|
+
export async function unloadNerModel() {
|
|
117
|
+
await cleanupPipeline();
|
|
118
|
+
}
|
|
119
|
+
//# sourceMappingURL=gliner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../../src/services/tags/gliner.ts"],"names":[],"mappings":"AACA;;;;;;GAMG;AAIH,IAAI,WAAW,GAAQ,IAAI,CAAC;AAC5B,IAAI,QAAQ,GAAW,CAAC,CAAC;AACzB,IAAI,WAAW,GAA0B,IAAI,CAAC;AAC9C,MAAM,cAAc,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,0CAA0C;AAChF,MAAM,aAAa,GAAG,KAAK,CAAC;AAE5B;;GAEG;AACH,SAAS,mBAAmB;IAC1B,IAAI,WAAW,EAAE,CAAC;QAChB,YAAY,CAAC,WAAW,CAAC,CAAC;IAC5B,CAAC;IAED,WAAW,GAAG,UAAU,CAAC,GAAG,EAAE;QAC5B,eAAe,EAAE,CAAC;IACpB,CAAC,EAAE,cAAc,CAAC,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,MAAM,eAAe,EAAE,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO,WAAW,KAAK,IAAI,CAAC;AAC9B,CAAC;AAED,KAAK,UAAU,kBAAkB;IAC7B,IAAI,WAAW,EAAE,CAAC;QACd,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACtB,mBAAmB,EAAE,CAAC,CAAC,0BAA0B;QACjD,OAAO,WAAW,CAAC;IACvB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAC5D,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAE/D,kEAAkE;IAClE,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAC5B,kDAAkD;IAClD,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,KAAK,CAAC;IACpC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;IAEtC,qCAAqC;IACrC,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC;IAClB,GAAG,CAAC,eAAe,GAAG,KAAK,CAAC;IAE5B,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;IACtE,IAAI,CAAC;QACD,WAAW,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,sBAAsB,EAAE;YACzE,SAAS,EAAE,IAAI;SAClB,CAAC,CAAC;IACP,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,CAAC,IAAI,CAAC,8FAA8F,CAAC,CAAC;QAC7G,WAAW,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,6CAA6C,EAAE;YAChG,SAAS,EAAE,IAAI;SAClB,CAAC,CAAC;IACP,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;IAEhD,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACtB,mBAAmB,EAAE,CAAC,CAAC,mBAAmB;IAC1C,OAAO,WAAW,CAAC;AACvB,CAAC;AAED,KAAK,UAAU,eAAe;IAC1B,IAAI,WAAW,EAAE,CAAC;QACd,IAAI,CAAC;YACD,8DAA8D;YAC9D,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBACtB,MAAM,WAAW,CAAC,OAAO,EAAE,CAAC;YAChC,CAAC;YACD,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;QACxD,CAAC;Q
AAC,OAAO,CAAC,EAAE,CAAC;YACT,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,CAAC;QAC5D,CAAC;IACL,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAY,EAAE,YAAsB,EAAE;IAClF,IAAI,CAAC;QACD,MAAM,gBAAgB,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAEpD,yEAAyE;QACzE,gEAAgE;QAChE,MAAM,OAAO,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;YACxB,6CAA6C;YAC7C,kEAAkE;YAClE,IAAI,GAAG,CAAC,KAAK,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC5C,6DAA6D;gBAC7D,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClB,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,oBAAoB,UAAU,CAAC,IAAI,YAAY,CAAC,CAAC;QAC7D,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACtB,OAAO,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QAEvD,8CAA8C;QAC9C,MAAM,eAAe,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACd,CAAC;AACL,CAAC;AAED,0DAA0D;AAC1D,MAAM,CAAC,KAAK,UAAU,cAAc;IAChC,MAAM,eAAe,EAAE,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag Infection Service (The "Student")
|
|
3
|
+
*
|
|
4
|
+
* Implements Standard 068: Weak Supervision via High-Speed Pattern Matching.
|
|
5
|
+
* Implements Standard 069: Functional Flow (Generators) for infinite scaling.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * 2. The Processor (Transform)
 * Applies "Viral Tags" to a single atom.
 *
 * @param atom - The atom to process: its `id`, text `content`, and current `tags`.
 * @param patterns - Tag patterns to apply (typed `any`). NOTE(review): presumably the
 *   parsed tag-list file with a `keywords` array - confirm against the implementation.
 * @returns A tag list, or `null`. NOTE(review): presumably `null` means that no tag
 *   was added - confirm against the implementation.
 */
|
|
11
|
+
export declare function infectAtom(atom: {
|
|
12
|
+
id: string;
|
|
13
|
+
content: string;
|
|
14
|
+
tags: string[];
|
|
15
|
+
}, patterns: any): string[] | null;
|
|
16
|
+
/**
 * 3. The Orchestrator (Sink)
 * Connects the Stream to the Processor.
 *
 * @returns A promise that resolves (with no value) when the loop has finished.
 */
|
|
20
|
+
export declare function runInfectionLoop(): Promise<void>;
|
|
21
|
+
//# sourceMappingURL=infector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"infector.d.ts","sourceRoot":"","sources":["../../../src/services/tags/infector.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA0EH;;;GAGG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,EAAE,CAAA;CAAE,EAAE,QAAQ,EAAE,GAAG,GAAG,MAAM,EAAE,GAAG,IAAI,CA2DhH;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,kBA+CrC"}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag Infection Service (The "Student")
|
|
3
|
+
*
|
|
4
|
+
* Implements Standard 068: Weak Supervision via High-Speed Pattern Matching.
|
|
5
|
+
* Implements Standard 069: Functional Flow (Generators) for infinite scaling.
|
|
6
|
+
*/
|
|
7
|
+
import wink from 'wink-nlp';
|
|
8
|
+
import model from 'wink-eng-lite-web-model';
|
|
9
|
+
import * as fs from 'fs';
|
|
10
|
+
import * as path from 'path';
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
12
|
+
import { db } from '../../core/db.js';
|
|
13
|
+
// Initialize the "Reflex" Engine (Fast CPU NLP)
|
|
14
|
+
// Cast to any to avoid strict typing issues with wink-nlp generic models
|
|
15
|
+
// wink-nlp instance built from the English lite web model; used by infectAtom to read atom text.
const nlp = wink(model);
|
|
16
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
17
|
+
const __dirname = path.dirname(__filename);
|
|
18
|
+
const PROJECT_ROOT = path.resolve(__dirname, '..', '..', '..'); // engine/src/services/tags -> engine/src/services -> engine/src -> engine
|
|
19
|
+
// Master tag list consumed by runInfectionLoop.
// NOTE(review): per the comment above, PROJECT_ROOT is already the "engine" directory, so
// this path resolves to engine/engine/context/internal_tags.json - confirm this is intended.
const TAGS_FILE = path.join(PROJECT_ROOT, 'engine', 'context', 'internal_tags.json');
|
|
20
|
+
/**
|
|
21
|
+
* 1. The Generator (Source)
|
|
22
|
+
* Lazily fetches atoms from the database in batches to prevent RAM spikes.
|
|
23
|
+
* This replaces the need for recursion or massive array loading.
|
|
24
|
+
*/
|
|
25
|
+
async function* atomStream(batchSize = 50) {
|
|
26
|
+
let lastId = '';
|
|
27
|
+
let batchCount = 0;
|
|
28
|
+
while (true) {
|
|
29
|
+
// Fetch next batch where ID > lastId
|
|
30
|
+
const query = `
|
|
31
|
+
SELECT id, content, tags
|
|
32
|
+
FROM atoms
|
|
33
|
+
WHERE id > $1
|
|
34
|
+
ORDER BY id
|
|
35
|
+
LIMIT $2
|
|
36
|
+
`;
|
|
37
|
+
const result = await db.run(query, [lastId, batchSize]);
|
|
38
|
+
batchCount++;
|
|
39
|
+
if (result.rows && result.rows.length > 0 && batchCount % 50 === 0) {
|
|
40
|
+
console.log(`[Infector] Stream fetched batch of ${result.rows.length} atoms... (Batch ${batchCount})`);
|
|
41
|
+
}
|
|
42
|
+
if (!result.rows || result.rows.length === 0) {
|
|
43
|
+
break; // Stream exhausted
|
|
44
|
+
}
|
|
45
|
+
// Yield one atom at a time (Functional Flow)
|
|
46
|
+
for (const row of result.rows) {
|
|
47
|
+
// Handle both array and object formats that PGlite might return
|
|
48
|
+
let id, content, tags;
|
|
49
|
+
if (Array.isArray(row)) {
|
|
50
|
+
// Row is in array format [id, content, tags]
|
|
51
|
+
[id, content, tags] = row;
|
|
52
|
+
}
|
|
53
|
+
else {
|
|
54
|
+
// Row is in object format {id, content, tags}
|
|
55
|
+
id = row.id;
|
|
56
|
+
content = row.content;
|
|
57
|
+
tags = row.tags;
|
|
58
|
+
}
|
|
59
|
+
lastId = id; // Move cursor for next batch
|
|
60
|
+
yield {
|
|
61
|
+
id: id,
|
|
62
|
+
content: content,
|
|
63
|
+
tags: tags || []
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* 2. The Processor (Transform)
|
|
70
|
+
* Applies "Viral Tags" to a single atom.
|
|
71
|
+
*/
|
|
72
|
+
export function infectAtom(atom, patterns) {
|
|
73
|
+
if (!atom.content)
|
|
74
|
+
return null;
|
|
75
|
+
const currentTags = new Set(atom.tags);
|
|
76
|
+
let changed = false;
|
|
77
|
+
// Use Wink-NLP to normalize text (case folding, tokenization)
|
|
78
|
+
const doc = nlp.readDoc(atom.content);
|
|
79
|
+
const text = doc.out(nlp.its.text).toLowerCase();
|
|
80
|
+
// Regex check with smart boundaries
|
|
81
|
+
patterns.keywords.forEach((keyword) => {
|
|
82
|
+
if (currentTags.has(keyword))
|
|
83
|
+
return;
|
|
84
|
+
// Escape specialregex characters
|
|
85
|
+
const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
86
|
+
// Apply boundary only if the keyword starts/ends with a word character
|
|
87
|
+
// This handles "C++" correctly (no boundary after +) vs "Java" (boundary after a)
|
|
88
|
+
const startBoundary = /^\w/.test(keyword) ? '\\b' : '';
|
|
89
|
+
const endBoundary = /\w$/.test(keyword) ? '\\b' : '';
|
|
90
|
+
const regex = new RegExp(`${startBoundary}${escaped}${endBoundary}`, 'i');
|
|
91
|
+
if (regex.test(text)) {
|
|
92
|
+
currentTags.add(keyword); // Infection!
|
|
93
|
+
changed = true;
|
|
94
|
+
}
|
|
95
|
+
});
|
|
96
|
+
// --- ENHANCEMENT: Temporal Auto-Tagging ---
|
|
97
|
+
// 1. Years (1900 - 2099)
|
|
98
|
+
// Regex matches 4 digits starting with 19 or 20, surrounded by boundaries
|
|
99
|
+
const yearMatches = text.match(/\b((?:19|20)\d{2})\b/g);
|
|
100
|
+
if (yearMatches) {
|
|
101
|
+
yearMatches.forEach(year => (!currentTags.has(year)) && (currentTags.add(year), changed = true));
|
|
102
|
+
}
|
|
103
|
+
// 2. Months (Full Names)
|
|
104
|
+
const months = [
|
|
105
|
+
"january", "february", "march", "april", "may", "june",
|
|
106
|
+
"july", "august", "september", "october", "november", "december"
|
|
107
|
+
];
|
|
108
|
+
// Simple inclusion check for months (since we normalized text to lowercase)
|
|
109
|
+
// We check for word boundaries to avoid matching "may" inside "maybe"
|
|
110
|
+
months.forEach(month => {
|
|
111
|
+
// Create regex for word boundary match
|
|
112
|
+
const regex = new RegExp(`\\b${month}\\b`, 'i');
|
|
113
|
+
if (!currentTags.has(month) && regex.test(text)) {
|
|
114
|
+
// Capitalize first letter for the tag
|
|
115
|
+
const tag = month.charAt(0).toUpperCase() + month.slice(1);
|
|
116
|
+
currentTags.add(tag);
|
|
117
|
+
changed = true;
|
|
118
|
+
}
|
|
119
|
+
});
|
|
120
|
+
return changed ? Array.from(currentTags) : null;
|
|
121
|
+
}
|
|
122
|
+
/**
 * 3. The Orchestrator (Sink)
 * Connects the Stream to the Processor.
 *
 * Loads the master tag list from TAGS_FILE, streams every atom, and writes the
 * new tag list back for each atom that gained tags. Each update is attempted up
 * to three times with a short, growing delay; an atom whose update keeps
 * failing is logged and skipped.
 *
 * The tag file may be either `{ "keywords": [...] }` or a bare array of tags.
 *
 * @returns A promise that resolves when the stream is exhausted, or immediately
 *   when the tag file does not exist.
 * @throws If the tag file cannot be read or parsed, or if the atom stream fails.
 */
export async function runInfectionLoop() {
    console.log('🦠Infection Protocol: Initializing...');
    // Load the "Virus" (Master Tag List)
    if (!fs.existsSync(TAGS_FILE)) {
        // NOTE(review): the message mentions checking other paths, but no other location is tried.
        console.warn(`🦠No tag definitions found at ${TAGS_FILE}. Checking common paths...`);
        return;
    }
    const parsed = JSON.parse(fs.readFileSync(TAGS_FILE, 'utf-8'));
    // Normalise both accepted file shapes to the { keywords } form used by infectAtom.
    let keywords = [];
    if (Array.isArray(parsed)) {
        keywords = parsed;
    }
    else if (parsed && Array.isArray(parsed.keywords)) {
        keywords = parsed.keywords;
    }
    const viralPatterns = { keywords };
    const maxAttempts = 3;
    let infectedCount = 0;
    for await (const atom of atomStream()) {
        const newTags = infectAtom(atom, viralPatterns);
        if (!newTags) {
            continue;
        }
        // Persist the infection. Retried to ride out lock contention with Ingest.
        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                await db.run(`UPDATE atoms SET tags = $1 WHERE id = $2`, [newTags, atom.id]);
                infectedCount++;
                if (infectedCount % 100 === 0)
                    process.stdout.write(`.`);
                break; // Success
            }
            catch (error) {
                if (attempt >= maxAttempts) {
                    const reason = error instanceof Error ? error.message : String(error);
                    console.warn(`[Infector] Failed to update atom ${atom.id} after ${maxAttempts} attempts:`, reason);
                }
                else {
                    // Small backoff: 100 ms after the first failure, 200 ms after the second
                    await new Promise(r => setTimeout(r, 100 * attempt));
                }
            }
        }
    }
    console.log(`\n🦠Infection Complete. ${infectedCount} atoms infected with new context.`);
}
|
|
168
|
+
//# sourceMappingURL=infector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"infector.js","sourceRoot":"","sources":["../../../src/services/tags/infector.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,IAAI,MAAM,UAAU,CAAC;AAC5B,OAAO,KAAK,MAAM,yBAAyB,CAAC;AAC5C,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AAEtC,gDAAgD;AAChD,yEAAyE;AACzE,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAQ,CAAC;AAE/B,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,0EAA0E;AAC1I,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,QAAQ,EAAE,SAAS,EAAE,oBAAoB,CAAC,CAAC;AAErF;;;;GAIG;AACH,KAAK,SAAS,CAAC,CAAC,UAAU,CAAC,SAAS,GAAG,EAAE;IACrC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,OAAO,IAAI,EAAE,CAAC;QACV,qCAAqC;QACrC,MAAM,KAAK,GAAG;;;;;;SAMb,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;QACxD,UAAU,EAAE,CAAC;QAEb,IAAI,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACjE,OAAO,CAAC,GAAG,CAAC,sCAAsC,MAAM,CAAC,IAAI,CAAC,MAAM,oBAAoB,UAAU,GAAG,CAAC,CAAC;QAC3G,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,MAAM,CAAC,mBAAmB;QAC9B,CAAC;QAED,6CAA6C;QAC7C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5B,gEAAgE;YAChE,IAAI,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC;YAEtB,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBACrB,6CAA6C;gBAC7C,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,GAAG,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACJ,8CAA8C;gBAC9C,EAAE,GAAG,GAAG,CAAC,EAAE,CAAC;gBACZ,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;gBACtB,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;YACpB,CAAC;YAED,MAAM,GAAG,EAAY,CAAC,CAAC,6BAA6B;YAEpD,MAAM;gBACF,EAAE,EAAE,EAAY;gBAChB,OAAO,EAAE,OAAiB;gBAC1B,IAAI,EAAG,IAAiB,IAAI,EAAE;aACjC,CAAC;QACN,CAAC;IACL,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,IAAqD,EAAE,QAAa;IAC3F,IAAI,CAAC,IAAI,
CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE/B,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,8DAA8D;IAC9D,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,IAAI,GAAI,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAY,CAAC,WAAW,EAAE,CAAC;IAE7D,oCAAoC;IACpC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAe,EAAE,EAAE;QAC1C,IAAI,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO;QAErC,iCAAiC;QACjC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;QAE/D,uEAAuE;QACvE,kFAAkF;QAClF,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QAErD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,GAAG,aAAa,GAAG,OAAO,GAAG,WAAW,EAAE,EAAE,GAAG,CAAC,CAAC;QAE1E,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa;YACvC,OAAO,GAAG,IAAI,CAAC;QACnB,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6CAA6C;IAE7C,yBAAyB;IACzB,0EAA0E;IAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACxD,IAAI,WAAW,EAAE,CAAC;QACd,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC;IACrG,CAAC;IAED,yBAAyB;IACzB,MAAM,MAAM,GAAG;QACX,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM;QACtD,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU;KACnE,CAAC;IAEF,4EAA4E;IAC5E,sEAAsE;IACtE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;QACnB,uCAAuC;QACvC,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,MAAM,KAAK,KAAK,EAAE,GAAG,CAAC,CAAC;QAChD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9C,sCAAsC;YACtC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3D,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACnB,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACpD,CAAC;AAED;;;GAGG;AACH,MAAM,
CAAC,KAAK,UAAU,gBAAgB;IAClC,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IAEtD,qCAAqC;IACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5B,gEAAgE;QAChE,OAAO,CAAC,IAAI,CAAC,kCAAkC,SAAS,4BAA4B,CAAC,CAAC;QACtF,OAAO;IACX,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IACtE,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,yCAAyC;IACzC,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,UAAU,EAAE,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QAEhD,IAAI,OAAO,EAAE,CAAC;YACV,wBAAwB;YACxB,4DAA4D;YAC5D,qEAAqE;YACrE,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,MAAM,WAAW,GAAG,CAAC,CAAC;YACtB,OAAO,QAAQ,GAAG,WAAW,EAAE,CAAC;gBAC5B,IAAI,CAAC;oBACD,MAAM,EAAE,CAAC,GAAG,CACR,0CAA0C,EAC1C,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,CACrB,CAAC;oBAEF,aAAa,EAAE,CAAC;oBAChB,IAAI,aAAa,GAAG,GAAG,KAAK,CAAC;wBAAE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBACzD,MAAM,CAAC,UAAU;gBACrB,CAAC;gBAAC,OAAO,KAAU,EAAE,CAAC;oBAClB,QAAQ,EAAE,CAAC;oBACX,IAAI,QAAQ,IAAI,WAAW,EAAE,CAAC;wBAC1B,OAAO,CAAC,IAAI,CAAC,oCAAoC,IAAI,CAAC,EAAE,UAAU,WAAW,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;oBAC9G,CAAC;yBAAM,CAAC;wBACJ,gBAAgB;wBAChB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC;oBAC1D,CAAC;gBACL,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,aAAa,mCAAmC,CAAC,CAAC;AAC9F,CAAC"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag Quality Auditor
|
|
3
|
+
*
|
|
4
|
+
* Analyzes tagging patterns to identify:
|
|
5
|
+
* 1. Under-tagged atoms (high content, few tags)
|
|
6
|
+
* 2. Over-tagged atoms (noise)
|
|
7
|
+
* 3. Orphan tags (used once, never retrieved)
|
|
8
|
+
* 4. Tag clusters (tags that always appear together)
|
|
9
|
+
* 5. Missing tags (common terms not used as tags)
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* const auditor = new TagAuditor();
|
|
13
|
+
* const report = await auditor.generateAuditReport();
|
|
14
|
+
* console.log(report);
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Aggregate result returned by `TagAuditor.generateAuditReport()`.
 *
 * Field meanings below are inferred from the field names and the file's
 * header comment; confirm against the implementation before relying on them.
 */
interface AuditReport {
    /** Number of atoms covered by the audit (presumably the whole store). */
    totalAtoms: number;
    /** Number of tags counted by the audit. */
    totalTags: number;
    /** Atoms flagged as having a lot of content but few tags. */
    underTagged: Array<UnderTaggedAtom>;
    /** Tags flagged as orphans. */
    orphanTags: Array<string>;
    /** Groups of tags reported as appearing together; one inner array per group. */
    tagClusters: Array<Array<string>>;
    /** Per-atom tag suggestions. */
    suggestions: Array<TagSuggestion>;
    /** Summary statistics about tag usage. */
    statistics: TagStatistics;
}
|
|
25
|
+
/**
 * One entry in the list returned by `TagAuditor.findUnderTaggedAtoms()`.
 */
interface UnderTaggedAtom {
    /** Identifier of the atom. */
    id: string;
    /** Where the atom came from (exact format not visible here — TODO confirm). */
    source: string;
    /** Size of the atom's content (unit — characters vs. bytes — TODO confirm). */
    contentLength: number;
    /** How many tags the atom currently carries. */
    tagCount: number;
    /** Tags proposed for this atom. */
    suggestedTags: Array<string>;
}
|
|
32
|
+
/**
 * A set of proposed tags for a single atom, as carried in
 * `AuditReport.suggestions`.
 */
interface TagSuggestion {
    /** Identifier of the atom the suggestion applies to. */
    atomId: string;
    /** Tags proposed for that atom. */
    suggestedTags: Array<string>;
    /** Confidence score for the suggestion (scale/range not visible here — TODO confirm). */
    confidence: number;
}
|
|
37
|
+
/**
 * Summary figures returned by `TagAuditor.getTagStatistics()`.
 */
interface TagStatistics {
    /** Mean number of tags per atom. */
    avgTagsPerAtom: number;
    /** Median number of tags per atom. */
    medianTagsPerAtom: number;
    /** Largest tag count found on any single atom. */
    maxTagsInAtom: number;
    /** Number of distinct tags. */
    uniqueTags: number;
    /** Number of tags that occur exactly once. */
    tagsUsedOnce: number;
}
|
|
44
|
+
/**
 * Public surface of the tag quality auditor.
 *
 * All query methods are asynchronous. Default values for the optional
 * parameters are defined in the implementation and are not visible in this
 * declaration.
 */
export declare class TagAuditor {
    /**
     * Find under-tagged content.
     *
     * @param minContentLength Optional content-size threshold (presumably a lower bound — TODO confirm).
     * @param maxTags Optional tag-count threshold (presumably an upper bound — TODO confirm).
     * @returns The atoms flagged as under-tagged.
     */
    findUnderTaggedAtoms(minContentLength?: number, maxTags?: number): Promise<Array<UnderTaggedAtom>>;

    /**
     * Find orphan tags (used only once).
     *
     * @param minAtoms Optional atom-count threshold (exact semantics not visible here — TODO confirm).
     * @returns The orphan tag names.
     */
    findOrphanTags(minAtoms?: number): Promise<Array<string>>;

    /**
     * Find tag clusters (tags that always appear together).
     *
     * @param minSupport Optional minimum support for a cluster (unit not visible here — TODO confirm).
     * @returns One inner array of tag names per cluster.
     */
    findTagClusters(minSupport?: number): Promise<Array<Array<string>>>;

    /**
     * Suggest tags for an atom based on content.
     *
     * @param atomId Identifier of the atom to suggest tags for.
     * @param limit Optional cap on the number of suggestions (presumably — TODO confirm).
     * @param context Optional pre-fetched data for the atom: its content, the
     *   tags it already has, and the full tag vocabulary.
     * @returns The suggested tag names.
     */
    suggestTagsForAtom(
        atomId: string,
        limit?: number,
        context?: {
            content: string;
            existingTags: Array<string>;
            allTags: Array<string>;
        },
    ): Promise<Array<string>>;

    /**
     * Get tag statistics.
     *
     * @returns Summary figures about tag usage.
     */
    getTagStatistics(): Promise<TagStatistics>;

    /**
     * Generate comprehensive audit report.
     *
     * @returns The combined audit report.
     */
    generateAuditReport(): Promise<AuditReport>;

    /** Internal helper; signature intentionally hidden by the declaration emit. */
    private getTotalAtoms;

    /** Internal helper; signature intentionally hidden by the declaration emit. */
    private getTotalTags;
}
|
|
76
|
+
export {};
|
|
77
|
+
//# sourceMappingURL=tag-auditor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tag-auditor.d.ts","sourceRoot":"","sources":["../../../src/services/tags/tag-auditor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAKH,UAAU,WAAW;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,eAAe,EAAE,CAAC;IAC/B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,WAAW,EAAE,MAAM,EAAE,EAAE,CAAC;IACxB,WAAW,EAAE,aAAa,EAAE,CAAC;IAC7B,UAAU,EAAE,aAAa,CAAC;CAC3B;AAED,UAAU,eAAe;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,UAAU,aAAa;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,aAAa;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,UAAU;IACrB;;OAEG;IACG,oBAAoB,CAAC,gBAAgB,GAAE,MAAY,EAAE,OAAO,GAAE,MAAU,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAiD3G;;OAEG;IACG,cAAc,CAAC,QAAQ,GAAE,MAAY,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAyB/D;;OAEG;IACG,eAAe,CAAC,UAAU,GAAE,MAAW,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAkEnE;;OAEG;IACG,kBAAkB,CACtB,MAAM,EAAE,MAAM,EACd,KAAK,GAAE,MAAU,EACjB,OAAO,CAAC,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,GACvE,OAAO,CAAC,MAAM,EAAE,CAAC;IA8DpB;;OAEG;IACG,gBAAgB,IAAI,OAAO,CAAC,aAAa,CAAC;IAiDhD;;OAEG;IACG,mBAAmB,IAAI,OAAO,CAAC,WAAW,CAAC;YAoCnC,aAAa;YAKb,YAAY;CAK3B"}
|