npm - @wanshi-kg/wanshi - Versions diffs - 0.1.0 - Mend

@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (443) hide show

package/LICENSE +21 -0
package/README.md +458 -0
package/dist/__tests__/helpers.js +27 -0
package/dist/__tests__/helpers.js.map +1 -0
package/dist/cli/commands/export.command.js +99 -0
package/dist/cli/commands/export.command.js.map +1 -0
package/dist/cli/commands/index.js +22 -0
package/dist/cli/commands/index.js.map +1 -0
package/dist/cli/commands/inspectMerges.command.js +84 -0
package/dist/cli/commands/inspectMerges.command.js.map +1 -0
package/dist/cli/commands/metrics.command.js +196 -0
package/dist/cli/commands/metrics.command.js.map +1 -0
package/dist/cli/commands/process.command.js +82 -0
package/dist/cli/commands/process.command.js.map +1 -0
package/dist/cli/commands/watch.command.js +91 -0
package/dist/cli/commands/watch.command.js.map +1 -0
package/dist/cli/index.js +269 -0
package/dist/cli/index.js.map +1 -0
package/dist/cli/optionsToConfig.js +160 -0
package/dist/cli/optionsToConfig.js.map +1 -0
package/dist/config/index.js +59 -0
package/dist/config/index.js.map +1 -0
package/dist/config/legacyHints.js +113 -0
package/dist/config/legacyHints.js.map +1 -0
package/dist/config/schema.js +803 -0
package/dist/config/schema.js.map +1 -0
package/dist/config/ui.js +221 -0
package/dist/config/ui.js.map +1 -0
package/dist/core/DirectoryProcessor.js +725 -0
package/dist/core/DirectoryProcessor.js.map +1 -0
package/dist/core/adapters/IStructuredAdapter.js +3 -0
package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
package/dist/core/adapters/SqliteAdapter.js +267 -0
package/dist/core/adapters/SqliteAdapter.js.map +1 -0
package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
package/dist/core/adapters/index.js +20 -0
package/dist/core/adapters/index.js.map +1 -0
package/dist/core/checkpoint/CheckpointService.js +188 -0
package/dist/core/checkpoint/CheckpointService.js.map +1 -0
package/dist/core/checkpoint/index.js +18 -0
package/dist/core/checkpoint/index.js.map +1 -0
package/dist/core/corpus/CorpusAnalyzer.js +266 -0
package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
package/dist/core/corpus/CorpusProfileStore.js +92 -0
package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
package/dist/core/corpus/index.js +21 -0
package/dist/core/corpus/index.js.map +1 -0
package/dist/core/corpus/normalizeGlossary.js +60 -0
package/dist/core/corpus/normalizeGlossary.js.map +1 -0
package/dist/core/corpus/relPath.js +52 -0
package/dist/core/corpus/relPath.js.map +1 -0
package/dist/core/corpus/termFrequency.js +86 -0
package/dist/core/corpus/termFrequency.js.map +1 -0
package/dist/core/cost/CostMeter.js +235 -0
package/dist/core/cost/CostMeter.js.map +1 -0
package/dist/core/cost/index.js +19 -0
package/dist/core/cost/index.js.map +1 -0
package/dist/core/cost/prices.js +38 -0
package/dist/core/cost/prices.js.map +1 -0
package/dist/core/cv/ObjectDetectionService.js +119 -0
package/dist/core/cv/ObjectDetectionService.js.map +1 -0
package/dist/core/di/ContainerFactory.js +670 -0
package/dist/core/di/ContainerFactory.js.map +1 -0
package/dist/core/di/DIContainer.js +103 -0
package/dist/core/di/DIContainer.js.map +1 -0
package/dist/core/di/index.js +19 -0
package/dist/core/di/index.js.map +1 -0
package/dist/core/errors/CustomErrors.js +342 -0
package/dist/core/errors/CustomErrors.js.map +1 -0
package/dist/core/errors/index.js +18 -0
package/dist/core/errors/index.js.map +1 -0
package/dist/core/export/KnowledgeGraphExportService.js +56 -0
package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
package/dist/core/export/index.js +19 -0
package/dist/core/export/index.js.map +1 -0
package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
package/dist/core/export/strategies/IExportStrategy.js +3 -0
package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
package/dist/core/export/strategies/McpExportStrategy.js +67 -0
package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
package/dist/core/export/strategies/index.js +25 -0
package/dist/core/export/strategies/index.js.map +1 -0
package/dist/core/export/strategies/kbTriples.js +60 -0
package/dist/core/export/strategies/kbTriples.js.map +1 -0
package/dist/core/index.js +22 -0
package/dist/core/index.js.map +1 -0
package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
package/dist/core/knowledge/MergeRecord.js +3 -0
package/dist/core/knowledge/MergeRecord.js.map +1 -0
package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
package/dist/core/knowledge/canon/index.js +18 -0
package/dist/core/knowledge/canon/index.js.map +1 -0
package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
package/dist/core/knowledge/contradiction/index.js +19 -0
package/dist/core/knowledge/contradiction/index.js.map +1 -0
package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
package/dist/core/knowledge/grounding/index.js +20 -0
package/dist/core/knowledge/grounding/index.js.map +1 -0
package/dist/core/knowledge/grounding/verbalize.js +38 -0
package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
package/dist/core/knowledge/index.js +20 -0
package/dist/core/knowledge/index.js.map +1 -0
package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
package/dist/core/knowledge/vocabulary.js +162 -0
package/dist/core/knowledge/vocabulary.js.map +1 -0
package/dist/core/llm/EmbeddingService.js +113 -0
package/dist/core/llm/EmbeddingService.js.map +1 -0
package/dist/core/llm/OllamaService.js +146 -0
package/dist/core/llm/OllamaService.js.map +1 -0
package/dist/core/llm/OpenAICompatibleService.js +190 -0
package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
package/dist/core/llm/embeddingUtils.js +25 -0
package/dist/core/llm/embeddingUtils.js.map +1 -0
package/dist/core/llm/index.js +23 -0
package/dist/core/llm/index.js.map +1 -0
package/dist/core/llm/prompts/PromptManager.js +388 -0
package/dist/core/llm/prompts/PromptManager.js.map +1 -0
package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
package/dist/core/pipeline/GroundingTransform.js +52 -0
package/dist/core/pipeline/GroundingTransform.js.map +1 -0
package/dist/core/pipeline/PipelineRunner.js +51 -0
package/dist/core/pipeline/PipelineRunner.js.map +1 -0
package/dist/core/pipeline/RelationFilterTransform.js +72 -0
package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
package/dist/core/pipeline/index.js +20 -0
package/dist/core/pipeline/index.js.map +1 -0
package/dist/core/processor/FileProcessor.js +184 -0
package/dist/core/processor/FileProcessor.js.map +1 -0
package/dist/core/processor/ProcessedRegistry.js +38 -0
package/dist/core/processor/ProcessedRegistry.js.map +1 -0
package/dist/core/processor/ast/AstSeedService.js +0 -0
package/dist/core/processor/ast/AstSeedService.js.map +1 -0
package/dist/core/processor/ast/AstSymbolStore.js +110 -0
package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
package/dist/core/processor/ast/index.js +19 -0
package/dist/core/processor/ast/index.js.map +1 -0
package/dist/core/processor/chunking/TextChunker.js +98 -0
package/dist/core/processor/chunking/TextChunker.js.map +1 -0
package/dist/core/processor/chunking/index.js +18 -0
package/dist/core/processor/chunking/index.js.map +1 -0
package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
package/dist/core/processor/classifier/index.js +21 -0
package/dist/core/processor/classifier/index.js.map +1 -0
package/dist/core/processor/classifier/mergeClassifications.js +32 -0
package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
package/dist/core/processor/index.js +20 -0
package/dist/core/processor/index.js.map +1 -0
package/dist/core/processor/readers/AudioReader.js +462 -0
package/dist/core/processor/readers/AudioReader.js.map +1 -0
package/dist/core/processor/readers/BinaryReader.js +90 -0
package/dist/core/processor/readers/BinaryReader.js.map +1 -0
package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
package/dist/core/processor/readers/ChatExportReader.js +365 -0
package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
package/dist/core/processor/readers/DoclingReader.js +445 -0
package/dist/core/processor/readers/DoclingReader.js.map +1 -0
package/dist/core/processor/readers/EmailReader.js +259 -0
package/dist/core/processor/readers/EmailReader.js.map +1 -0
package/dist/core/processor/readers/EpubReader.js +175 -0
package/dist/core/processor/readers/EpubReader.js.map +1 -0
package/dist/core/processor/readers/FileReader.js +90 -0
package/dist/core/processor/readers/FileReader.js.map +1 -0
package/dist/core/processor/readers/FileReaderFactory.js +49 -0
package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
package/dist/core/processor/readers/HtmlReader.js +371 -0
package/dist/core/processor/readers/HtmlReader.js.map +1 -0
package/dist/core/processor/readers/ImageReader.js +162 -0
package/dist/core/processor/readers/ImageReader.js.map +1 -0
package/dist/core/processor/readers/JsonFileReader.js +232 -0
package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
package/dist/core/processor/readers/JupyterReader.js +178 -0
package/dist/core/processor/readers/JupyterReader.js.map +1 -0
package/dist/core/processor/readers/LatexReader.js +176 -0
package/dist/core/processor/readers/LatexReader.js.map +1 -0
package/dist/core/processor/readers/MarkdownReader.js +289 -0
package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
package/dist/core/processor/readers/MistralOcrReader.js +198 -0
package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
package/dist/core/processor/readers/OfficeReader.js +174 -0
package/dist/core/processor/readers/OfficeReader.js.map +1 -0
package/dist/core/processor/readers/PdfReader.js +116 -0
package/dist/core/processor/readers/PdfReader.js.map +1 -0
package/dist/core/processor/readers/RtfReader.js +107 -0
package/dist/core/processor/readers/RtfReader.js.map +1 -0
package/dist/core/processor/readers/SubtitleReader.js +145 -0
package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
package/dist/core/processor/readers/TextReader.js +129 -0
package/dist/core/processor/readers/TextReader.js.map +1 -0
package/dist/core/processor/readers/TranscriptReader.js +234 -0
package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
package/dist/core/processor/readers/image/imageMetadata.js +155 -0
package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
package/dist/core/processor/readers/index.js +41 -0
package/dist/core/processor/readers/index.js.map +1 -0
package/dist/core/processor/readers/referenceExtraction.js +198 -0
package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
package/dist/core/processor/readers/stripReferences.js +59 -0
package/dist/core/processor/readers/stripReferences.js.map +1 -0
package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
package/dist/core/progress/NoopProgressEmitter.js +15 -0
package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
package/dist/core/progress/index.js +19 -0
package/dist/core/progress/index.js.map +1 -0
package/dist/core/trace/TraceWriter.js +100 -0
package/dist/core/trace/TraceWriter.js.map +1 -0
package/dist/core/trace/events.js +13 -0
package/dist/core/trace/events.js.map +1 -0
package/dist/core/trace/index.js +20 -0
package/dist/core/trace/index.js.map +1 -0
package/dist/core/trace/lineage.js +97 -0
package/dist/core/trace/lineage.js.map +1 -0
package/dist/evaluation/BenchmarkRunner.js +171 -0
package/dist/evaluation/BenchmarkRunner.js.map +1 -0
package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
package/dist/evaluation/classifier/labeledSamples.js +379 -0
package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
package/dist/evaluation/compare/goldCompare.js +126 -0
package/dist/evaluation/compare/goldCompare.js.map +1 -0
package/dist/evaluation/crossre/compareScoring.js +30 -0
package/dist/evaluation/crossre/compareScoring.js.map +1 -0
package/dist/evaluation/datasets/CrossREDataset.js +170 -0
package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
package/dist/evaluation/datasets/IDataset.js +3 -0
package/dist/evaluation/datasets/IDataset.js.map +1 -0
package/dist/evaluation/datasets/RebelDataset.js +117 -0
package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
package/dist/evaluation/datasets/RedocredDataset.js +218 -0
package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
package/dist/evaluation/index.js +33 -0
package/dist/evaluation/index.js.map +1 -0
package/dist/evaluation/matching/ExactMatcher.js +75 -0
package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
package/dist/evaluation/matching/SemanticMatcher.js +143 -0
package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
package/dist/evaluation/metrics/TripleMetrics.js +64 -0
package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
package/dist/evaluation/mine/MineCheckpoint.js +114 -0
package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
package/dist/evaluation/mine/MineDataset.js +208 -0
package/dist/evaluation/mine/MineDataset.js.map +1 -0
package/dist/evaluation/mine/MineReporter.js +98 -0
package/dist/evaluation/mine/MineReporter.js.map +1 -0
package/dist/evaluation/mine/MineRunner.js +148 -0
package/dist/evaluation/mine/MineRunner.js.map +1 -0
package/dist/evaluation/mine/MineScorer.js +127 -0
package/dist/evaluation/mine/MineScorer.js.map +1 -0
package/dist/evaluation/mine/types.js +12 -0
package/dist/evaluation/mine/types.js.map +1 -0
package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
package/dist/evaluation/reporters/JsonReporter.js +50 -0
package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
package/dist/index.js +28 -0
package/dist/index.js.map +1 -0
package/dist/quality/CompositeScore.js +61 -0
package/dist/quality/CompositeScore.js.map +1 -0
package/dist/quality/ConsistencyMetrics.js +70 -0
package/dist/quality/ConsistencyMetrics.js.map +1 -0
package/dist/quality/FactualMetrics.js +76 -0
package/dist/quality/FactualMetrics.js.map +1 -0
package/dist/quality/GraphHealthMetrics.js +68 -0
package/dist/quality/GraphHealthMetrics.js.map +1 -0
package/dist/quality/SemanticMetrics.js +102 -0
package/dist/quality/SemanticMetrics.js.map +1 -0
package/dist/quality/StructuralMetrics.js +60 -0
package/dist/quality/StructuralMetrics.js.map +1 -0
package/dist/quality/index.js +23 -0
package/dist/quality/index.js.map +1 -0
package/dist/shared/index.js +20 -0
package/dist/shared/index.js.map +1 -0
package/dist/shared/logger/Logger.js +3 -0
package/dist/shared/logger/Logger.js.map +1 -0
package/dist/shared/logger/LoggerFactory.js +75 -0
package/dist/shared/logger/LoggerFactory.js.map +1 -0
package/dist/shared/logger/index.js +19 -0
package/dist/shared/logger/index.js.map +1 -0
package/dist/shared/shutdown.js +30 -0
package/dist/shared/shutdown.js.map +1 -0
package/dist/shared/utils/agglomerativeCluster.js +269 -0
package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
package/dist/shared/utils/astSymbols.js +69 -0
package/dist/shared/utils/astSymbols.js.map +1 -0
package/dist/shared/utils/cosineSimilarity.js +18 -0
package/dist/shared/utils/cosineSimilarity.js.map +1 -0
package/dist/shared/utils/directoryTree.js +184 -0
package/dist/shared/utils/directoryTree.js.map +1 -0
package/dist/shared/utils/documentOutline.js +74 -0
package/dist/shared/utils/documentOutline.js.map +1 -0
package/dist/shared/utils/index.js +24 -0
package/dist/shared/utils/index.js.map +1 -0
package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
package/dist/shared/utils/parseJsonLenient.js +27 -0
package/dist/shared/utils/parseJsonLenient.js.map +1 -0
package/dist/shared/utils/readConfig.js +42 -0
package/dist/shared/utils/readConfig.js.map +1 -0
package/dist/shared/utils/readRtf.js +216 -0
package/dist/shared/utils/readRtf.js.map +1 -0
package/dist/shared/utils/softmax.js +26 -0
package/dist/shared/utils/softmax.js.map +1 -0
package/dist/types/ContentClass.js +3 -0
package/dist/types/ContentClass.js.map +1 -0
package/dist/types/CorpusProfile.js +3 -0
package/dist/types/CorpusProfile.js.map +1 -0
package/dist/types/IContradictionChecker.js +3 -0
package/dist/types/IContradictionChecker.js.map +1 -0
package/dist/types/ICorpusAnalyzer.js +3 -0
package/dist/types/ICorpusAnalyzer.js.map +1 -0
package/dist/types/IDirectoryProcessor.js +3 -0
package/dist/types/IDirectoryProcessor.js.map +1 -0
package/dist/types/IEmbeddingProvider.js +3 -0
package/dist/types/IEmbeddingProvider.js.map +1 -0
package/dist/types/IEmbeddingService.js +6 -0
package/dist/types/IEmbeddingService.js.map +1 -0
package/dist/types/IFileProcessor.js +3 -0
package/dist/types/IFileProcessor.js.map +1 -0
package/dist/types/IGroundingChecker.js +3 -0
package/dist/types/IGroundingChecker.js.map +1 -0
package/dist/types/IKnowledgeGraphBuilder.js +3 -0
package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
package/dist/types/IKnowledgeGraphExporter.js +3 -0
package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
package/dist/types/IKnowledgeGraphMerger.js +3 -0
package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
package/dist/types/IKnowledgeGraphSearch.js +3 -0
package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
package/dist/types/ILLMProvider.js +3 -0
package/dist/types/ILLMProvider.js.map +1 -0
package/dist/types/ILLMService.js +3 -0
package/dist/types/ILLMService.js.map +1 -0
package/dist/types/IObjectDetector.js +3 -0
package/dist/types/IObjectDetector.js.map +1 -0
package/dist/types/IProcessingService.js +3 -0
package/dist/types/IProcessingService.js.map +1 -0
package/dist/types/IProgressEmitter.js +3 -0
package/dist/types/IProgressEmitter.js.map +1 -0
package/dist/types/IPromptManager.js +3 -0
package/dist/types/IPromptManager.js.map +1 -0
package/dist/types/KnowledgeGraph.js +3 -0
package/dist/types/KnowledgeGraph.js.map +1 -0
package/dist/types/MCPKnowledgeGraph.js +3 -0
package/dist/types/MCPKnowledgeGraph.js.map +1 -0
package/dist/types/Observation.js +21 -0
package/dist/types/Observation.js.map +1 -0
package/dist/types/ProcessingOptions.js +3 -0
package/dist/types/ProcessingOptions.js.map +1 -0
package/dist/types/index.js +40 -0
package/dist/types/index.js.map +1 -0
package/package.json +122 -0

package/dist/core/knowledge/KnowledgeGraphBuilder.js ADDED Viewed

@@ -0,0 +1,627 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // TypeScript-emitted helper: expose m[k] on o under the name k2; an existing this.__createBinding is reused when present.
+    if (k2 === undefined) k2 = k; // the target name defaults to the source key
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } }; // getter reads m[k] on every access, so o[k2] tracks later changes to m[k]
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // fallback when Object.create is falsy: copy the current value instead of defining a property
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // TypeScript-emitted helper: attach v to o as its enumerable "default" property; an existing this.__setModuleDefault is reused when present.
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { // fallback when Object.create is falsy: plain assignment
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // TypeScript-emitted helper: build a namespace-style object for a required module; an existing this.__importStar is reused when present.
+    var ownKeys = function(o) { // on first call, replaces itself with Object.getOwnPropertyNames or the for-in fallback below
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
+        __setModuleDefault(result, mod); // the module object itself becomes result.default
+        return result;
+    };
+})();
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // TypeScript-emitted helper: run a generator function to completion and return a promise (P, or Promise when P is falsy) for its final value.
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wrap values that are not already instances of P
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with the settled value
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // throw the rejection reason into the generator
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next()); // start the generator with the given this and arguments
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.KnowledgeGraphBuilder = void 0;
+exports.buildGraphSchema = buildGraphSchema;
+const path = __importStar(require("path"));
+const crypto = __importStar(require("crypto"));
+const zod_1 = require("zod");
+const types_1 = require("../../types");
+const progress_1 = require("../progress");
+const vocabulary_1 = require("./vocabulary");
+const grounding_1 = require("./grounding");
+const shared_1 = require("../../shared");
+const trace_1 = require("../trace");
+/**
+ * Build the extraction schema. Under v5 both vocabularies are *closed*: when an
+ * allowed set is supplied, the field is a Zod enum; `entityType` falls back to the
+ * base set + `other`, `relationType` to the base set + `related_to`, so the model
+ * can never invent a one-off type/predicate. When a set is empty the field stays a
+ * free string (legacy behavior, e.g. older prompt versions).
+ *
+ * **Lenient coercion (recall guard):** the enum is wrapped in `.catch(escape)`, so an
+ * out-of-vocab value the model emits anyway (e.g. `relationType: "returns"`, which
+ * Ollama's soft `format` constraint doesn't reliably prevent) is coerced onto the
+ * catch-all (`other` / `related_to`) **per field** instead of failing Zod and
+ * discarding the *entire chunk* (3 retries → empty graph). This is the escapes'
+ * intended purpose ("prevent validation-failure recall loss"); coerced values surface
+ * in `KnowledgeMerger.logVocabularyFit`'s catch-all fraction (the too-tight-vocab
+ * signal), so nothing goes silent.
+ *
+ * **Escape selection:** for each field the escape value is the catch-all name
+ * (`other` / `related_to`) when the supplied list contains it, otherwise the first
+ * entry of that list.
+ *
+ * @param {string[]} [allowedTypes] Allowed entity types. When omitted or empty,
+ *   `entityType` is validated as a free string.
+ * @param {string[]} [allowedRelationTypes] Allowed predicates. When omitted or
+ *   empty, `relationType` is validated as an array of free strings.
+ * @returns A Zod object schema with an `entities` array (`name`, `entityType`,
+ *   `observations`) and a `relations` array (`from`, `to`, `relationType`).
+ */
+function buildGraphSchema(allowedTypes, allowedRelationTypes) {
+    const hasTypes = !!allowedTypes && allowedTypes.length > 0;
+    const hasRel = !!allowedRelationTypes && allowedRelationTypes.length > 0;
+    const entityEscape = hasTypes ? (allowedTypes.includes("other") ? "other" : allowedTypes[0]) : "other";
+    const relEscape = hasRel
+        ? allowedRelationTypes.includes("related_to")
+            ? "related_to"
+            : allowedRelationTypes[0]
+        : "related_to";
+    const entityType = hasTypes
+        ? zod_1.z
+            .enum(allowedTypes)
+            .catch(entityEscape)
+            .describe("Entity type — pick the closest; use 'other' if none fit")
+        : zod_1.z.string().describe("Entity description"); // NOTE(review): this description sits on the entityType field; "Entity type" may have been intended — confirm before changing, the text is part of the emitted schema
+    // v5's prompt asks for "one canonical predicate", so instruction-following models
+    // (e.g. gemma4) emit relationType as a scalar string ("depends_on") rather than a
+    // one-element array. Coerce scalar → [scalar] before validating so a compliant model
+    // isn't rejected; the array path is unchanged.
+    const toRelationArray = (v) => (Array.isArray(v) ? v : v == null ? [] : [v]); // null/undefined → [], arrays pass through untouched
+    const relationType = hasRel
+        ? zod_1.z
+            .preprocess(toRelationArray, zod_1.z.array(zod_1.z.enum(allowedRelationTypes).catch(relEscape)))
+            .describe("One canonical predicate; use 'related_to' if none fit")
+        : zod_1.z.preprocess(toRelationArray, zod_1.z.array(zod_1.z.string())).describe("List of relation types");
+    return zod_1.z.object({
+        entities: zod_1.z.array(zod_1.z.object({
+            name: zod_1.z.string().describe("Unique entity name"),
+            entityType,
+            // Models often emit referenced-but-undescribed entities with no observations
+            // field at all; default to [] so a missing array doesn't reject the whole chunk
+            // (and so observations drops out of the JSON-schema `required` list).
+            observations: zod_1.z
+                .array(zod_1.z.string())
+                .default([])
+                .describe("List of facts and observations about entity"),
+        })),
+        relations: zod_1.z.array(zod_1.z.object({
+            from: zod_1.z.string().describe("Relation source entity"),
+            to: zod_1.z.string().describe("Relation target entity"),
+            relationType,
+        })),
+    });
+}
+const DEFAULT_GRAPH_SCHEMA = buildGraphSchema(); // built with no allowed sets, so entityType and relationType entries are free strings
+/**
+ * Builds knowledge graphs from processed files using LLM
+ */
+class KnowledgeGraphBuilder {
+    constructor(options, logger) {
+        var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
+        /** Chunks whose extraction threw this run — left uncheckpointed (KG-02). */
+        this.failedChunks = [];
+        /** Claims the grounding gate rejected this run (WI3 manifest trace). */
+        this.groundingRejections = [];
+        this.llmService = options.llmService;
+        this.promptManager = options.promptManager;
+        this.checkpoint = options.checkpoint;
+        this.resume = (_a = options.resume) !== null && _a !== void 0 ? _a : false;
+        this.model = options.model;
+        this.promptVersion = (_b = options.promptVersion) !== null && _b !== void 0 ? _b : 'default';
+        this.inputRoot = (_c = options.inputRoot) !== null && _c !== void 0 ? _c : '';
+        this.logger = logger;
+        this.progress = (_d = options.progress) !== null && _d !== void 0 ? _d : new progress_1.NoopProgressEmitter();
+        this.grounding = (_e = options.grounding) !== null && _e !== void 0 ? _e : 'disabled';
+        this.groundingMinScore = (_f = options.groundingMinScore) !== null && _f !== void 0 ? _f : 0.5;
+        this.groundingChecker =
+            (_g = options.groundingChecker) !== null && _g !== void 0 ? _g : new grounding_1.KeywordGroundingChecker(this.groundingMinScore);
+        this.groundingSignature = (_h = options.groundingSignature) !== null && _h !== void 0 ? _h : '';
+        this.attachSourceSpans = (_j = options.attachSourceSpans) !== null && _j !== void 0 ? _j : false;
+        this.openPredicate = (_k = options.openPredicate) !== null && _k !== void 0 ? _k : false;
+        this.strictVocabulary = (_l = options.strictVocabulary) !== null && _l !== void 0 ? _l : false;
+    }
+    /** Chunks whose extraction failed this run (empty when all succeeded). */
+    getFailedChunks() {
+        return this.failedChunks;
+    }
+    /** Claims the inline grounding gate rejected this run (empty when none/off). */
+    getGroundingRejections() {
+        return this.groundingRejections;
+    }
+    /**
+     * Stable identity for a file in the checkpoint key: the path relative to the
+     * discovery root (`inputRoot`), normalized to posix separators. This makes
+     * resume survive relocating the whole input tree or changing the `input`
+     * prefix. Falls back to the raw path when there's no root or the file resolves
+     * outside it (`..`), so behavior degrades gracefully rather than mis-keying.
+     */
+    stablePathId(filePath) {
+        if (!this.inputRoot)
+            return filePath;
+        const rel = path.relative(this.inputRoot, filePath);
+        if (!rel || rel.startsWith('..') || path.isAbsolute(rel))
+            return filePath;
+        return rel.split(path.sep).join('/');
+    }
+    /**
+     * The *deterministic* extraction inputs other than the chunk's own text (KG-07),
+     * folded into the checkpoint key's `extra` so toggling any of them between
+     * `--resume` runs re-extracts the affected chunks instead of silently reusing a
+     * graph built under different settings: the grounding signature (Phase 5), the
+     * rendered system prompt (which already encodes the resolved entity/relation
+     * vocabulary + domain examples → the "schema shape"), the corpus glossary, and
+     * the classifier classes.
+     *
+     * Deliberately EXCLUDES the chunk's retrieved context: retrieval pulls from the
+     * graph built by *prior* (temperature>0, non-deterministic) extractions, so it
+     * differs on every run. Folding it into the key made the key unstable across runs
+     * and defeated `--resume` entirely whenever retrieval was on (the default) — a
+     * re-run after a crash matched nothing and re-extracted (and re-billed) every
+     * chunk. The key must hash deterministic *inputs*, never volatile *outputs*.
+     */
+    extractionExtra(systemPrompt, glossary, contentClasses) {
+        const h = crypto.createHash('sha1');
+        for (const part of [
+            this.groundingSignature,
+            systemPrompt,
+            glossary ? JSON.stringify(glossary) : '',
+            contentClasses ? JSON.stringify(contentClasses) : '',
+        ]) {
+            h.update(part);
+            h.update('\x00');
+        }
+        return h.digest('hex');
+    }
+    /**
+     * Build a knowledge graph from a processed file
+     */
+    build(processedFile, systemPrompt, retrieve, glossary) {
+        return __awaiter(this, void 0, void 0, function* () {
+            var _a, _b, _c;
+            this.logger.info(`Building knowledge graph for: ${processedFile.path}`);
+            const graphs = [];
+            const contentClasses = (_a = processedFile.metadata) === null || _a === void 0 ? void 0 : _a.classes;
+            const multiChunk = processedFile.chunks.length > 1;
+            // Process chunks if available
+            if (multiChunk) {
+                for (const chunk of processedFile.chunks) {
+                    // Cooperative interrupt: finish the in-flight chunk, then stop before
+                    // starting the next one so a partial graph can be flushed.
+                    if (shared_1.shutdown.isRequested()) {
+                        this.logger.warn(`Interrupted — stopping at chunk ${chunk.index}/${chunk.totalChunks} of ${processedFile.path}`);
+                        break;
+                    }
+                    // Retrieve context for THIS chunk's content (per-chunk retrieval).
+                    const retrievedContext = retrieve ? yield retrieve(chunk.content) : undefined;
+                    const kg = yield this.buildChunk(processedFile.path, chunk.index, chunk.totalChunks, chunk.content, this.chunkProvenance(processedFile, chunk), () => {
+                        var _a;
+                        return this.buildFromChunk(processedFile.path, chunk.content, (_a = processedFile.content) !== null && _a !== void 0 ? _a : '', // full file text → outline + grounding
+                        systemPrompt, chunk.index, chunk.totalChunks, retrievedContext, chunk.images, contentClasses, glossary);
+                    }, (entity) => {
+                        entity.files = [processedFile.path];
+                        entity.chunk = chunk.index;
+                        entity.totalChunks = chunk.totalChunks;
+                    }, this.extractionExtra(systemPrompt, glossary, contentClasses));
+                    graphs.push(kg);
+                }
+            }
+            else if (processedFile.chunks.length === 1) {
+                const chunk = processedFile.chunks[0];
+                const { content, images } = chunk;
+                const retrievedContext = retrieve ? yield retrieve(content) : undefined;
+                // Process entire file
+                const kg = yield this.buildChunk(processedFile.path, (_b = chunk.index) !== null && _b !== void 0 ? _b : 1, (_c = chunk.totalChunks) !== null && _c !== void 0 ? _c : 1, content, this.chunkProvenance(processedFile, chunk), () => this.buildFromContent(processedFile.path, content, systemPrompt, retrievedContext, images, contentClasses, glossary), (entity) => {
+                    entity.files = [processedFile.path];
+                }, this.extractionExtra(systemPrompt, glossary, contentClasses));
+                graphs.push(kg);
+            }
+            // Pin ingest-time document identity (reader metadata) as its own entity.
+            // Never trusted to extraction: body text is full of OTHER papers' IDs, and a
+            // cited paper's arXiv ID binding onto the host document is the worst-case
+            // provenance failure.
+            const identity = this.documentIdentityGraph(processedFile);
+            if (identity)
+                graphs.push(identity);
+            return graphs;
+        });
+    }
+    /** Build the pinned `document` entity from reader-supplied identity metadata. */
+    documentIdentityGraph(processedFile) {
+        var _a, _b;
+        const arxivId = (_a = processedFile.metadata) === null || _a === void 0 ? void 0 : _a.arxivId;
+        const title = (_b = processedFile.metadata) === null || _b === void 0 ? void 0 : _b.title;
+        if (!arxivId && !title)
+            return null;
+        const createdAt = new Date().toISOString();
+        const observations = [];
+        if (title) {
+            observations.push({ text: `Title: ${title}`, source: processedFile.path, createdAt });
+        }
+        if (arxivId) {
+            observations.push({ text: `arXiv:${arxivId}`, source: processedFile.path, createdAt });
+        }
+        const name = title !== null && title !== void 0 ? title : path.basename(processedFile.path);
+        this.logger.info(`Pinned document identity for ${processedFile.path}: ${name}`);
+        return {
+            entities: [
+                {
+                    name,
+                    entityType: "document",
+                    files: [processedFile.path],
+                    observations,
+                },
+            ],
+            relations: [],
+        };
+    }
+    /**
+     * Run one chunk through the LLM, or restore it from the checkpoint when
+     * resuming. Stored graphs already carry their entity metadata, so on a hit
+     * we skip the LLM call entirely.
+     *
+     * Sequence: emit `chunk_start`; compute the checkpoint key (only when resuming
+     * with a checkpoint); on a hit return the normalized stored graph after emitting
+     * `chunk_complete` (cached) and tracing it. Otherwise call `generate()`; if it
+     * throws, log, record the chunk in `failedChunks`, emit `chunk_failed`, trace the
+     * failure and return an empty graph without appending to the checkpoint. On
+     * success convert the raw result with `toGraph`, trace it, apply the grounding
+     * gate, run `attachMetadata` on every surviving entity, emit `chunk_complete`,
+     * and append the graph to the checkpoint when a key was computed.
+     *
+     * @param filePath path of the file the chunk belongs to
+     * @param chunkIndex index of this chunk
+     * @param totalChunks number of chunks in the file
+     * @param content chunk text (hashed into the key; also the grounding source)
+     * @param provenance provenance stamped onto the chunk's observations
+     * @param generate zero-argument function performing the LLM extraction
+     * @param attachMetadata callback invoked once per entity of a fresh graph
+     * @param extractionExtra extra fingerprint folded into the checkpoint key
+     * @returns promise of the chunk's graph (empty when extraction failed)
+     */
+    buildChunk(filePath, chunkIndex, totalChunks, content, provenance, generate, attachMetadata, extractionExtra) {
+        return __awaiter(this, void 0, void 0, function* () {
+            var _a, _b;
+            this.progress.emit({
+                type: "chunk_start",
+                path: filePath,
+                chunk: chunkIndex,
+                totalChunks,
+            });
+            const relPath = this.stablePathId(filePath); // root-relative id when available, else the raw path
+            const chunkId = `${relPath}#${chunkIndex}`;
+            const extractionId = `${chunkId}@0`; // the suffix after '@' is always 0 here
+            const key = this.resume && this.checkpoint // no key (and so no lookup or append) unless resuming with a checkpoint
+                ? this.checkpoint.computeKey(relPath, chunkIndex, content, this.model, this.promptVersion, extractionExtra)
+                : undefined;
+            if (key && this.checkpoint.has(key)) {
+                this.logger.info(`Skipping cached chunk ${chunkIndex}/${totalChunks} of ${filePath} (checkpoint hit)`);
+                const cached = this.normalizeGraph(this.checkpoint.get(key)); // upgrades legacy string observations
+                this.progress.emit({
+                    type: "chunk_complete",
+                    path: filePath,
+                    chunk: chunkIndex,
+                    totalChunks,
+                    entities: cached.entities.length,
+                    relations: cached.relations.length,
+                    cached: true,
+                });
+                // Mint/register the cached chunk's mentions too so lineage works on resume.
+                this.traceExtraction(cached, { extractionId, chunkId, filePath, chunkIndex, checkpointHit: true });
+                return cached;
+            }
+            let raw;
+            try {
+                raw = yield generate();
+            }
+            catch (error) {
+                // Extraction threw (retries exhausted, truncation, network/credits).
+                // Record it and return an empty graph WITHOUT checkpointing, so the chunk
+                // is retried on the next --resume rather than cached as done-and-empty.
+                const message = error instanceof Error ? error.message : String(error);
+                this.logger.error(`Extraction failed for chunk ${chunkIndex}/${totalChunks} of ${filePath} ` +
+                    `— left uncheckpointed so --resume retries it: ${message}`);
+                this.failedChunks.push({ filePath, chunkIndex, totalChunks, error: message });
+                this.progress.emit({
+                    type: "chunk_failed",
+                    path: filePath,
+                    chunk: chunkIndex,
+                    totalChunks,
+                    error: message,
+                });
+                this.traceExtraction({ entities: [], relations: [] }, { extractionId, chunkId, filePath, chunkIndex, checkpointHit: false, failed: true, error: message });
+                return { entities: [], relations: [] };
+            }
+            const usage = (_b = (_a = this.llmService).getLastUsage) === null || _b === void 0 ? void 0 : _b.call(_a); // optional method: undefined when the service lacks it
+            const graph0 = this.toGraph(raw, provenance, content);
+            // Register mention IDs (pre-grounding) + emit the extraction event. Mention IDs
+            // are derived deterministically from content, so grounding can reference them
+            // without anything being stored on the graph objects (observe-only).
+            this.traceExtraction(graph0, { extractionId, chunkId, filePath, chunkIndex, checkpointHit: false, usage });
+            const kg = yield this.applyGroundingGate(graph0, content, filePath, chunkIndex, extractionId);
+            kg.entities.forEach(attachMetadata); // metadata is stamped after grounding, before checkpointing
+            this.progress.emit({
+                type: "chunk_complete",
+                path: filePath,
+                chunk: chunkIndex,
+                totalChunks,
+                entities: kg.entities.length,
+                relations: kg.relations.length,
+                cached: false,
+            });
+            if (key) { // only set when resuming with a checkpoint
+                yield this.checkpoint.append({
+                    key,
+                    filePath,
+                    relPath,
+                    chunkIndex,
+                    totalChunks,
+                    model: this.model,
+                    promptVersion: this.promptVersion,
+                    kg,
+                });
+            }
+            return kg;
+        });
+    }
+    /**
+     * Scope the entity-type enum: the active content domain's `primaryEntityTypes`
+     * ∪ corpus-glossary entity types ∪ base set ∪ `other`. Delegates to the shared
+     * {@link allowedEntityTypes} so the enum and the prompt hints derive from one
+     * source. Always closed — with no class and no glossary it still returns the
+     * base set (+`other`), so `entityType` is an enforced enum even on an
+     * un-profiled, un-classified run.
+     */
+    resolveAllowedTypes(contentClasses, glossary) {
+        var _a, _b;
+        // Open-predicate: no enum at all → buildGraphSchema falls to free `z.string()`.
+        if (this.openPredicate)
+            return undefined;
+        // Strict: a supplied glossary REPLACES the base/domain sets (exact ontology).
+        if (this.strictVocabulary && ((_a = glossary === null || glossary === void 0 ? void 0 : glossary.entityTypes) === null || _a === void 0 ? void 0 : _a.length)) {
+            return Array.from(new Set([...glossary.entityTypes, vocabulary_1.ENTITY_TYPE_ESCAPE]));
+        }
+        return (0, vocabulary_1.allowedEntityTypes)(contentClasses, (_b = glossary === null || glossary === void 0 ? void 0 : glossary.entityTypes) !== null && _b !== void 0 ? _b : []);
+    }
+    /**
+     * Scope the relation-predicate enum: the active domain's `primaryRelationTypes`
+     * ∪ corpus-glossary relation types ∪ base set ∪ `related_to`. Delegates to the
+     * shared {@link allowedRelationTypes}. Unlike the pre-Phase-2 resolver this
+     * passes `contentClasses`, so the domain predicates the hints/examples teach are
+     * actually emittable (KG-05) instead of triggering ZodError → empty graph.
+     */
+    resolveAllowedRelationTypes(contentClasses, glossary) {
+        var _a, _b;
+        // Open-predicate: no enum at all → buildGraphSchema falls to free `z.string()`.
+        if (this.openPredicate)
+            return undefined;
+        // Strict: a supplied glossary REPLACES the base/domain sets (exact ontology).
+        if (this.strictVocabulary && ((_a = glossary === null || glossary === void 0 ? void 0 : glossary.relationTypes) === null || _a === void 0 ? void 0 : _a.length)) {
+            return Array.from(new Set([...glossary.relationTypes, vocabulary_1.RELATION_TYPE_ESCAPE]));
+        }
+        return (0, vocabulary_1.allowedRelationTypes)(contentClasses, (_b = glossary === null || glossary === void 0 ? void 0 : glossary.relationTypes) !== null && _b !== void 0 ? _b : []);
+    }
+    /** Provenance to stamp on a chunk's observations (reader-supplied or file). */
+    chunkProvenance(processedFile, chunk) {
+        var _a, _b, _c, _d, _e, _f;
+        return {
+            speaker: (_a = chunk.provenance) === null || _a === void 0 ? void 0 : _a.speaker,
+            source: (_c = (_b = chunk.provenance) === null || _b === void 0 ? void 0 : _b.source) !== null && _c !== void 0 ? _c : processedFile.path,
+            occurredAt: (_d = chunk.provenance) === null || _d === void 0 ? void 0 : _d.occurredAt,
+            sourceAdapter: (_e = chunk.provenance) === null || _e === void 0 ? void 0 : _e.sourceAdapter,
+            locator: (_f = chunk.provenance) === null || _f === void 0 ? void 0 : _f.locator,
+        };
+    }
+    /**
+     * Convert the LLM's raw graph (bare-string observations) into the domain
+     * graph, stamping each observation with the chunk's provenance + transaction
+     * time. Grounding is deterministic — we attach what we already know rather
+     * than asking the model for it.
+     */
+    toGraph(raw, provenance, content) {
+        const createdAt = new Date().toISOString();
+        return {
+            entities: raw.entities.map((e) => ({
+                name: e.name,
+                entityType: e.entityType,
+                files: [],
+                observations: e.observations.map((text) => (Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({ text }, (provenance.speaker ? { speaker: provenance.speaker } : {})), (provenance.source ? { source: provenance.source } : {})), (provenance.occurredAt ? { validAt: provenance.occurredAt } : {})), (provenance.sourceAdapter ? { sourceAdapter: provenance.sourceAdapter } : {})), (provenance.locator ? { locator: provenance.locator } : {})), { createdAt }))),
+            })),
+            relations: raw.relations.map((r) => (Object.assign(Object.assign({ from: r.from, to: r.to, relationType: r.relationType }, (this.attachSourceSpans ? { sourceSpan: content } : {})), (this.attachSourceSpans && provenance.occurredAt
+                ? { validAt: provenance.occurredAt }
+                : {})))),
+        };
+    }
+    /**
+     * Inline grounding gate (Phase 5): check each observation fact AND each
+     * relation triple against its source chunk via the injected checker (keyword
+     * overlap | MiniCheck NLI), then either flag (annotate, keep) or drop the
+     * ungrounded ones. No-op when disabled. Every rejection is recorded
+     * (`groundingRejections`) so it leaves a trace in the run manifest (WI3).
+     *
+     * The graph is modified in place: each entity's `observations` and the graph's
+     * `relations` are replaced by the kept items. In flag mode every checked item
+     * gains `groundingScore` and `grounded`; in drop mode supported items are kept
+     * without those annotations and unsupported ones are removed. Checks are awaited
+     * one at a time, observations first, then relations.
+     *
+     * @param kg graph to gate (mutated and returned)
+     * @param source chunk text the claims are checked against
+     * @param filePath file path, used for rejection records and the debug log
+     * @param chunkIndex chunk index, used for rejection records and the debug log
+     * @param extractionId extraction id for trace events; tracing is skipped when falsy
+     * @returns promise of the same `kg` object
+     */
+    applyGroundingGate(kg, source, filePath, chunkIndex, extractionId) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (this.grounding === 'disabled' || !source) // nothing to check against when the source text is empty
+                return kg;
+            const drop = this.grounding === 'drop'; // any other enabled mode flags instead of dropping
+            let droppedObs = 0;
+            let droppedRel = 0;
+            // Observations — the claim is the fact text.
+            for (const e of kg.entities) {
+                const kept = [];
+                for (const o of e.observations) {
+                    const v = yield this.groundingChecker.check(o.text, source);
+                    const decision = v.supported ? 'accept' : drop ? 'drop' : 'flag'; // label used for tracing only
+                    if (trace_1.trace.enabled && extractionId) {
+                        this.traceGrounding(extractionId, 'observation', e.name, o.text, v.score, decision, trace_1.LineageRegistry.observationId(extractionId, e.name, o.text));
+                    }
+                    if (v.supported) {
+                        if (!drop) { // drop mode keeps supported items unannotated
+                            o.groundingScore = v.score;
+                            o.grounded = true;
+                        }
+                        kept.push(o);
+                        continue;
+                    }
+                    this.recordRejection(filePath, chunkIndex, 'observation', e.name, o.text, v.score, drop);
+                    if (drop) {
+                        droppedObs++;
+                    }
+                    else {
+                        o.groundingScore = v.score;
+                        o.grounded = false;
+                        kept.push(o);
+                    }
+                }
+                e.observations = kept; // entity is kept even if all of its observations were dropped
+            }
+            // Relation triples — verbalize `{from} {predicate} {to}` and check it.
+            const keptRel = [];
+            for (const r of kg.relations) {
+                const claim = (0, grounding_1.verbalizeRelation)(r.from, r.relationType, r.to);
+                const v = yield this.groundingChecker.check(claim, source);
+                const decision = v.supported ? 'accept' : drop ? 'drop' : 'flag';
+                if (trace_1.trace.enabled && extractionId) {
+                    this.traceGrounding(extractionId, 'relation', `${r.from}→${r.to}`, claim, v.score, decision, trace_1.LineageRegistry.relationMentionId(extractionId, r.from, r.to));
+                }
+                if (v.supported) {
+                    if (!drop) {
+                        r.groundingScore = v.score;
+                        r.grounded = true;
+                    }
+                    keptRel.push(r);
+                    continue;
+                }
+                this.recordRejection(filePath, chunkIndex, 'relation', `${r.from}→${r.to}`, claim, v.score, drop);
+                if (drop) {
+                    droppedRel++;
+                }
+                else {
+                    r.groundingScore = v.score;
+                    r.grounded = false;
+                    keptRel.push(r);
+                }
+            }
+            kg.relations = keptRel;
+            if (droppedObs > 0 || droppedRel > 0) {
+                this.logger.debug(`Grounding gate dropped ${droppedObs} observation(s) and ${droppedRel} relation(s) ` +
+                    `in ${filePath} [chunk ${chunkIndex}]`);
+            }
+            return kg;
+        });
+    }
+    /** Record one grounding rejection for the run manifest (WI3). */
+    recordRejection(filePath, chunkIndex, kind, subject, claim, score, dropped) {
+        this.groundingRejections.push({ filePath, chunkIndex, kind, subject, claim, score, dropped });
+    }
+    /**
+     * Debug trace: register each parsed entity/observation/relation's deterministic
+     * mention ID in the run lineage and emit the extraction event. Mention IDs are
+     * derived from content (never stored on the graph) so this is pure observation.
+     */
+    traceExtraction(kg, ctx) {
+        if (!trace_1.trace.enabled)
+            return;
+        const entityMentions = kg.entities.map((e) => {
+            const observationIds = e.observations.map((o) => trace_1.LineageRegistry.observationId(ctx.extractionId, e.name, (0, types_1.obsText)(o)));
+            const mentionId = trace_1.LineageRegistry.entityMentionId(ctx.extractionId, e.name);
+            trace_1.trace.lineage.registerEntity({
+                mentionId, name: e.name, entityType: e.entityType,
+                chunkId: ctx.chunkId, extractionId: ctx.extractionId, observationIds,
+            });
+            return { mentionId, name: e.name, entityType: e.entityType, observationIds };
+        });
+        const relationMentions = kg.relations.map((r) => ({
+            mentionId: trace_1.LineageRegistry.relationMentionId(ctx.extractionId, r.from, r.to),
+            from: r.from, to: r.to, relationType: r.relationType,
+        }));
+        trace_1.trace.emit(Object.assign(Object.assign(Object.assign({ stage: 'extract', type: 'extraction', extractionId: ctx.extractionId, chunkId: ctx.chunkId, file: ctx.filePath, chunkIndex: ctx.chunkIndex, model: this.model, promptVersion: this.promptVersion, attempt: 0, checkpointHit: ctx.checkpointHit, entityMentions, relationMentions }, (ctx.usage ? { usage: ctx.usage } : {})), (ctx.failed ? { failed: true } : {})), (ctx.error ? { error: ctx.error } : {})));
+    }
+    /** Debug trace: emit one grounding decision (accept/flag/drop) for a claim. */
+    traceGrounding(extractionId, kind, subject, claim, score, decision, mentionId) {
+        var _a, _b, _c;
+        trace_1.trace.emit({
+            stage: 'ground', type: 'grounding',
+            extractionId, chunkId: extractionId.split('@')[0], mentionId,
+            kind, subject, claim, score,
+            checker: (_c = (_b = (_a = this.groundingChecker) === null || _a === void 0 ? void 0 : _a.constructor) === null || _b === void 0 ? void 0 : _b.name) !== null && _c !== void 0 ? _c : 'grounding',
+            decision,
+        });
+    }
+    /** Normalize a (possibly legacy string-observation) graph from the checkpoint. */
+    normalizeGraph(kg) {
+        return Object.assign(Object.assign({}, kg), { entities: kg.entities.map((e) => (Object.assign(Object.assign({}, e), { observations: (0, types_1.normalizeObservations)(e.observations) }))) });
+    }
+    /**
+     * Build knowledge graph from a chunk of content
+     */
+    buildFromChunk(filePath, content, fullContent, systemPrompt, chunkIndex, totalChunks, retrievedContext, images, contentClasses, glossary) {
+        return __awaiter(this, void 0, void 0, function* () {
+            this.logger.debug(`Building KG for chunk ${chunkIndex}/${totalChunks} of ${filePath}`);
+            const userPrompt = yield this.promptManager.getUserPrompt({
+                input: '',
+                filter: '',
+                fileName: filePath,
+                fileContent: fullContent,
+                chunkContent: content,
+                chunkIndex,
+                totalChunks,
+                retrievedContext,
+                contentClasses,
+                corpusGlossary: glossary
+            });
+            return this.generateKnowledgeGraph(systemPrompt, userPrompt, images, this.resolveAllowedTypes(contentClasses, glossary), this.resolveAllowedRelationTypes(contentClasses, glossary));
+        });
+    }
+    /**
+     * Build knowledge graph from entire content
+     */
+    buildFromContent(filePath, content, systemPrompt, retrievedContext, images, contentClasses, glossary) {
+        return __awaiter(this, void 0, void 0, function* () {
+            this.logger.debug(`Building KG for entire file: ${filePath}`);
+            const userPrompt = yield this.promptManager.getUserPrompt({
+                input: '',
+                filter: '',
+                fileName: filePath,
+                fileContent: content,
+                chunkContent: content,
+                retrievedContext,
+                contentClasses,
+                corpusGlossary: glossary
+            });
+            return this.generateKnowledgeGraph(systemPrompt, userPrompt, images, this.resolveAllowedTypes(contentClasses, glossary), this.resolveAllowedRelationTypes(contentClasses, glossary));
+        });
+    }
+    /**
+     * Generate knowledge graph using LLM
+     */
+    generateKnowledgeGraph(systemPrompt, userPrompt, images, allowedTypes, allowedRelationTypes) {
+        return __awaiter(this, void 0, void 0, function* () {
+            var _a, _b;
+            const messages = [
+                {
+                    role: 'system',
+                    content: systemPrompt
+                },
+                {
+                    role: 'user',
+                    content: userPrompt,
+                    images: images === null || images === void 0 ? void 0 : images.map(img => { var _a; return (_a = img.base64) !== null && _a !== void 0 ? _a : ''; })
+                }
+            ];
+            // Let failures propagate (generateStructured already retries 3× then throws).
+            // buildChunk catches, records the failed chunk, and skips its checkpoint so
+            // --resume retries it — do NOT swallow into an empty graph here (KG-02).
+            const result = yield this.llmService.generateStructured(messages, buildGraphSchema(allowedTypes, allowedRelationTypes));
+            // Ensure arrays exist
+            (_a = result.entities) !== null && _a !== void 0 ? _a : (result.entities = []);
+            (_b = result.relations) !== null && _b !== void 0 ? _b : (result.relations = []);
+            this.logger.debug(`Generated KG with ${result.entities.length} entities and ${result.relations.length} relations`);
+            return result;
+        });
+    }
+}
+exports.KnowledgeGraphBuilder = KnowledgeGraphBuilder; // CommonJS export of the builder class
+//# sourceMappingURL=KnowledgeGraphBuilder.js.map