@wanshi-kg/wanshi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +458 -0
- package/dist/__tests__/helpers.js +27 -0
- package/dist/__tests__/helpers.js.map +1 -0
- package/dist/cli/commands/export.command.js +99 -0
- package/dist/cli/commands/export.command.js.map +1 -0
- package/dist/cli/commands/index.js +22 -0
- package/dist/cli/commands/index.js.map +1 -0
- package/dist/cli/commands/inspectMerges.command.js +84 -0
- package/dist/cli/commands/inspectMerges.command.js.map +1 -0
- package/dist/cli/commands/metrics.command.js +196 -0
- package/dist/cli/commands/metrics.command.js.map +1 -0
- package/dist/cli/commands/process.command.js +82 -0
- package/dist/cli/commands/process.command.js.map +1 -0
- package/dist/cli/commands/watch.command.js +91 -0
- package/dist/cli/commands/watch.command.js.map +1 -0
- package/dist/cli/index.js +269 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/optionsToConfig.js +160 -0
- package/dist/cli/optionsToConfig.js.map +1 -0
- package/dist/config/index.js +59 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/legacyHints.js +113 -0
- package/dist/config/legacyHints.js.map +1 -0
- package/dist/config/schema.js +803 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/config/ui.js +221 -0
- package/dist/config/ui.js.map +1 -0
- package/dist/core/DirectoryProcessor.js +725 -0
- package/dist/core/DirectoryProcessor.js.map +1 -0
- package/dist/core/adapters/IStructuredAdapter.js +3 -0
- package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
- package/dist/core/adapters/SqliteAdapter.js +267 -0
- package/dist/core/adapters/SqliteAdapter.js.map +1 -0
- package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
- package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
- package/dist/core/adapters/index.js +20 -0
- package/dist/core/adapters/index.js.map +1 -0
- package/dist/core/checkpoint/CheckpointService.js +188 -0
- package/dist/core/checkpoint/CheckpointService.js.map +1 -0
- package/dist/core/checkpoint/index.js +18 -0
- package/dist/core/checkpoint/index.js.map +1 -0
- package/dist/core/corpus/CorpusAnalyzer.js +266 -0
- package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
- package/dist/core/corpus/CorpusProfileStore.js +92 -0
- package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
- package/dist/core/corpus/index.js +21 -0
- package/dist/core/corpus/index.js.map +1 -0
- package/dist/core/corpus/normalizeGlossary.js +60 -0
- package/dist/core/corpus/normalizeGlossary.js.map +1 -0
- package/dist/core/corpus/relPath.js +52 -0
- package/dist/core/corpus/relPath.js.map +1 -0
- package/dist/core/corpus/termFrequency.js +86 -0
- package/dist/core/corpus/termFrequency.js.map +1 -0
- package/dist/core/cost/CostMeter.js +235 -0
- package/dist/core/cost/CostMeter.js.map +1 -0
- package/dist/core/cost/index.js +19 -0
- package/dist/core/cost/index.js.map +1 -0
- package/dist/core/cost/prices.js +38 -0
- package/dist/core/cost/prices.js.map +1 -0
- package/dist/core/cv/ObjectDetectionService.js +119 -0
- package/dist/core/cv/ObjectDetectionService.js.map +1 -0
- package/dist/core/di/ContainerFactory.js +670 -0
- package/dist/core/di/ContainerFactory.js.map +1 -0
- package/dist/core/di/DIContainer.js +103 -0
- package/dist/core/di/DIContainer.js.map +1 -0
- package/dist/core/di/index.js +19 -0
- package/dist/core/di/index.js.map +1 -0
- package/dist/core/errors/CustomErrors.js +342 -0
- package/dist/core/errors/CustomErrors.js.map +1 -0
- package/dist/core/errors/index.js +18 -0
- package/dist/core/errors/index.js.map +1 -0
- package/dist/core/export/KnowledgeGraphExportService.js +56 -0
- package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
- package/dist/core/export/index.js +19 -0
- package/dist/core/export/index.js.map +1 -0
- package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
- package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
- package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/IExportStrategy.js +3 -0
- package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
- package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
- package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
- package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
- package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/McpExportStrategy.js +67 -0
- package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
- package/dist/core/export/strategies/index.js +25 -0
- package/dist/core/export/strategies/index.js.map +1 -0
- package/dist/core/export/strategies/kbTriples.js +60 -0
- package/dist/core/export/strategies/kbTriples.js.map +1 -0
- package/dist/core/index.js +22 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
- package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
- package/dist/core/knowledge/MergeRecord.js +3 -0
- package/dist/core/knowledge/MergeRecord.js.map +1 -0
- package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
- package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
- package/dist/core/knowledge/canon/index.js +18 -0
- package/dist/core/knowledge/canon/index.js.map +1 -0
- package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
- package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
- package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
- package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
- package/dist/core/knowledge/contradiction/index.js +19 -0
- package/dist/core/knowledge/contradiction/index.js.map +1 -0
- package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
- package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
- package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
- package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
- package/dist/core/knowledge/grounding/index.js +20 -0
- package/dist/core/knowledge/grounding/index.js.map +1 -0
- package/dist/core/knowledge/grounding/verbalize.js +38 -0
- package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
- package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
- package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
- package/dist/core/knowledge/index.js +20 -0
- package/dist/core/knowledge/index.js.map +1 -0
- package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
- package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
- package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
- package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
- package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
- package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
- package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
- package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
- package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
- package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
- package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
- package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
- package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
- package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
- package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
- package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
- package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
- package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
- package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
- package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
- package/dist/core/knowledge/vocabulary.js +162 -0
- package/dist/core/knowledge/vocabulary.js.map +1 -0
- package/dist/core/llm/EmbeddingService.js +113 -0
- package/dist/core/llm/EmbeddingService.js.map +1 -0
- package/dist/core/llm/OllamaService.js +146 -0
- package/dist/core/llm/OllamaService.js.map +1 -0
- package/dist/core/llm/OpenAICompatibleService.js +190 -0
- package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
- package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
- package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
- package/dist/core/llm/embeddingUtils.js +25 -0
- package/dist/core/llm/embeddingUtils.js.map +1 -0
- package/dist/core/llm/index.js +23 -0
- package/dist/core/llm/index.js.map +1 -0
- package/dist/core/llm/prompts/PromptManager.js +388 -0
- package/dist/core/llm/prompts/PromptManager.js.map +1 -0
- package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
- package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
- package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
- package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
- package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
- package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
- package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
- package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
- package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
- package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
- package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
- package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
- package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
- package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
- package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
- package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
- package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
- package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
- package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
- package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
- package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
- package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
- package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
- package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
- package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
- package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
- package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
- package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
- package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
- package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
- package/dist/core/pipeline/GroundingTransform.js +52 -0
- package/dist/core/pipeline/GroundingTransform.js.map +1 -0
- package/dist/core/pipeline/PipelineRunner.js +51 -0
- package/dist/core/pipeline/PipelineRunner.js.map +1 -0
- package/dist/core/pipeline/RelationFilterTransform.js +72 -0
- package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
- package/dist/core/pipeline/index.js +20 -0
- package/dist/core/pipeline/index.js.map +1 -0
- package/dist/core/processor/FileProcessor.js +184 -0
- package/dist/core/processor/FileProcessor.js.map +1 -0
- package/dist/core/processor/ProcessedRegistry.js +38 -0
- package/dist/core/processor/ProcessedRegistry.js.map +1 -0
- package/dist/core/processor/ast/AstSeedService.js +0 -0
- package/dist/core/processor/ast/AstSeedService.js.map +1 -0
- package/dist/core/processor/ast/AstSymbolStore.js +110 -0
- package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
- package/dist/core/processor/ast/index.js +19 -0
- package/dist/core/processor/ast/index.js.map +1 -0
- package/dist/core/processor/chunking/TextChunker.js +98 -0
- package/dist/core/processor/chunking/TextChunker.js.map +1 -0
- package/dist/core/processor/chunking/index.js +18 -0
- package/dist/core/processor/chunking/index.js.map +1 -0
- package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
- package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
- package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
- package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
- package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
- package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
- package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
- package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
- package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
- package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
- package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
- package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
- package/dist/core/processor/classifier/index.js +21 -0
- package/dist/core/processor/classifier/index.js.map +1 -0
- package/dist/core/processor/classifier/mergeClassifications.js +32 -0
- package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
- package/dist/core/processor/index.js +20 -0
- package/dist/core/processor/index.js.map +1 -0
- package/dist/core/processor/readers/AudioReader.js +462 -0
- package/dist/core/processor/readers/AudioReader.js.map +1 -0
- package/dist/core/processor/readers/BinaryReader.js +90 -0
- package/dist/core/processor/readers/BinaryReader.js.map +1 -0
- package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
- package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
- package/dist/core/processor/readers/ChatExportReader.js +365 -0
- package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
- package/dist/core/processor/readers/DoclingReader.js +445 -0
- package/dist/core/processor/readers/DoclingReader.js.map +1 -0
- package/dist/core/processor/readers/EmailReader.js +259 -0
- package/dist/core/processor/readers/EmailReader.js.map +1 -0
- package/dist/core/processor/readers/EpubReader.js +175 -0
- package/dist/core/processor/readers/EpubReader.js.map +1 -0
- package/dist/core/processor/readers/FileReader.js +90 -0
- package/dist/core/processor/readers/FileReader.js.map +1 -0
- package/dist/core/processor/readers/FileReaderFactory.js +49 -0
- package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
- package/dist/core/processor/readers/HtmlReader.js +371 -0
- package/dist/core/processor/readers/HtmlReader.js.map +1 -0
- package/dist/core/processor/readers/ImageReader.js +162 -0
- package/dist/core/processor/readers/ImageReader.js.map +1 -0
- package/dist/core/processor/readers/JsonFileReader.js +232 -0
- package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
- package/dist/core/processor/readers/JupyterReader.js +178 -0
- package/dist/core/processor/readers/JupyterReader.js.map +1 -0
- package/dist/core/processor/readers/LatexReader.js +176 -0
- package/dist/core/processor/readers/LatexReader.js.map +1 -0
- package/dist/core/processor/readers/MarkdownReader.js +289 -0
- package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
- package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
- package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
- package/dist/core/processor/readers/MistralOcrReader.js +198 -0
- package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
- package/dist/core/processor/readers/OfficeReader.js +174 -0
- package/dist/core/processor/readers/OfficeReader.js.map +1 -0
- package/dist/core/processor/readers/PdfReader.js +116 -0
- package/dist/core/processor/readers/PdfReader.js.map +1 -0
- package/dist/core/processor/readers/RtfReader.js +107 -0
- package/dist/core/processor/readers/RtfReader.js.map +1 -0
- package/dist/core/processor/readers/SubtitleReader.js +145 -0
- package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
- package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
- package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
- package/dist/core/processor/readers/TextReader.js +129 -0
- package/dist/core/processor/readers/TextReader.js.map +1 -0
- package/dist/core/processor/readers/TranscriptReader.js +234 -0
- package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
- package/dist/core/processor/readers/image/imageMetadata.js +155 -0
- package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
- package/dist/core/processor/readers/index.js +41 -0
- package/dist/core/processor/readers/index.js.map +1 -0
- package/dist/core/processor/readers/referenceExtraction.js +198 -0
- package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
- package/dist/core/processor/readers/stripReferences.js +59 -0
- package/dist/core/processor/readers/stripReferences.js.map +1 -0
- package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
- package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
- package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
- package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
- package/dist/core/progress/NoopProgressEmitter.js +15 -0
- package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
- package/dist/core/progress/index.js +19 -0
- package/dist/core/progress/index.js.map +1 -0
- package/dist/core/trace/TraceWriter.js +100 -0
- package/dist/core/trace/TraceWriter.js.map +1 -0
- package/dist/core/trace/events.js +13 -0
- package/dist/core/trace/events.js.map +1 -0
- package/dist/core/trace/index.js +20 -0
- package/dist/core/trace/index.js.map +1 -0
- package/dist/core/trace/lineage.js +97 -0
- package/dist/core/trace/lineage.js.map +1 -0
- package/dist/evaluation/BenchmarkRunner.js +171 -0
- package/dist/evaluation/BenchmarkRunner.js.map +1 -0
- package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
- package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
- package/dist/evaluation/classifier/labeledSamples.js +379 -0
- package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
- package/dist/evaluation/compare/goldCompare.js +126 -0
- package/dist/evaluation/compare/goldCompare.js.map +1 -0
- package/dist/evaluation/crossre/compareScoring.js +30 -0
- package/dist/evaluation/crossre/compareScoring.js.map +1 -0
- package/dist/evaluation/datasets/CrossREDataset.js +170 -0
- package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
- package/dist/evaluation/datasets/IDataset.js +3 -0
- package/dist/evaluation/datasets/IDataset.js.map +1 -0
- package/dist/evaluation/datasets/RebelDataset.js +117 -0
- package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
- package/dist/evaluation/datasets/RedocredDataset.js +218 -0
- package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
- package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
- package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
- package/dist/evaluation/index.js +33 -0
- package/dist/evaluation/index.js.map +1 -0
- package/dist/evaluation/matching/ExactMatcher.js +75 -0
- package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
- package/dist/evaluation/matching/SemanticMatcher.js +143 -0
- package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
- package/dist/evaluation/metrics/TripleMetrics.js +64 -0
- package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
- package/dist/evaluation/mine/MineCheckpoint.js +114 -0
- package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
- package/dist/evaluation/mine/MineDataset.js +208 -0
- package/dist/evaluation/mine/MineDataset.js.map +1 -0
- package/dist/evaluation/mine/MineReporter.js +98 -0
- package/dist/evaluation/mine/MineReporter.js.map +1 -0
- package/dist/evaluation/mine/MineRunner.js +148 -0
- package/dist/evaluation/mine/MineRunner.js.map +1 -0
- package/dist/evaluation/mine/MineScorer.js +127 -0
- package/dist/evaluation/mine/MineScorer.js.map +1 -0
- package/dist/evaluation/mine/types.js +12 -0
- package/dist/evaluation/mine/types.js.map +1 -0
- package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
- package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
- package/dist/evaluation/reporters/JsonReporter.js +50 -0
- package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
- package/dist/index.js +28 -0
- package/dist/index.js.map +1 -0
- package/dist/quality/CompositeScore.js +61 -0
- package/dist/quality/CompositeScore.js.map +1 -0
- package/dist/quality/ConsistencyMetrics.js +70 -0
- package/dist/quality/ConsistencyMetrics.js.map +1 -0
- package/dist/quality/FactualMetrics.js +76 -0
- package/dist/quality/FactualMetrics.js.map +1 -0
- package/dist/quality/GraphHealthMetrics.js +68 -0
- package/dist/quality/GraphHealthMetrics.js.map +1 -0
- package/dist/quality/SemanticMetrics.js +102 -0
- package/dist/quality/SemanticMetrics.js.map +1 -0
- package/dist/quality/StructuralMetrics.js +60 -0
- package/dist/quality/StructuralMetrics.js.map +1 -0
- package/dist/quality/index.js +23 -0
- package/dist/quality/index.js.map +1 -0
- package/dist/shared/index.js +20 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/shared/logger/Logger.js +3 -0
- package/dist/shared/logger/Logger.js.map +1 -0
- package/dist/shared/logger/LoggerFactory.js +75 -0
- package/dist/shared/logger/LoggerFactory.js.map +1 -0
- package/dist/shared/logger/index.js +19 -0
- package/dist/shared/logger/index.js.map +1 -0
- package/dist/shared/shutdown.js +30 -0
- package/dist/shared/shutdown.js.map +1 -0
- package/dist/shared/utils/agglomerativeCluster.js +269 -0
- package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
- package/dist/shared/utils/astSymbols.js +69 -0
- package/dist/shared/utils/astSymbols.js.map +1 -0
- package/dist/shared/utils/cosineSimilarity.js +18 -0
- package/dist/shared/utils/cosineSimilarity.js.map +1 -0
- package/dist/shared/utils/directoryTree.js +184 -0
- package/dist/shared/utils/directoryTree.js.map +1 -0
- package/dist/shared/utils/documentOutline.js +74 -0
- package/dist/shared/utils/documentOutline.js.map +1 -0
- package/dist/shared/utils/index.js +24 -0
- package/dist/shared/utils/index.js.map +1 -0
- package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
- package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
- package/dist/shared/utils/parseJsonLenient.js +27 -0
- package/dist/shared/utils/parseJsonLenient.js.map +1 -0
- package/dist/shared/utils/readConfig.js +42 -0
- package/dist/shared/utils/readConfig.js.map +1 -0
- package/dist/shared/utils/readRtf.js +216 -0
- package/dist/shared/utils/readRtf.js.map +1 -0
- package/dist/shared/utils/softmax.js +26 -0
- package/dist/shared/utils/softmax.js.map +1 -0
- package/dist/types/ContentClass.js +3 -0
- package/dist/types/ContentClass.js.map +1 -0
- package/dist/types/CorpusProfile.js +3 -0
- package/dist/types/CorpusProfile.js.map +1 -0
- package/dist/types/IContradictionChecker.js +3 -0
- package/dist/types/IContradictionChecker.js.map +1 -0
- package/dist/types/ICorpusAnalyzer.js +3 -0
- package/dist/types/ICorpusAnalyzer.js.map +1 -0
- package/dist/types/IDirectoryProcessor.js +3 -0
- package/dist/types/IDirectoryProcessor.js.map +1 -0
- package/dist/types/IEmbeddingProvider.js +3 -0
- package/dist/types/IEmbeddingProvider.js.map +1 -0
- package/dist/types/IEmbeddingService.js +6 -0
- package/dist/types/IEmbeddingService.js.map +1 -0
- package/dist/types/IFileProcessor.js +3 -0
- package/dist/types/IFileProcessor.js.map +1 -0
- package/dist/types/IGroundingChecker.js +3 -0
- package/dist/types/IGroundingChecker.js.map +1 -0
- package/dist/types/IKnowledgeGraphBuilder.js +3 -0
- package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
- package/dist/types/IKnowledgeGraphExporter.js +3 -0
- package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
- package/dist/types/IKnowledgeGraphMerger.js +3 -0
- package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
- package/dist/types/IKnowledgeGraphSearch.js +3 -0
- package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
- package/dist/types/ILLMProvider.js +3 -0
- package/dist/types/ILLMProvider.js.map +1 -0
- package/dist/types/ILLMService.js +3 -0
- package/dist/types/ILLMService.js.map +1 -0
- package/dist/types/IObjectDetector.js +3 -0
- package/dist/types/IObjectDetector.js.map +1 -0
- package/dist/types/IProcessingService.js +3 -0
- package/dist/types/IProcessingService.js.map +1 -0
- package/dist/types/IProgressEmitter.js +3 -0
- package/dist/types/IProgressEmitter.js.map +1 -0
- package/dist/types/IPromptManager.js +3 -0
- package/dist/types/IPromptManager.js.map +1 -0
- package/dist/types/KnowledgeGraph.js +3 -0
- package/dist/types/KnowledgeGraph.js.map +1 -0
- package/dist/types/MCPKnowledgeGraph.js +3 -0
- package/dist/types/MCPKnowledgeGraph.js.map +1 -0
- package/dist/types/Observation.js +21 -0
- package/dist/types/Observation.js.map +1 -0
- package/dist/types/ProcessingOptions.js +3 -0
- package/dist/types/ProcessingOptions.js.map +1 -0
- package/dist/types/index.js +40 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +122 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.LABELED_SAMPLES = void 0;
|
|
4
|
+
exports.LABELED_SAMPLES = [
|
|
5
|
+
// ── code ───────────────────────────────────────────────────────────────
|
|
6
|
+
{
|
|
7
|
+
id: "code-1",
|
|
8
|
+
path: "src/services/AuthService.ts",
|
|
9
|
+
expected: "code",
|
|
10
|
+
content: `import { TokenStore } from "./TokenStore";
|
|
11
|
+
|
|
12
|
+
export class AuthService extends BaseService {
|
|
13
|
+
async login(user: string, password: string): Promise<Session> {
|
|
14
|
+
const hash = await this.hasher.hash(password);
|
|
15
|
+
if (!this.store.verify(user, hash)) {
|
|
16
|
+
throw new Error("invalid credentials");
|
|
17
|
+
}
|
|
18
|
+
return this.store.createSession(user);
|
|
19
|
+
}
|
|
20
|
+
}`,
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
id: "code-2",
|
|
24
|
+
path: "scripts/clean_data.py",
|
|
25
|
+
expected: "code",
|
|
26
|
+
content: `import os
|
|
27
|
+
import json
|
|
28
|
+
|
|
29
|
+
def clean(records):
|
|
30
|
+
for r in records:
|
|
31
|
+
if r.get("value") is None:
|
|
32
|
+
continue
|
|
33
|
+
yield {"id": r["id"], "value": float(r["value"])}
|
|
34
|
+
|
|
35
|
+
def main():
|
|
36
|
+
with open("data.json") as f:
|
|
37
|
+
records = json.load(f)
|
|
38
|
+
print(list(clean(records)))`,
|
|
39
|
+
},
|
|
40
|
+
// ── financial ──────────────────────────────────────────────────────────
|
|
41
|
+
{
|
|
42
|
+
id: "financial-1",
|
|
43
|
+
path: "reports/q3-earnings.md",
|
|
44
|
+
expected: "financial",
|
|
45
|
+
content: `Q3 revenue rose to $4.2 billion, beating analyst estimates of $3.9 billion.
|
|
46
|
+
EBITDA margin improved to 28%. The board declared a dividend of $0.45 per share.
|
|
47
|
+
NASDAQ: ACME closed up 6% after the earnings call. Full-year guidance was raised.`,
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
id: "financial-2",
|
|
51
|
+
path: "finance/portfolio-review.txt",
|
|
52
|
+
expected: "financial",
|
|
53
|
+
content: `Portfolio allocation: 60% equity, 30% bonds, 10% cash. The S&P 500 holdings
|
|
54
|
+
returned $1,250,000 in unrealized gains. P/E ratio of the tech sleeve sits at 24.
|
|
55
|
+
Dividend yield across the portfolio is 2.1%. SEC 10-K filings reviewed for each holding.`,
|
|
56
|
+
},
|
|
57
|
+
// ── medical ────────────────────────────────────────────────────────────
|
|
58
|
+
{
|
|
59
|
+
id: "medical-1",
|
|
60
|
+
path: "records/patient-note.txt",
|
|
61
|
+
expected: "medical",
|
|
62
|
+
content: `Patient presents with hypertension. Prescribed lisinopril 10 mg PO daily and
|
|
63
|
+
metformin 500 mg BID for type 2 diabetes. Blood pressure 148/92 mmHg, heart rate 84 bpm.
|
|
64
|
+
Diagnosis confirmed; follow-up in 4 weeks to reassess medication and symptoms.`,
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
id: "medical-2",
|
|
68
|
+
path: "trials/study-protocol.txt",
|
|
69
|
+
expected: "medical",
|
|
70
|
+
content: `This randomized controlled, double-blind, placebo-controlled trial evaluates
|
|
71
|
+
drug efficacy in Phase II. FDA approval is pending. Adverse events were recorded per
|
|
72
|
+
protocol. IRB-approved informed consent obtained from all participants. 200 mg IV dose.`,
|
|
73
|
+
},
|
|
74
|
+
// ── legal ──────────────────────────────────────────────────────────────
|
|
75
|
+
{
|
|
76
|
+
id: "legal-1",
|
|
77
|
+
path: "legal/nda.txt",
|
|
78
|
+
expected: "legal",
|
|
79
|
+
content: `WHEREAS the parties agree to the following terms. Effective Date: 2023-01-01.
|
|
80
|
+
Termination: either party may terminate upon 30 days notice. The receiving party shall
|
|
81
|
+
not disclose Confidential Information. This agreement is governed by the jurisdiction of Delaware.`,
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
id: "legal-2",
|
|
85
|
+
path: "contracts/master-services-agreement.md",
|
|
86
|
+
expected: "legal",
|
|
87
|
+
content: `The Contractor shall indemnify the Client for any breach of this Agreement.
|
|
88
|
+
Liability for damages is capped per Section 7.2. Counsel for both parties reviewed the
|
|
89
|
+
provisions. The plaintiff waives any claim arising under this clause.`,
|
|
90
|
+
},
|
|
91
|
+
// ── research ───────────────────────────────────────────────────────────
|
|
92
|
+
{
|
|
93
|
+
id: "research-1",
|
|
94
|
+
path: "papers/sparse-attention.md",
|
|
95
|
+
expected: "research",
|
|
96
|
+
content: `## Abstract
|
|
97
|
+
We propose a sparse-attention method and demonstrate that it reduces memory cost.
|
|
98
|
+
Our experimental results on the PG-19 dataset improve perplexity over the baseline.
|
|
99
|
+
We build on Beltagy et al. (2020). doi:10.1145/1234567. p < 0.01 across 4 seeds.`,
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
id: "research-2",
|
|
103
|
+
path: "research/ablation-findings.md",
|
|
104
|
+
expected: "research",
|
|
105
|
+
content: `## Methodology
|
|
106
|
+
We investigated whether dropout improves generalization. The hypothesis was tested on a
|
|
107
|
+
held-out benchmark. Experimental evaluation reports accuracy: 0.91 and f1-score: 0.88.
|
|
108
|
+
The confidence interval excludes the baseline. Smith et al. (2019) reported similar findings.`,
|
|
109
|
+
},
|
|
110
|
+
// ── transcript ─────────────────────────────────────────────────────────
|
|
111
|
+
{
|
|
112
|
+
id: "transcript-1",
|
|
113
|
+
path: "meetings/q4-standup.txt",
|
|
114
|
+
expected: "transcript",
|
|
115
|
+
content: `Meeting Minutes - Q4 Standup
|
|
116
|
+
Attendees: John, Sarah, Mike
|
|
117
|
+
Sarah: we agreed to ship the beta on Friday.
|
|
118
|
+
Mike: I'll follow up with the vendor.
|
|
119
|
+
Action Items:
|
|
120
|
+
- John: finalize the budget by next week`,
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
id: "transcript-2",
|
|
124
|
+
path: "transcripts/customer-interview.txt",
|
|
125
|
+
expected: "transcript",
|
|
126
|
+
content: `Interviewer: what's your biggest pain point today?
|
|
127
|
+
Participant: onboarding takes too long.
|
|
128
|
+
Interviewer: and how would you fix it?
|
|
129
|
+
Participant: a guided setup. We decided to prototype that next sprint.
|
|
130
|
+
Speaker 2: agreed, let's schedule a review.`,
|
|
131
|
+
},
|
|
132
|
+
// ── tabular ────────────────────────────────────────────────────────────
|
|
133
|
+
{
|
|
134
|
+
id: "tabular-1",
|
|
135
|
+
path: "data/sales-2023.csv",
|
|
136
|
+
expected: "tabular",
|
|
137
|
+
content: `product,region,units_sold,revenue,margin
|
|
138
|
+
Widget A,EMEA,1500,15000,0.32
|
|
139
|
+
Widget B,APAC,2200,25000,0.28
|
|
140
|
+
Widget C,AMER,900,9000,0.41
|
|
141
|
+
Widget D,EMEA,1750,19500,0.30`,
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
id: "tabular-2",
|
|
145
|
+
path: "exports/inventory.tsv",
|
|
146
|
+
expected: "tabular",
|
|
147
|
+
content: `sku\twarehouse\ton_hand\treorder_point\tsupplier
|
|
148
|
+
A-100\tDAL\t420\t100\tAcme
|
|
149
|
+
B-200\tSEA\t85\t120\tGlobex
|
|
150
|
+
C-300\tNYC\t310\t150\tInitech
|
|
151
|
+
D-400\tDAL\t12\t50\tAcme`,
|
|
152
|
+
},
|
|
153
|
+
// ── communication ──────────────────────────────────────────────────────
|
|
154
|
+
{
|
|
155
|
+
id: "communication-1",
|
|
156
|
+
path: "mail/budget-update.eml",
|
|
157
|
+
expected: "communication",
|
|
158
|
+
content: `From: john@acme.com
|
|
159
|
+
To: sarah@acme.com
|
|
160
|
+
Cc: mike@acme.com
|
|
161
|
+
Subject: Q4 budget draft
|
|
162
|
+
|
|
163
|
+
Hi Sarah, attached is the budget draft. Please review the travel section.
|
|
164
|
+
Best regards,
|
|
165
|
+
John`,
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
id: "communication-2",
|
|
169
|
+
path: "messages/reply-thread.txt",
|
|
170
|
+
expected: "communication",
|
|
171
|
+
content: `On Mon, Nov 3, Sarah Lee wrote:
|
|
172
|
+
> Can you send the updated numbers?
|
|
173
|
+
|
|
174
|
+
Hi Sarah, sure — forwarded them just now. Thanks for the reminder.
|
|
175
|
+
Regards, John (cc: mike@acme.com)`,
|
|
176
|
+
},
|
|
177
|
+
// ── documentation ──────────────────────────────────────────────────────
|
|
178
|
+
{
|
|
179
|
+
id: "documentation-1",
|
|
180
|
+
path: "README.md",
|
|
181
|
+
expected: "documentation",
|
|
182
|
+
content: `# Wanshi
|
|
183
|
+
|
|
184
|
+
Turns files into knowledge graphs.
|
|
185
|
+
|
|
186
|
+
## Installation
|
|
187
|
+
\`\`\`bash
|
|
188
|
+
npm install -g wanshi
|
|
189
|
+
\`\`\`
|
|
190
|
+
|
|
191
|
+
## Getting Started
|
|
192
|
+
To get started, run the CLI. You can configure the output format in config.yaml.`,
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
id: "documentation-2",
|
|
196
|
+
path: "docs/api-guide.md",
|
|
197
|
+
expected: "documentation",
|
|
198
|
+
content: `## API Reference
|
|
199
|
+
This guide shows how to use the endpoints. Follow these steps to authenticate.
|
|
200
|
+
|
|
201
|
+
\`\`\`bash
|
|
202
|
+
GET /graph
|
|
203
|
+
\`\`\`
|
|
204
|
+
|
|
205
|
+
You need to set the API key first. See the configuration section for parameters.`,
|
|
206
|
+
},
|
|
207
|
+
// ── technical ──────────────────────────────────────────────────────────
|
|
208
|
+
{
|
|
209
|
+
id: "technical-1",
|
|
210
|
+
path: "logs/app.log",
|
|
211
|
+
expected: "technical",
|
|
212
|
+
content: `2023-11-03T09:12:44.512Z INFO server started on port 8080
|
|
213
|
+
2023-11-03T09:12:45.001Z WARN cache miss for key user:42
|
|
214
|
+
2023-11-03T09:12:46.220Z ERROR connection timeout to database after 5000ms
|
|
215
|
+
2023-11-03T09:12:47.330Z INFO retrying connection (attempt 2)`,
|
|
216
|
+
},
|
|
217
|
+
{
|
|
218
|
+
id: "technical-2",
|
|
219
|
+
path: "config/server.yaml",
|
|
220
|
+
expected: "technical",
|
|
221
|
+
content: `host: 0.0.0.0
|
|
222
|
+
port: 8080
|
|
223
|
+
timeout: 30
|
|
224
|
+
database:
|
|
225
|
+
url: postgres://localhost/app
|
|
226
|
+
pool: 10
|
|
227
|
+
cache:
|
|
228
|
+
ttl: 300`,
|
|
229
|
+
},
|
|
230
|
+
// ── narrative ──────────────────────────────────────────────────────────
|
|
231
|
+
{
|
|
232
|
+
id: "narrative-1",
|
|
233
|
+
path: "articles/ai-and-society.md",
|
|
234
|
+
expected: "narrative",
|
|
235
|
+
content: `The future of work is being reshaped by automation. However, the picture is more
|
|
236
|
+
nuanced than the headlines suggest. Furthermore, many roles will evolve rather than vanish.
|
|
237
|
+
According to recent analysis, the transition will be gradual. The story explores both sides.`,
|
|
238
|
+
},
|
|
239
|
+
{
|
|
240
|
+
id: "narrative-2",
|
|
241
|
+
path: "posts/remote-work-essay.txt",
|
|
242
|
+
expected: "narrative",
|
|
243
|
+
content: `This essay examines how remote work changed team culture. It discusses the trade-offs
|
|
244
|
+
between flexibility and connection. Therefore, companies must rethink their norms.
|
|
245
|
+
Meanwhile, employees report higher satisfaction. The article argues for a hybrid approach.`,
|
|
246
|
+
},
|
|
247
|
+
// ── reference ──────────────────────────────────────────────────────────
|
|
248
|
+
{
|
|
249
|
+
id: "reference-1",
|
|
250
|
+
path: "reference/glossary.md",
|
|
251
|
+
expected: "reference",
|
|
252
|
+
content: `Entity: a uniquely named node in the knowledge graph.
|
|
253
|
+
Observation: a provenance-stamped fact attached to an entity.
|
|
254
|
+
Relation: a typed directed edge between two entities.
|
|
255
|
+
See also: cross-reference the schema definition for each term.`,
|
|
256
|
+
},
|
|
257
|
+
{
|
|
258
|
+
id: "reference-2",
|
|
259
|
+
path: "specs/abbreviations.md",
|
|
260
|
+
expected: "reference",
|
|
261
|
+
content: `API: Application Programming Interface.
|
|
262
|
+
DOI: Digital Object Identifier.
|
|
263
|
+
KG: Knowledge Graph.
|
|
264
|
+
This catalog lists common acronyms alphabetically. Each entry is indexed by its symbol.`,
|
|
265
|
+
},
|
|
266
|
+
// ── hard / ambiguous (neutral paths, cross-cutting signals) ─────────────
|
|
267
|
+
// These are the cases that matter for S2/S3: confusable content and paths
|
|
268
|
+
// that carry no telltale extension, so the classifier must decide on content
|
|
269
|
+
// alone. The gold label is the most defensible single class, but the current
|
|
270
|
+
// heuristic is expected to confuse several of these — that is the point.
|
|
271
|
+
{
|
|
272
|
+
id: "hard-research-prose",
|
|
273
|
+
path: "notes/sparse-idea.md",
|
|
274
|
+
expected: "research",
|
|
275
|
+
content: `We hypothesize that sparse attention preserves accuracy while cutting cost.
|
|
276
|
+
Our analysis on a held-out benchmark suggests the approach generalizes. The methodology
|
|
277
|
+
extends prior work, and the experimental results beat the baseline on most metrics.`,
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
id: "hard-narrative-sciencey",
|
|
281
|
+
path: "articles/science-column.md",
|
|
282
|
+
expected: "narrative",
|
|
283
|
+
content: `The promise of fusion energy has captivated scientists for decades. According to
|
|
284
|
+
researchers, recent breakthroughs bring it closer. However, skeptics argue the timeline
|
|
285
|
+
remains uncertain. This article explores what the latest experiments mean for the public.`,
|
|
286
|
+
},
|
|
287
|
+
{
|
|
288
|
+
id: "hard-doc-plain",
|
|
289
|
+
path: "docs/overview.txt",
|
|
290
|
+
expected: "documentation",
|
|
291
|
+
content: `This page explains how to set up the tool. You can install it with the package
|
|
292
|
+
manager, then run the command. To get started, follow these steps and configure the
|
|
293
|
+
options. See the usage section for more, and refer to the tutorial for a walkthrough.`,
|
|
294
|
+
},
|
|
295
|
+
{
|
|
296
|
+
id: "hard-reference-defs",
|
|
297
|
+
path: "notes/terms.txt",
|
|
298
|
+
expected: "reference",
|
|
299
|
+
content: `Idempotent: an operation that yields the same result when applied repeatedly.
|
|
300
|
+
Latency: the time between a request and its response.
|
|
301
|
+
Throughput: the number of operations completed per unit time.
|
|
302
|
+
See also: the performance glossary for related definitions.`,
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
id: "hard-technical-plain",
|
|
306
|
+
path: "infra/setup.txt",
|
|
307
|
+
expected: "technical",
|
|
308
|
+
content: `host: api.internal
|
|
309
|
+
port: 9090
|
|
310
|
+
timeout: 60
|
|
311
|
+
The service connects to the database and the cache. On startup it loads the configuration
|
|
312
|
+
and binds to the port. Restart the daemon after changing any setting.`,
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
id: "hard-code-in-md",
|
|
316
|
+
path: "notes/snippet.md",
|
|
317
|
+
expected: "code",
|
|
318
|
+
content: `Here's the helper I wrote:
|
|
319
|
+
|
|
320
|
+
\`\`\`js
|
|
321
|
+
function debounce(fn, ms) {
|
|
322
|
+
let t;
|
|
323
|
+
return (...args) => { clearTimeout(t); t = setTimeout(() => fn(...args), ms); };
|
|
324
|
+
}
|
|
325
|
+
\`\`\``,
|
|
326
|
+
},
|
|
327
|
+
{
|
|
328
|
+
id: "hard-communication-chat",
|
|
329
|
+
path: "chat/dm.md",
|
|
330
|
+
expected: "communication",
|
|
331
|
+
content: `hey, did you get a chance to look at the proposal?
|
|
332
|
+
yeah, sent you my notes a minute ago. let me know if the budget section makes sense.
|
|
333
|
+
thanks! I'll reply after the call. cc'ing Dana so she's in the loop.`,
|
|
334
|
+
},
|
|
335
|
+
{
|
|
336
|
+
id: "hard-transcript-plain",
|
|
337
|
+
path: "audio/recording.txt",
|
|
338
|
+
expected: "transcript",
|
|
339
|
+
content: `Anna: so where did we land on the launch date?
|
|
340
|
+
Ben: I think the 15th is realistic if QA signs off.
|
|
341
|
+
Anna: okay, let's plan for that and revisit Thursday.
|
|
342
|
+
Ben: works for me. I'll update the schedule.`,
|
|
343
|
+
},
|
|
344
|
+
{
|
|
345
|
+
id: "hard-medical-prose",
|
|
346
|
+
path: "health/summary.txt",
|
|
347
|
+
expected: "medical",
|
|
348
|
+
content: `The patient has a chronic condition managed with ongoing treatment. Symptoms include
|
|
349
|
+
fatigue and elevated blood pressure. The care team adjusted the therapeutic plan and will
|
|
350
|
+
monitor the diagnosis. A follow-up visit is scheduled to review the prescription.`,
|
|
351
|
+
},
|
|
352
|
+
{
|
|
353
|
+
id: "hard-financial-prose",
|
|
354
|
+
path: "memos/quarter.txt",
|
|
355
|
+
expected: "financial",
|
|
356
|
+
content: `Revenue grew this quarter and earnings exceeded our internal forecast. The investment
|
|
357
|
+
in the new line is paying off, and the board expects the dividend to hold. Analysts remain
|
|
358
|
+
bullish on the stock despite a softer outlook for the sector.`,
|
|
359
|
+
},
|
|
360
|
+
{
|
|
361
|
+
id: "hard-legal-light",
|
|
362
|
+
path: "policy/terms.txt",
|
|
363
|
+
expected: "legal",
|
|
364
|
+
content: `By using this service you agree to these terms. The provider may terminate access for
|
|
365
|
+
breach of the conditions. Each party retains liability only as set out in this agreement.
|
|
366
|
+
Disputes are subject to the courts of the stated jurisdiction.`,
|
|
367
|
+
},
|
|
368
|
+
{
|
|
369
|
+
id: "hard-tabular-markdown",
|
|
370
|
+
path: "reports/summary.md",
|
|
371
|
+
expected: "tabular",
|
|
372
|
+
content: `| product | region | units | revenue |
|
|
373
|
+
| ------- | ------ | ----- | ------- |
|
|
374
|
+
| Widget A | EMEA | 1500 | 15000 |
|
|
375
|
+
| Widget B | APAC | 2200 | 25000 |
|
|
376
|
+
| Widget C | AMER | 900 | 9000 |`,
|
|
377
|
+
},
|
|
378
|
+
];
|
|
379
|
+
//# sourceMappingURL=labeledSamples.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"labeledSamples.js","sourceRoot":"","sources":["../../../src/evaluation/classifier/labeledSamples.ts"],"names":[],"mappings":";;;AAoBa,QAAA,eAAe,GAAoB;IAC9C,0EAA0E;IAC1E;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;;;;EAUX;KACC;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;;;;;;gCAYmB;KAC7B;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;kFAEqE;KAC/E;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,8BAA8B;QACpC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;yFAE4E;KACtF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,0BAA0B;QAChC,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;+EAEkE;KAC5E;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,2BAA2B;QACjC,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;wFAE2E;KACrF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,eAAe;QACrB,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;mGAEsF;KAChG;IACD;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,wCAAwC;QAC9C,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;sEAEyD;KACnE;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;;iFAGoE;KAC9E;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,+BAA+B;QACrC,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;;8FAGiF;KAC3F;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,yBAAyB;QAC/B,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;;;yCAK4B;KACtC;IACD;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,oCAAoC;QAC1C,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;;4CAI+B;KACzC;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,qBAAqB;QAC3B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;;;8BAIiB;KAC3B;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;;;yBAIY;KACtB;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;KAOR;KACF;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,2BAA2B;QACjC,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;kCAIqB;KAC/B;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,WAAW;QACjB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;;;;iFAUoE;KAC
9E;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;iFAOoE;KAC9E;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,cAAc;QACpB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;+DAGkD;KAC5D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;;;;;WAOF;KACR;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;6FAEgF;KAC1F;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;2FAE8E;KACxF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;+DAGkD;KAC5D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;wFAG2E;KACrF;IAED,2EAA2E;IAC3E,0EAA0E;IAC1E,6EAA6E;IAC7E,6EAA6E;IAC7E,yEAAyE;IACzE;QACE,EAAE,EAAE,qBAAqB;QACzB,IAAI,EAAE,sBAAsB;QAC5B,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;oFAEuE;KACjF;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;0FAE6E;KACvF;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;sFAEyE;KACnF;IACD;QACE,EAAE,EAAE,qBAAqB;QACzB,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;4DAG+C;KACzD;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;;sEAIyD;KACnE;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;OAON;KACJ;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,YAAY;QAClB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;qEAEwD;KAClE;IACD;QACE,EAAE,EAAE,uBAAuB;QAC3B,IAAI,EAAE,qBAAqB;QAC3B,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;6CAGgC;KAC1C;IACD;QACE,EAAE,EAAE,oBAAoB;QACxB,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;kFAEqE;KAC/E;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;8DAEiD;KAC3D;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;+DAEkD;KAC5D;IACD;QACE,EAAE,EAAE,uBAAuB;QAC3B,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,SAAS;QACn
B,OAAO,EAAE;;;;iCAIoB;KAC9B;CACF,CAAC"}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Shared scoring core for the gold-labeled two-way comparisons (wanshi vs KGGen).
|
|
3
|
+
//
|
|
4
|
+
// Dataset-agnostic: CrossRE (sentence-level, 6 domains), SemEval (sentence-level,
|
|
5
|
+
// no domains), Re-DocRED (document-level, Wikidata schema + Ign-F1). The CLI
|
|
6
|
+
// (scripts/gold-compare.ts) handles loading/extraction/caching; this module owns
|
|
7
|
+
// the metric math so there is ONE source of truth across datasets.
|
|
8
|
+
//
|
|
9
|
+
// HEADLINE = node entity-capture (semantic): every graph node → a self-triplet →
|
|
10
|
+
// matched against gold entities. Fair across open-predicate tools whose free
|
|
11
|
+
// predicates won't string/embed-match a dataset's abstract gold predicate vocab.
|
|
12
|
+
// Triplet-level entity/relation/triple F1 are also reported (understate uniformly).
|
|
13
|
+
//
|
|
14
|
+
// Optional per-call extras:
|
|
15
|
+
// - domainById → per-domain node-capture F1 (CrossRE).
|
|
16
|
+
// - ignoreKeys → Re-DocRED Ign-F1: drop train-seen (subj|pred|obj) triples from
|
|
17
|
+
// BOTH gold and predictions before the triplet-level metrics, so a
|
|
18
|
+
// tool gets no credit for memorized training facts.
|
|
19
|
+
// Generator-to-promise driver (this has the shape of the standard helper a
// TypeScript compiler emits for downlevelled `async` functions — NOTE(review):
// presumably compiler-generated; confirm before hand-editing).
//
// Reuses `this.__awaiter` when one is already defined on `this`; otherwise
// defines the function below:
//   thisArg / _arguments - receiver and argument list the generator function
//                          is applied with (`_arguments` defaults to []).
//   P                    - promise constructor to use; falls back to the
//                          global `Promise` when falsy.
//   generator            - generator function; every value it yields is
//                          adopted into a `P` and awaited before resuming.
// The returned promise resolves with the generator's final `value` once
// `done` is true. A rejected yielded value is thrown back into the generator
// via `generator.throw`, and any exception escaping a resume step rejects the
// returned promise.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
20
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
21
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
22
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
23
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
24
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
25
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
26
|
+
});
|
|
27
|
+
};
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
exports.tripleKey = tripleKey;
|
|
30
|
+
exports.scoreGraph = scoreGraph;
|
|
31
|
+
exports.loadJsonl = loadJsonl;
|
|
32
|
+
exports.appendJsonl = appendJsonl;
|
|
33
|
+
const TripleMetrics_1 = require("../metrics/TripleMetrics");
|
|
34
|
+
const compareScoring_1 = require("../crossre/compareScoring");
|
|
35
|
+
/**
 * Accumulate one confusion tally into another, in place.
 *
 * @param a Running total; its `tp`, `fp` and `fn` counters are mutated.
 * @param b Tally to add; it is only read.
 */
const addTally = (a, b) => {
    for (const counter of ['tp', 'fp', 'fn']) {
        a[counter] += b[counter];
    }
};
|
|
40
|
+
/**
 * Build the normalized `subject|predicate|object` key used to exclude
 * train-seen triples (Ign-F1).
 *
 * Each part is trimmed and lower-cased before the parts are joined with `|`.
 *
 * @param t Triple exposing string `subject`, `predicate` and `object` fields.
 * @returns The normalized key string.
 */
function tripleKey(t) {
    const normalize = (text) => text.trim().toLowerCase();
    return [t.subject, t.predicate, t.object].map(normalize).join('|');
}
|
|
44
|
+
/**
 * Score predicted graphs against gold triples for a list of sample ids.
 *
 * For every id the predicted graph (an empty graph when absent) is flattened
 * into triplets and into node self-triplets, then compared with that id's gold
 * triples (an empty list when absent) using both matchers.
 *
 * @param ids       Sample ids to score, in order.
 * @param graphById Map from id to predicted graph (`entities`, `relations`).
 * @param goldById  Map from id to gold triples.
 * @param exact     Exact matcher; its `matchEntities` result is used synchronously.
 * @param semantic  Semantic matcher; its `matchEntities` result is awaited.
 * @param opts      Optional extras:
 *                  `domainById` enables per-domain semantic node tallies (ids
 *                  without a domain are bucketed under 'unknown');
 *                  `ignoreKeys` enables the Ign-F1 variants, dropping triples
 *                  whose `tripleKey` is in the set from both predictions and gold.
 * @returns Promise of the aggregate metrics plus average triples/entities per id.
 */
function scoreGraph(ids, graphById, goldById, exact, semantic, opts) {
    return __awaiter(this, void 0, void 0, function* () {
        const hasOpts = opts !== null && opts !== undefined;
        const domainById = hasOpts ? opts.domainById : undefined;
        const ignoreKeys = hasOpts ? opts.ignoreKeys : undefined;
        const newTally = () => ({ tp: 0, fp: 0, fn: 0 });
        // Nullish fallback: only null/undefined are replaced.
        const orElse = (value, fallback) => (value === null || value === undefined ? fallback : value);
        const exactScores = [];
        const semanticScores = [];
        const ignExactScores = [];
        const ignSemanticScores = [];
        const nodeSemTotal = newTally(); // micro-averaged across samples
        const nodeExactTotal = newTally();
        const domainTallies = new Map();
        let tripletCount = 0;
        let entityCount = 0;
        for (const id of ids) {
            const graph = orElse(graphById.get(id), { entities: [], relations: [] });
            const goldTriples = orElse(goldById.get(id), []);
            const predicted = (0, compareScoring_1.kgToTriplets)(graph);
            const nodeSelfTriplets = (0, compareScoring_1.nodeTriplets)(graph);
            tripletCount += predicted.length;
            entityCount += graph.entities.length;
            exactScores.push((0, TripleMetrics_1.computeExactMetrics)(predicted, goldTriples, exact));
            const semanticScore = yield (0, TripleMetrics_1.computeSemanticMetrics)(predicted, goldTriples, semantic);
            semanticScores.push(semanticScore);
            // Ign-F1: drop train-seen triples from both sides before scoring.
            if (ignoreKeys) {
                const isUnseen = (t) => !ignoreKeys.has(tripleKey(t));
                const predictedUnseen = predicted.filter(isUnseen);
                const goldUnseen = goldTriples.filter(isUnseen);
                ignExactScores.push((0, TripleMetrics_1.computeExactMetrics)(predictedUnseen, goldUnseen, exact));
                const ignSemanticScore = yield (0, TripleMetrics_1.computeSemanticMetrics)(predictedUnseen, goldUnseen, semantic);
                ignSemanticScores.push(ignSemanticScore);
            }
            // Node entity-capture: match the full node set against gold entities.
            const semanticNodeTally = yield semantic.matchEntities(nodeSelfTriplets, goldTriples);
            addTally(nodeSemTotal, semanticNodeTally);
            addTally(nodeExactTotal, exact.matchEntities(nodeSelfTriplets, goldTriples));
            if (domainById) {
                const domain = orElse(domainById.get(id), 'unknown');
                let bucket = domainTallies.get(domain);
                if (!domainTallies.has(domain)) {
                    bucket = newTally();
                    domainTallies.set(domain, bucket);
                }
                addTally(bucket, semanticNodeTally);
            }
        }
        let perDomainNode;
        if (domainById) {
            perDomainNode = new Map();
            domainTallies.forEach((tally, domain) => {
                perDomainNode.set(domain, (0, TripleMetrics_1.computeMetrics)(tally.tp, tally.fp, tally.fn));
            });
        }
        const sampleCount = ids.length;
        return {
            nodeEntitySem: (0, TripleMetrics_1.computeMetrics)(nodeSemTotal.tp, nodeSemTotal.fp, nodeSemTotal.fn),
            nodeEntityExact: (0, TripleMetrics_1.computeMetrics)(nodeExactTotal.tp, nodeExactTotal.fp, nodeExactTotal.fn),
            tripletSem: (0, TripleMetrics_1.microAverage)(semanticScores),
            tripletExact: (0, TripleMetrics_1.microAverage)(exactScores),
            perDomainNode,
            ignTripletSem: ignoreKeys ? (0, TripleMetrics_1.microAverage)(ignSemanticScores) : undefined,
            ignTripletExact: ignoreKeys ? (0, TripleMetrics_1.microAverage)(ignExactScores) : undefined,
            triplesPer: sampleCount ? tripletCount / sampleCount : 0,
            entsPer: sampleCount ? entityCount / sampleCount : 0,
        };
    });
}
|
|
103
|
+
// ─── JSONL cache (append + load + truncation-tolerant; the CheckpointService idiom) ──
|
|
104
|
+
/**
 * Load a JSONL cache file into a Map keyed by each record's `id`.
 *
 * Blank lines are skipped, lines that fail to parse are ignored (this tolerates
 * a truncated final line from an interrupted write), and parsed values without
 * an `id` property are dropped. A later record with the same id replaces an
 * earlier one.
 *
 * @param file Path of the JSONL file.
 * @param fs   File-system object providing `existsSync` and `readFileSync`.
 * @returns Map from record id to record; empty when the file does not exist.
 */
function loadJsonl(file, fs) {
    const records = new Map();
    if (fs.existsSync(file)) {
        const rows = fs.readFileSync(file, 'utf-8').split('\n');
        rows
            .map((row) => row.trim())
            .filter((row) => row.length > 0)
            .forEach((row) => {
                let parsed;
                try {
                    parsed = JSON.parse(row);
                }
                catch (_err) {
                    // Unparseable line (e.g. cut off mid-write): skip it.
                    return;
                }
                if (parsed && parsed.id !== undefined) {
                    records.set(parsed.id, parsed);
                }
            });
    }
    return records;
}
|
|
123
|
+
/**
 * Append one record to a JSONL file as a single newline-terminated line.
 *
 * @param file Path of the JSONL file.
 * @param rec  Value to serialize with `JSON.stringify`.
 * @param fs   File-system object providing `appendFileSync`.
 */
function appendJsonl(file, rec, fs) {
    const line = `${JSON.stringify(rec)}\n`;
    fs.appendFileSync(file, line, 'utf-8');
}
|
|
126
|
+
//# sourceMappingURL=goldCompare.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"goldCompare.js","sourceRoot":"","sources":["../../../src/evaluation/compare/goldCompare.ts"],"names":[],"mappings":";AAAA,kFAAkF;AAClF,EAAE;AACF,kFAAkF;AAClF,6EAA6E;AAC7E,iFAAiF;AACjF,mEAAmE;AACnE,EAAE;AACF,iFAAiF;AACjF,6EAA6E;AAC7E,iFAAiF;AACjF,oFAAoF;AACpF,EAAE;AACF,4BAA4B;AAC5B,0DAA0D;AAC1D,mFAAmF;AACnF,qFAAqF;AACrF,sEAAsE;;;;;;;;;;;AA+BtE,8BAEC;AAED,gCAoEC;AAGD,8BAcC;AAED,kCAEC;AAvHD,4DAAqH;AAErH,8DAAuE;AAmBvE,MAAM,QAAQ,GAAG,CAAC,CAAQ,EAAE,CAAyC,EAAE,EAAE;IACvE,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;IAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;IAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;AAC3C,CAAC,CAAC;AAEF,0FAA0F;AAC1F,SAAgB,SAAS,CAAC,CAAU;IAClC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;AAClH,CAAC;AAED,SAAsB,UAAU,CAC9B,GAAa,EACb,SAAsC,EACtC,QAAgC,EAChC,KAAmB,EACnB,QAAyB,EACzB,IAAqE;;;QAErE,MAAM,UAAU,GAAG,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC;QACpC,MAAM,UAAU,GAAG,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC;QAEpC,MAAM,SAAS,GAAmB,EAAE,CAAC;QACrC,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,YAAY,GAAmB,EAAE,CAAC;QACxC,MAAM,UAAU,GAAmB,EAAE,CAAC;QACtC,MAAM,OAAO,GAAU,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAG,gCAAgC;QAClF,MAAM,SAAS,GAAU,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;QACjD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAiB,CAAC;QAC9C,IAAI,OAAO,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,CAAC;QAE1B,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,MAAM,EAAE,GAAG,MAAA,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YAChE,MAAM,IAAI,GAAG,MAAA,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,IAAA,6BAAY,EAAC,EAAE,CAAC,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAA,6BAAY,EAAC,EAAE,CAAC,CAAC;YAC/B,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC;YACvB,IAAI,IAAI,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAE3B,SAAS,CAAC,IAAI,CAAC,IAAA,mCAAmB,EAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;YACvD,OAAO,CAAC,IAAI,
CAAC,MAAM,IAAA,sCAAsB,EAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;YAEjE,kEAAkE;YAClE,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAChE,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAChE,YAAY,CAAC,IAAI,CAAC,IAAA,mCAAmB,EAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC5D,UAAU,CAAC,IAAI,CAAC,MAAM,IAAA,sCAAsB,EAAC,KAAK,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;YACxE,CAAC;YAED,sEAAsE;YACtE,MAAM,EAAE,GAAG,MAAM,QAAQ,CAAC,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;YACrD,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YACtB,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;YAEtD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,MAAA,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,SAAS,CAAC;gBAC1C,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;gBACvE,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,IAAI,aAAmD,CAAC;QACxD,IAAI,UAAU,EAAE,CAAC;YACf,aAAa,GAAG,IAAI,GAAG,EAAE,CAAC;YAC1B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,YAAY;gBAAE,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,IAAA,8BAAc,EAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5F,CAAC;QAED,OAAO;YACL,aAAa,EAAE,IAAA,8BAAc,EAAC,OAAO,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,CAAC;YACjE,eAAe,EAAE,IAAA,8BAAc,EAAC,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,CAAC;YACzE,UAAU,EAAE,IAAA,4BAAY,EAAC,OAAO,CAAC;YACjC,YAAY,EAAE,IAAA,4BAAY,EAAC,SAAS,CAAC;YACrC,aAAa;YACb,aAAa,EAAE,UAAU,CAAC,CAAC,CAAC,IAAA,4BAAY,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,SAAS;YAChE,eAAe,EAAE,UAAU,CAAC,CAAC,CAAC,IAAA,4BAAY,EAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YACjD,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;SAC5C,CAAC;IAC
J,CAAC;CAAA;AAED,wFAAwF;AACxF,SAAgB,SAAS,CAAU,IAAY,EAAE,EAAuB;IACtE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAa,CAAC;IACjC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,GAAG,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC;YAAE,SAAS;QACjB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,GAAG,IAAI,GAAG,CAAC,EAAE,KAAK,SAAS;gBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QACxD,CAAC;QAAC,WAAM,CAAC;YACP,+DAA+D;QACjE,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAgB,WAAW,CAAC,IAAY,EAAE,GAAY,EAAE,EAAuB;IAC7E,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAC/D,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.kgToTriplets = kgToTriplets;
|
|
4
|
+
exports.nodeTriplets = nodeTriplets;
|
|
5
|
+
/**
 * Flatten a KnowledgeGraph's relations into triplets — one per relationType label.
 *
 * A relation with no usable label falls back to `related to`, mirroring
 * `MineDataset.toGraph` (which gives KGGen's empty edges the same label). Without
 * this, wanshi relations that came back with `relationType: []` would silently
 * vanish from the triplet metrics while KGGen's empty edges survive — an asymmetry
 * that would understate wanshi on the relation/triple levels.
 *
 * "No usable label" covers every blank shape, so none of them drops or crashes
 * a relation:
 *   - `relationType` missing, null, or an empty array → one `related to` triplet;
 *   - an individual label that is falsy or whitespace-only → `related to`;
 *   - `relationType` given as a bare string → treated as a single label.
 * Non-blank labels are passed through unchanged (not trimmed).
 *
 * @param kg Graph whose `relations` array holds `{ from, to, relationType }` items.
 * @returns Array of `{ subject, predicate, object }` triplets.
 */
function kgToTriplets(kg) {
    const FALLBACK_LABEL = 'related to';
    const isBlank = (label) => !label || (typeof label === 'string' && label.trim() === '');
    return kg.relations.flatMap((r) => {
        let labels;
        if (Array.isArray(r.relationType)) {
            labels = r.relationType;
        }
        else if (typeof r.relationType === 'string') {
            // Tolerate a single label supplied without the array wrapper.
            labels = [r.relationType];
        }
        else {
            labels = [];
        }
        if (labels.length === 0) {
            labels = [FALLBACK_LABEL];
        }
        return labels.map((rel) => ({
            subject: r.from,
            predicate: isBlank(rel) ? FALLBACK_LABEL : rel,
            object: r.to,
        }));
    });
}
|
|
19
|
+
/**
 * Turn every graph NODE into a self-triplet (`subject` and `object` both set to
 * the entity name, with an empty predicate).
 *
 * This lets the existing Exact/SemanticMatcher.matchEntities (which reads
 * subjects ∪ objects) measure entity-capture over the full node set rather than
 * only over relation endpoints. That is the fair cross-tool headline — "did the
 * tool recover the gold entities at all" — independent of whether an edge was
 * also drawn between them (wanshi is edge-sparse, so scoring entities only via
 * relation endpoints would understate it).
 *
 * @param kg Graph whose `entities` array holds items with a `name`.
 * @returns One `{ subject, predicate: '', object }` triplet per entity, in order.
 */
function nodeTriplets(kg) {
    const toSelfTriplet = ({ name }) => ({ subject: name, predicate: '', object: name });
    return kg.entities.map(toSelfTriplet);
}
|
|
30
|
+
//# sourceMappingURL=compareScoring.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compareScoring.js","sourceRoot":"","sources":["../../../src/evaluation/crossre/compareScoring.ts"],"names":[],"mappings":";;AAWA,oCAKC;AAUD,oCAEC;AAzBD;;;;;;;GAOG;AACH,SAAgB,YAAY,CAAC,EAAkB;IAC7C,OAAO,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QACvE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,IAAI,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;GAOG;AACH,SAAgB,YAAY,CAAC,EAAkB;IAC7C,OAAO,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AACtF,CAAC"}
|