@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. package/LICENSE +21 -0
  2. package/README.md +458 -0
  3. package/dist/__tests__/helpers.js +27 -0
  4. package/dist/__tests__/helpers.js.map +1 -0
  5. package/dist/cli/commands/export.command.js +99 -0
  6. package/dist/cli/commands/export.command.js.map +1 -0
  7. package/dist/cli/commands/index.js +22 -0
  8. package/dist/cli/commands/index.js.map +1 -0
  9. package/dist/cli/commands/inspectMerges.command.js +84 -0
  10. package/dist/cli/commands/inspectMerges.command.js.map +1 -0
  11. package/dist/cli/commands/metrics.command.js +196 -0
  12. package/dist/cli/commands/metrics.command.js.map +1 -0
  13. package/dist/cli/commands/process.command.js +82 -0
  14. package/dist/cli/commands/process.command.js.map +1 -0
  15. package/dist/cli/commands/watch.command.js +91 -0
  16. package/dist/cli/commands/watch.command.js.map +1 -0
  17. package/dist/cli/index.js +269 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/cli/optionsToConfig.js +160 -0
  20. package/dist/cli/optionsToConfig.js.map +1 -0
  21. package/dist/config/index.js +59 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/legacyHints.js +113 -0
  24. package/dist/config/legacyHints.js.map +1 -0
  25. package/dist/config/schema.js +803 -0
  26. package/dist/config/schema.js.map +1 -0
  27. package/dist/config/ui.js +221 -0
  28. package/dist/config/ui.js.map +1 -0
  29. package/dist/core/DirectoryProcessor.js +725 -0
  30. package/dist/core/DirectoryProcessor.js.map +1 -0
  31. package/dist/core/adapters/IStructuredAdapter.js +3 -0
  32. package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
  33. package/dist/core/adapters/SqliteAdapter.js +267 -0
  34. package/dist/core/adapters/SqliteAdapter.js.map +1 -0
  35. package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
  36. package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
  37. package/dist/core/adapters/index.js +20 -0
  38. package/dist/core/adapters/index.js.map +1 -0
  39. package/dist/core/checkpoint/CheckpointService.js +188 -0
  40. package/dist/core/checkpoint/CheckpointService.js.map +1 -0
  41. package/dist/core/checkpoint/index.js +18 -0
  42. package/dist/core/checkpoint/index.js.map +1 -0
  43. package/dist/core/corpus/CorpusAnalyzer.js +266 -0
  44. package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
  45. package/dist/core/corpus/CorpusProfileStore.js +92 -0
  46. package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
  47. package/dist/core/corpus/index.js +21 -0
  48. package/dist/core/corpus/index.js.map +1 -0
  49. package/dist/core/corpus/normalizeGlossary.js +60 -0
  50. package/dist/core/corpus/normalizeGlossary.js.map +1 -0
  51. package/dist/core/corpus/relPath.js +52 -0
  52. package/dist/core/corpus/relPath.js.map +1 -0
  53. package/dist/core/corpus/termFrequency.js +86 -0
  54. package/dist/core/corpus/termFrequency.js.map +1 -0
  55. package/dist/core/cost/CostMeter.js +235 -0
  56. package/dist/core/cost/CostMeter.js.map +1 -0
  57. package/dist/core/cost/index.js +19 -0
  58. package/dist/core/cost/index.js.map +1 -0
  59. package/dist/core/cost/prices.js +38 -0
  60. package/dist/core/cost/prices.js.map +1 -0
  61. package/dist/core/cv/ObjectDetectionService.js +119 -0
  62. package/dist/core/cv/ObjectDetectionService.js.map +1 -0
  63. package/dist/core/di/ContainerFactory.js +670 -0
  64. package/dist/core/di/ContainerFactory.js.map +1 -0
  65. package/dist/core/di/DIContainer.js +103 -0
  66. package/dist/core/di/DIContainer.js.map +1 -0
  67. package/dist/core/di/index.js +19 -0
  68. package/dist/core/di/index.js.map +1 -0
  69. package/dist/core/errors/CustomErrors.js +342 -0
  70. package/dist/core/errors/CustomErrors.js.map +1 -0
  71. package/dist/core/errors/index.js +18 -0
  72. package/dist/core/errors/index.js.map +1 -0
  73. package/dist/core/export/KnowledgeGraphExportService.js +56 -0
  74. package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
  75. package/dist/core/export/index.js +19 -0
  76. package/dist/core/export/index.js.map +1 -0
  77. package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
  78. package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
  79. package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
  80. package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
  81. package/dist/core/export/strategies/IExportStrategy.js +3 -0
  82. package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
  83. package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
  84. package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
  85. package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
  86. package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
  87. package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
  88. package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
  89. package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
  90. package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
  91. package/dist/core/export/strategies/McpExportStrategy.js +67 -0
  92. package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
  93. package/dist/core/export/strategies/index.js +25 -0
  94. package/dist/core/export/strategies/index.js.map +1 -0
  95. package/dist/core/export/strategies/kbTriples.js +60 -0
  96. package/dist/core/export/strategies/kbTriples.js.map +1 -0
  97. package/dist/core/index.js +22 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
  100. package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
  101. package/dist/core/knowledge/MergeRecord.js +3 -0
  102. package/dist/core/knowledge/MergeRecord.js.map +1 -0
  103. package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
  104. package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
  105. package/dist/core/knowledge/canon/index.js +18 -0
  106. package/dist/core/knowledge/canon/index.js.map +1 -0
  107. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
  108. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
  109. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
  110. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
  111. package/dist/core/knowledge/contradiction/index.js +19 -0
  112. package/dist/core/knowledge/contradiction/index.js.map +1 -0
  113. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
  114. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
  115. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
  116. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
  117. package/dist/core/knowledge/grounding/index.js +20 -0
  118. package/dist/core/knowledge/grounding/index.js.map +1 -0
  119. package/dist/core/knowledge/grounding/verbalize.js +38 -0
  120. package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
  121. package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
  122. package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
  123. package/dist/core/knowledge/index.js +20 -0
  124. package/dist/core/knowledge/index.js.map +1 -0
  125. package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
  126. package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
  127. package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
  128. package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
  129. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
  130. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
  131. package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
  132. package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
  133. package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
  134. package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
  135. package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
  136. package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
  137. package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
  138. package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
  139. package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
  140. package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
  141. package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
  142. package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
  143. package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
  144. package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
  145. package/dist/core/knowledge/vocabulary.js +162 -0
  146. package/dist/core/knowledge/vocabulary.js.map +1 -0
  147. package/dist/core/llm/EmbeddingService.js +113 -0
  148. package/dist/core/llm/EmbeddingService.js.map +1 -0
  149. package/dist/core/llm/OllamaService.js +146 -0
  150. package/dist/core/llm/OllamaService.js.map +1 -0
  151. package/dist/core/llm/OpenAICompatibleService.js +190 -0
  152. package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
  153. package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
  154. package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
  155. package/dist/core/llm/embeddingUtils.js +25 -0
  156. package/dist/core/llm/embeddingUtils.js.map +1 -0
  157. package/dist/core/llm/index.js +23 -0
  158. package/dist/core/llm/index.js.map +1 -0
  159. package/dist/core/llm/prompts/PromptManager.js +388 -0
  160. package/dist/core/llm/prompts/PromptManager.js.map +1 -0
  161. package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
  162. package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
  163. package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
  164. package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
  165. package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
  166. package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
  167. package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
  168. package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
  169. package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
  170. package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
  171. package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
  172. package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
  173. package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
  174. package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
  175. package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
  176. package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
  177. package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
  178. package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
  179. package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
  180. package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
  181. package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
  182. package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
  183. package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
  184. package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
  185. package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
  186. package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
  187. package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
  188. package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
  189. package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
  190. package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
  191. package/dist/core/pipeline/GroundingTransform.js +52 -0
  192. package/dist/core/pipeline/GroundingTransform.js.map +1 -0
  193. package/dist/core/pipeline/PipelineRunner.js +51 -0
  194. package/dist/core/pipeline/PipelineRunner.js.map +1 -0
  195. package/dist/core/pipeline/RelationFilterTransform.js +72 -0
  196. package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
  197. package/dist/core/pipeline/index.js +20 -0
  198. package/dist/core/pipeline/index.js.map +1 -0
  199. package/dist/core/processor/FileProcessor.js +184 -0
  200. package/dist/core/processor/FileProcessor.js.map +1 -0
  201. package/dist/core/processor/ProcessedRegistry.js +38 -0
  202. package/dist/core/processor/ProcessedRegistry.js.map +1 -0
  203. package/dist/core/processor/ast/AstSeedService.js +0 -0
  204. package/dist/core/processor/ast/AstSeedService.js.map +1 -0
  205. package/dist/core/processor/ast/AstSymbolStore.js +110 -0
  206. package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
  207. package/dist/core/processor/ast/index.js +19 -0
  208. package/dist/core/processor/ast/index.js.map +1 -0
  209. package/dist/core/processor/chunking/TextChunker.js +98 -0
  210. package/dist/core/processor/chunking/TextChunker.js.map +1 -0
  211. package/dist/core/processor/chunking/index.js +18 -0
  212. package/dist/core/processor/chunking/index.js.map +1 -0
  213. package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
  214. package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
  215. package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
  216. package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
  217. package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
  218. package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
  219. package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
  220. package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
  221. package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
  222. package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
  223. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
  224. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
  225. package/dist/core/processor/classifier/index.js +21 -0
  226. package/dist/core/processor/classifier/index.js.map +1 -0
  227. package/dist/core/processor/classifier/mergeClassifications.js +32 -0
  228. package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
  229. package/dist/core/processor/index.js +20 -0
  230. package/dist/core/processor/index.js.map +1 -0
  231. package/dist/core/processor/readers/AudioReader.js +462 -0
  232. package/dist/core/processor/readers/AudioReader.js.map +1 -0
  233. package/dist/core/processor/readers/BinaryReader.js +90 -0
  234. package/dist/core/processor/readers/BinaryReader.js.map +1 -0
  235. package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
  236. package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
  237. package/dist/core/processor/readers/ChatExportReader.js +365 -0
  238. package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
  239. package/dist/core/processor/readers/DoclingReader.js +445 -0
  240. package/dist/core/processor/readers/DoclingReader.js.map +1 -0
  241. package/dist/core/processor/readers/EmailReader.js +259 -0
  242. package/dist/core/processor/readers/EmailReader.js.map +1 -0
  243. package/dist/core/processor/readers/EpubReader.js +175 -0
  244. package/dist/core/processor/readers/EpubReader.js.map +1 -0
  245. package/dist/core/processor/readers/FileReader.js +90 -0
  246. package/dist/core/processor/readers/FileReader.js.map +1 -0
  247. package/dist/core/processor/readers/FileReaderFactory.js +49 -0
  248. package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
  249. package/dist/core/processor/readers/HtmlReader.js +371 -0
  250. package/dist/core/processor/readers/HtmlReader.js.map +1 -0
  251. package/dist/core/processor/readers/ImageReader.js +162 -0
  252. package/dist/core/processor/readers/ImageReader.js.map +1 -0
  253. package/dist/core/processor/readers/JsonFileReader.js +232 -0
  254. package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
  255. package/dist/core/processor/readers/JupyterReader.js +178 -0
  256. package/dist/core/processor/readers/JupyterReader.js.map +1 -0
  257. package/dist/core/processor/readers/LatexReader.js +176 -0
  258. package/dist/core/processor/readers/LatexReader.js.map +1 -0
  259. package/dist/core/processor/readers/MarkdownReader.js +289 -0
  260. package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
  261. package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
  262. package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
  263. package/dist/core/processor/readers/MistralOcrReader.js +198 -0
  264. package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
  265. package/dist/core/processor/readers/OfficeReader.js +174 -0
  266. package/dist/core/processor/readers/OfficeReader.js.map +1 -0
  267. package/dist/core/processor/readers/PdfReader.js +116 -0
  268. package/dist/core/processor/readers/PdfReader.js.map +1 -0
  269. package/dist/core/processor/readers/RtfReader.js +107 -0
  270. package/dist/core/processor/readers/RtfReader.js.map +1 -0
  271. package/dist/core/processor/readers/SubtitleReader.js +145 -0
  272. package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
  273. package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
  274. package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
  275. package/dist/core/processor/readers/TextReader.js +129 -0
  276. package/dist/core/processor/readers/TextReader.js.map +1 -0
  277. package/dist/core/processor/readers/TranscriptReader.js +234 -0
  278. package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
  279. package/dist/core/processor/readers/image/imageMetadata.js +155 -0
  280. package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
  281. package/dist/core/processor/readers/index.js +41 -0
  282. package/dist/core/processor/readers/index.js.map +1 -0
  283. package/dist/core/processor/readers/referenceExtraction.js +198 -0
  284. package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
  285. package/dist/core/processor/readers/stripReferences.js +59 -0
  286. package/dist/core/processor/readers/stripReferences.js.map +1 -0
  287. package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
  288. package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
  289. package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
  290. package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
  291. package/dist/core/progress/NoopProgressEmitter.js +15 -0
  292. package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
  293. package/dist/core/progress/index.js +19 -0
  294. package/dist/core/progress/index.js.map +1 -0
  295. package/dist/core/trace/TraceWriter.js +100 -0
  296. package/dist/core/trace/TraceWriter.js.map +1 -0
  297. package/dist/core/trace/events.js +13 -0
  298. package/dist/core/trace/events.js.map +1 -0
  299. package/dist/core/trace/index.js +20 -0
  300. package/dist/core/trace/index.js.map +1 -0
  301. package/dist/core/trace/lineage.js +97 -0
  302. package/dist/core/trace/lineage.js.map +1 -0
  303. package/dist/evaluation/BenchmarkRunner.js +171 -0
  304. package/dist/evaluation/BenchmarkRunner.js.map +1 -0
  305. package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
  306. package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
  307. package/dist/evaluation/classifier/labeledSamples.js +379 -0
  308. package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
  309. package/dist/evaluation/compare/goldCompare.js +126 -0
  310. package/dist/evaluation/compare/goldCompare.js.map +1 -0
  311. package/dist/evaluation/crossre/compareScoring.js +30 -0
  312. package/dist/evaluation/crossre/compareScoring.js.map +1 -0
  313. package/dist/evaluation/datasets/CrossREDataset.js +170 -0
  314. package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
  315. package/dist/evaluation/datasets/IDataset.js +3 -0
  316. package/dist/evaluation/datasets/IDataset.js.map +1 -0
  317. package/dist/evaluation/datasets/RebelDataset.js +117 -0
  318. package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
  319. package/dist/evaluation/datasets/RedocredDataset.js +218 -0
  320. package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
  321. package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
  322. package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
  323. package/dist/evaluation/index.js +33 -0
  324. package/dist/evaluation/index.js.map +1 -0
  325. package/dist/evaluation/matching/ExactMatcher.js +75 -0
  326. package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
  327. package/dist/evaluation/matching/SemanticMatcher.js +143 -0
  328. package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
  329. package/dist/evaluation/metrics/TripleMetrics.js +64 -0
  330. package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
  331. package/dist/evaluation/mine/MineCheckpoint.js +114 -0
  332. package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
  333. package/dist/evaluation/mine/MineDataset.js +208 -0
  334. package/dist/evaluation/mine/MineDataset.js.map +1 -0
  335. package/dist/evaluation/mine/MineReporter.js +98 -0
  336. package/dist/evaluation/mine/MineReporter.js.map +1 -0
  337. package/dist/evaluation/mine/MineRunner.js +148 -0
  338. package/dist/evaluation/mine/MineRunner.js.map +1 -0
  339. package/dist/evaluation/mine/MineScorer.js +127 -0
  340. package/dist/evaluation/mine/MineScorer.js.map +1 -0
  341. package/dist/evaluation/mine/types.js +12 -0
  342. package/dist/evaluation/mine/types.js.map +1 -0
  343. package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
  344. package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
  345. package/dist/evaluation/reporters/JsonReporter.js +50 -0
  346. package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
  347. package/dist/index.js +28 -0
  348. package/dist/index.js.map +1 -0
  349. package/dist/quality/CompositeScore.js +61 -0
  350. package/dist/quality/CompositeScore.js.map +1 -0
  351. package/dist/quality/ConsistencyMetrics.js +70 -0
  352. package/dist/quality/ConsistencyMetrics.js.map +1 -0
  353. package/dist/quality/FactualMetrics.js +76 -0
  354. package/dist/quality/FactualMetrics.js.map +1 -0
  355. package/dist/quality/GraphHealthMetrics.js +68 -0
  356. package/dist/quality/GraphHealthMetrics.js.map +1 -0
  357. package/dist/quality/SemanticMetrics.js +102 -0
  358. package/dist/quality/SemanticMetrics.js.map +1 -0
  359. package/dist/quality/StructuralMetrics.js +60 -0
  360. package/dist/quality/StructuralMetrics.js.map +1 -0
  361. package/dist/quality/index.js +23 -0
  362. package/dist/quality/index.js.map +1 -0
  363. package/dist/shared/index.js +20 -0
  364. package/dist/shared/index.js.map +1 -0
  365. package/dist/shared/logger/Logger.js +3 -0
  366. package/dist/shared/logger/Logger.js.map +1 -0
  367. package/dist/shared/logger/LoggerFactory.js +75 -0
  368. package/dist/shared/logger/LoggerFactory.js.map +1 -0
  369. package/dist/shared/logger/index.js +19 -0
  370. package/dist/shared/logger/index.js.map +1 -0
  371. package/dist/shared/shutdown.js +30 -0
  372. package/dist/shared/shutdown.js.map +1 -0
  373. package/dist/shared/utils/agglomerativeCluster.js +269 -0
  374. package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
  375. package/dist/shared/utils/astSymbols.js +69 -0
  376. package/dist/shared/utils/astSymbols.js.map +1 -0
  377. package/dist/shared/utils/cosineSimilarity.js +18 -0
  378. package/dist/shared/utils/cosineSimilarity.js.map +1 -0
  379. package/dist/shared/utils/directoryTree.js +184 -0
  380. package/dist/shared/utils/directoryTree.js.map +1 -0
  381. package/dist/shared/utils/documentOutline.js +74 -0
  382. package/dist/shared/utils/documentOutline.js.map +1 -0
  383. package/dist/shared/utils/index.js +24 -0
  384. package/dist/shared/utils/index.js.map +1 -0
  385. package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
  386. package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
  387. package/dist/shared/utils/parseJsonLenient.js +27 -0
  388. package/dist/shared/utils/parseJsonLenient.js.map +1 -0
  389. package/dist/shared/utils/readConfig.js +42 -0
  390. package/dist/shared/utils/readConfig.js.map +1 -0
  391. package/dist/shared/utils/readRtf.js +216 -0
  392. package/dist/shared/utils/readRtf.js.map +1 -0
  393. package/dist/shared/utils/softmax.js +26 -0
  394. package/dist/shared/utils/softmax.js.map +1 -0
  395. package/dist/types/ContentClass.js +3 -0
  396. package/dist/types/ContentClass.js.map +1 -0
  397. package/dist/types/CorpusProfile.js +3 -0
  398. package/dist/types/CorpusProfile.js.map +1 -0
  399. package/dist/types/IContradictionChecker.js +3 -0
  400. package/dist/types/IContradictionChecker.js.map +1 -0
  401. package/dist/types/ICorpusAnalyzer.js +3 -0
  402. package/dist/types/ICorpusAnalyzer.js.map +1 -0
  403. package/dist/types/IDirectoryProcessor.js +3 -0
  404. package/dist/types/IDirectoryProcessor.js.map +1 -0
  405. package/dist/types/IEmbeddingProvider.js +3 -0
  406. package/dist/types/IEmbeddingProvider.js.map +1 -0
  407. package/dist/types/IEmbeddingService.js +6 -0
  408. package/dist/types/IEmbeddingService.js.map +1 -0
  409. package/dist/types/IFileProcessor.js +3 -0
  410. package/dist/types/IFileProcessor.js.map +1 -0
  411. package/dist/types/IGroundingChecker.js +3 -0
  412. package/dist/types/IGroundingChecker.js.map +1 -0
  413. package/dist/types/IKnowledgeGraphBuilder.js +3 -0
  414. package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
  415. package/dist/types/IKnowledgeGraphExporter.js +3 -0
  416. package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
  417. package/dist/types/IKnowledgeGraphMerger.js +3 -0
  418. package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
  419. package/dist/types/IKnowledgeGraphSearch.js +3 -0
  420. package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
  421. package/dist/types/ILLMProvider.js +3 -0
  422. package/dist/types/ILLMProvider.js.map +1 -0
  423. package/dist/types/ILLMService.js +3 -0
  424. package/dist/types/ILLMService.js.map +1 -0
  425. package/dist/types/IObjectDetector.js +3 -0
  426. package/dist/types/IObjectDetector.js.map +1 -0
  427. package/dist/types/IProcessingService.js +3 -0
  428. package/dist/types/IProcessingService.js.map +1 -0
  429. package/dist/types/IProgressEmitter.js +3 -0
  430. package/dist/types/IProgressEmitter.js.map +1 -0
  431. package/dist/types/IPromptManager.js +3 -0
  432. package/dist/types/IPromptManager.js.map +1 -0
  433. package/dist/types/KnowledgeGraph.js +3 -0
  434. package/dist/types/KnowledgeGraph.js.map +1 -0
  435. package/dist/types/MCPKnowledgeGraph.js +3 -0
  436. package/dist/types/MCPKnowledgeGraph.js.map +1 -0
  437. package/dist/types/Observation.js +21 -0
  438. package/dist/types/Observation.js.map +1 -0
  439. package/dist/types/ProcessingOptions.js +3 -0
  440. package/dist/types/ProcessingOptions.js.map +1 -0
  441. package/dist/types/index.js +40 -0
  442. package/dist/types/index.js.map +1 -0
  443. package/package.json +122 -0
@@ -0,0 +1,379 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LABELED_SAMPLES = void 0;
4
+ exports.LABELED_SAMPLES = [
5
+ // ── code ───────────────────────────────────────────────────────────────
6
+ {
7
+ id: "code-1",
8
+ path: "src/services/AuthService.ts",
9
+ expected: "code",
10
+ content: `import { TokenStore } from "./TokenStore";
11
+
12
+ export class AuthService extends BaseService {
13
+ async login(user: string, password: string): Promise<Session> {
14
+ const hash = await this.hasher.hash(password);
15
+ if (!this.store.verify(user, hash)) {
16
+ throw new Error("invalid credentials");
17
+ }
18
+ return this.store.createSession(user);
19
+ }
20
+ }`,
21
+ },
22
+ {
23
+ id: "code-2",
24
+ path: "scripts/clean_data.py",
25
+ expected: "code",
26
+ content: `import os
27
+ import json
28
+
29
+ def clean(records):
30
+ for r in records:
31
+ if r.get("value") is None:
32
+ continue
33
+ yield {"id": r["id"], "value": float(r["value"])}
34
+
35
+ def main():
36
+ with open("data.json") as f:
37
+ records = json.load(f)
38
+ print(list(clean(records)))`,
39
+ },
40
+ // ── financial ──────────────────────────────────────────────────────────
41
+ {
42
+ id: "financial-1",
43
+ path: "reports/q3-earnings.md",
44
+ expected: "financial",
45
+ content: `Q3 revenue rose to $4.2 billion, beating analyst estimates of $3.9 billion.
46
+ EBITDA margin improved to 28%. The board declared a dividend of $0.45 per share.
47
+ NASDAQ: ACME closed up 6% after the earnings call. Full-year guidance was raised.`,
48
+ },
49
+ {
50
+ id: "financial-2",
51
+ path: "finance/portfolio-review.txt",
52
+ expected: "financial",
53
+ content: `Portfolio allocation: 60% equity, 30% bonds, 10% cash. The S&P 500 holdings
54
+ returned $1,250,000 in unrealized gains. P/E ratio of the tech sleeve sits at 24.
55
+ Dividend yield across the portfolio is 2.1%. SEC 10-K filings reviewed for each holding.`,
56
+ },
57
+ // ── medical ────────────────────────────────────────────────────────────
58
+ {
59
+ id: "medical-1",
60
+ path: "records/patient-note.txt",
61
+ expected: "medical",
62
+ content: `Patient presents with hypertension. Prescribed lisinopril 10 mg PO daily and
63
+ metformin 500 mg BID for type 2 diabetes. Blood pressure 148/92 mmHg, heart rate 84 bpm.
64
+ Diagnosis confirmed; follow-up in 4 weeks to reassess medication and symptoms.`,
65
+ },
66
+ {
67
+ id: "medical-2",
68
+ path: "trials/study-protocol.txt",
69
+ expected: "medical",
70
+ content: `This randomized controlled, double-blind, placebo-controlled trial evaluates
71
+ drug efficacy in Phase II. FDA approval is pending. Adverse events were recorded per
72
+ protocol. IRB-approved informed consent obtained from all participants. 200 mg IV dose.`,
73
+ },
74
+ // ── legal ──────────────────────────────────────────────────────────────
75
+ {
76
+ id: "legal-1",
77
+ path: "legal/nda.txt",
78
+ expected: "legal",
79
+ content: `WHEREAS the parties agree to the following terms. Effective Date: 2023-01-01.
80
+ Termination: either party may terminate upon 30 days notice. The receiving party shall
81
+ not disclose Confidential Information. This agreement is governed by the jurisdiction of Delaware.`,
82
+ },
83
+ {
84
+ id: "legal-2",
85
+ path: "contracts/master-services-agreement.md",
86
+ expected: "legal",
87
+ content: `The Contractor shall indemnify the Client for any breach of this Agreement.
88
+ Liability for damages is capped per Section 7.2. Counsel for both parties reviewed the
89
+ provisions. The plaintiff waives any claim arising under this clause.`,
90
+ },
91
+ // ── research ───────────────────────────────────────────────────────────
92
+ {
93
+ id: "research-1",
94
+ path: "papers/sparse-attention.md",
95
+ expected: "research",
96
+ content: `## Abstract
97
+ We propose a sparse-attention method and demonstrate that it reduces memory cost.
98
+ Our experimental results on the PG-19 dataset improve perplexity over the baseline.
99
+ We build on Beltagy et al. (2020). doi:10.1145/1234567. p < 0.01 across 4 seeds.`,
100
+ },
101
+ {
102
+ id: "research-2",
103
+ path: "research/ablation-findings.md",
104
+ expected: "research",
105
+ content: `## Methodology
106
+ We investigated whether dropout improves generalization. The hypothesis was tested on a
107
+ held-out benchmark. Experimental evaluation reports accuracy: 0.91 and f1-score: 0.88.
108
+ The confidence interval excludes the baseline. Smith et al. (2019) reported similar findings.`,
109
+ },
110
+ // ── transcript ─────────────────────────────────────────────────────────
111
+ {
112
+ id: "transcript-1",
113
+ path: "meetings/q4-standup.txt",
114
+ expected: "transcript",
115
+ content: `Meeting Minutes - Q4 Standup
116
+ Attendees: John, Sarah, Mike
117
+ Sarah: we agreed to ship the beta on Friday.
118
+ Mike: I'll follow up with the vendor.
119
+ Action Items:
120
+ - John: finalize the budget by next week`,
121
+ },
122
+ {
123
+ id: "transcript-2",
124
+ path: "transcripts/customer-interview.txt",
125
+ expected: "transcript",
126
+ content: `Interviewer: what's your biggest pain point today?
127
+ Participant: onboarding takes too long.
128
+ Interviewer: and how would you fix it?
129
+ Participant: a guided setup. We decided to prototype that next sprint.
130
+ Speaker 2: agreed, let's schedule a review.`,
131
+ },
132
+ // ── tabular ────────────────────────────────────────────────────────────
133
+ {
134
+ id: "tabular-1",
135
+ path: "data/sales-2023.csv",
136
+ expected: "tabular",
137
+ content: `product,region,units_sold,revenue,margin
138
+ Widget A,EMEA,1500,15000,0.32
139
+ Widget B,APAC,2200,25000,0.28
140
+ Widget C,AMER,900,9000,0.41
141
+ Widget D,EMEA,1750,19500,0.30`,
142
+ },
143
+ {
144
+ id: "tabular-2",
145
+ path: "exports/inventory.tsv",
146
+ expected: "tabular",
147
+ content: `sku\twarehouse\ton_hand\treorder_point\tsupplier
148
+ A-100\tDAL\t420\t100\tAcme
149
+ B-200\tSEA\t85\t120\tGlobex
150
+ C-300\tNYC\t310\t150\tInitech
151
+ D-400\tDAL\t12\t50\tAcme`,
152
+ },
153
+ // ── communication ──────────────────────────────────────────────────────
154
+ {
155
+ id: "communication-1",
156
+ path: "mail/budget-update.eml",
157
+ expected: "communication",
158
+ content: `From: john@acme.com
159
+ To: sarah@acme.com
160
+ Cc: mike@acme.com
161
+ Subject: Q4 budget draft
162
+
163
+ Hi Sarah, attached is the budget draft. Please review the travel section.
164
+ Best regards,
165
+ John`,
166
+ },
167
+ {
168
+ id: "communication-2",
169
+ path: "messages/reply-thread.txt",
170
+ expected: "communication",
171
+ content: `On Mon, Nov 3, Sarah Lee wrote:
172
+ > Can you send the updated numbers?
173
+
174
+ Hi Sarah, sure — forwarded them just now. Thanks for the reminder.
175
+ Regards, John (cc: mike@acme.com)`,
176
+ },
177
+ // ── documentation ──────────────────────────────────────────────────────
178
+ {
179
+ id: "documentation-1",
180
+ path: "README.md",
181
+ expected: "documentation",
182
+ content: `# Wanshi
183
+
184
+ Turns files into knowledge graphs.
185
+
186
+ ## Installation
187
+ \`\`\`bash
188
+ npm install -g wanshi
189
+ \`\`\`
190
+
191
+ ## Getting Started
192
+ To get started, run the CLI. You can configure the output format in config.yaml.`,
193
+ },
194
+ {
195
+ id: "documentation-2",
196
+ path: "docs/api-guide.md",
197
+ expected: "documentation",
198
+ content: `## API Reference
199
+ This guide shows how to use the endpoints. Follow these steps to authenticate.
200
+
201
+ \`\`\`bash
202
+ GET /graph
203
+ \`\`\`
204
+
205
+ You need to set the API key first. See the configuration section for parameters.`,
206
+ },
207
+ // ── technical ──────────────────────────────────────────────────────────
208
+ {
209
+ id: "technical-1",
210
+ path: "logs/app.log",
211
+ expected: "technical",
212
+ content: `2023-11-03T09:12:44.512Z INFO server started on port 8080
213
+ 2023-11-03T09:12:45.001Z WARN cache miss for key user:42
214
+ 2023-11-03T09:12:46.220Z ERROR connection timeout to database after 5000ms
215
+ 2023-11-03T09:12:47.330Z INFO retrying connection (attempt 2)`,
216
+ },
217
+ {
218
+ id: "technical-2",
219
+ path: "config/server.yaml",
220
+ expected: "technical",
221
+ content: `host: 0.0.0.0
222
+ port: 8080
223
+ timeout: 30
224
+ database:
225
+ url: postgres://localhost/app
226
+ pool: 10
227
+ cache:
228
+ ttl: 300`,
229
+ },
230
+ // ── narrative ──────────────────────────────────────────────────────────
231
+ {
232
+ id: "narrative-1",
233
+ path: "articles/ai-and-society.md",
234
+ expected: "narrative",
235
+ content: `The future of work is being reshaped by automation. However, the picture is more
236
+ nuanced than the headlines suggest. Furthermore, many roles will evolve rather than vanish.
237
+ According to recent analysis, the transition will be gradual. The story explores both sides.`,
238
+ },
239
+ {
240
+ id: "narrative-2",
241
+ path: "posts/remote-work-essay.txt",
242
+ expected: "narrative",
243
+ content: `This essay examines how remote work changed team culture. It discusses the trade-offs
244
+ between flexibility and connection. Therefore, companies must rethink their norms.
245
+ Meanwhile, employees report higher satisfaction. The article argues for a hybrid approach.`,
246
+ },
247
+ // ── reference ──────────────────────────────────────────────────────────
248
+ {
249
+ id: "reference-1",
250
+ path: "reference/glossary.md",
251
+ expected: "reference",
252
+ content: `Entity: a uniquely named node in the knowledge graph.
253
+ Observation: a provenance-stamped fact attached to an entity.
254
+ Relation: a typed directed edge between two entities.
255
+ See also: cross-reference the schema definition for each term.`,
256
+ },
257
+ {
258
+ id: "reference-2",
259
+ path: "specs/abbreviations.md",
260
+ expected: "reference",
261
+ content: `API: Application Programming Interface.
262
+ DOI: Digital Object Identifier.
263
+ KG: Knowledge Graph.
264
+ This catalog lists common acronyms alphabetically. Each entry is indexed by its symbol.`,
265
+ },
266
+ // ── hard / ambiguous (neutral paths, cross-cutting signals) ─────────────
267
+ // These are the cases that matter for S2/S3: confusable content and paths
268
+ // that carry no telltale extension, so the classifier must decide on content
269
+ // alone. The gold label is the most defensible single class, but the current
270
+ // heuristic is expected to confuse several of these — that is the point.
271
+ {
272
+ id: "hard-research-prose",
273
+ path: "notes/sparse-idea.md",
274
+ expected: "research",
275
+ content: `We hypothesize that sparse attention preserves accuracy while cutting cost.
276
+ Our analysis on a held-out benchmark suggests the approach generalizes. The methodology
277
+ extends prior work, and the experimental results beat the baseline on most metrics.`,
278
+ },
279
+ {
280
+ id: "hard-narrative-sciencey",
281
+ path: "articles/science-column.md",
282
+ expected: "narrative",
283
+ content: `The promise of fusion energy has captivated scientists for decades. According to
284
+ researchers, recent breakthroughs bring it closer. However, skeptics argue the timeline
285
+ remains uncertain. This article explores what the latest experiments mean for the public.`,
286
+ },
287
+ {
288
+ id: "hard-doc-plain",
289
+ path: "docs/overview.txt",
290
+ expected: "documentation",
291
+ content: `This page explains how to set up the tool. You can install it with the package
292
+ manager, then run the command. To get started, follow these steps and configure the
293
+ options. See the usage section for more, and refer to the tutorial for a walkthrough.`,
294
+ },
295
+ {
296
+ id: "hard-reference-defs",
297
+ path: "notes/terms.txt",
298
+ expected: "reference",
299
+ content: `Idempotent: an operation that yields the same result when applied repeatedly.
300
+ Latency: the time between a request and its response.
301
+ Throughput: the number of operations completed per unit time.
302
+ See also: the performance glossary for related definitions.`,
303
+ },
304
+ {
305
+ id: "hard-technical-plain",
306
+ path: "infra/setup.txt",
307
+ expected: "technical",
308
+ content: `host: api.internal
309
+ port: 9090
310
+ timeout: 60
311
+ The service connects to the database and the cache. On startup it loads the configuration
312
+ and binds to the port. Restart the daemon after changing any setting.`,
313
+ },
314
+ {
315
+ id: "hard-code-in-md",
316
+ path: "notes/snippet.md",
317
+ expected: "code",
318
+ content: `Here's the helper I wrote:
319
+
320
+ \`\`\`js
321
+ function debounce(fn, ms) {
322
+ let t;
323
+ return (...args) => { clearTimeout(t); t = setTimeout(() => fn(...args), ms); };
324
+ }
325
+ \`\`\``,
326
+ },
327
+ {
328
+ id: "hard-communication-chat",
329
+ path: "chat/dm.md",
330
+ expected: "communication",
331
+ content: `hey, did you get a chance to look at the proposal?
332
+ yeah, sent you my notes a minute ago. let me know if the budget section makes sense.
333
+ thanks! I'll reply after the call. cc'ing Dana so she's in the loop.`,
334
+ },
335
+ {
336
+ id: "hard-transcript-plain",
337
+ path: "audio/recording.txt",
338
+ expected: "transcript",
339
+ content: `Anna: so where did we land on the launch date?
340
+ Ben: I think the 15th is realistic if QA signs off.
341
+ Anna: okay, let's plan for that and revisit Thursday.
342
+ Ben: works for me. I'll update the schedule.`,
343
+ },
344
+ {
345
+ id: "hard-medical-prose",
346
+ path: "health/summary.txt",
347
+ expected: "medical",
348
+ content: `The patient has a chronic condition managed with ongoing treatment. Symptoms include
349
+ fatigue and elevated blood pressure. The care team adjusted the therapeutic plan and will
350
+ monitor the diagnosis. A follow-up visit is scheduled to review the prescription.`,
351
+ },
352
+ {
353
+ id: "hard-financial-prose",
354
+ path: "memos/quarter.txt",
355
+ expected: "financial",
356
+ content: `Revenue grew this quarter and earnings exceeded our internal forecast. The investment
357
+ in the new line is paying off, and the board expects the dividend to hold. Analysts remain
358
+ bullish on the stock despite a softer outlook for the sector.`,
359
+ },
360
+ {
361
+ id: "hard-legal-light",
362
+ path: "policy/terms.txt",
363
+ expected: "legal",
364
+ content: `By using this service you agree to these terms. The provider may terminate access for
365
+ breach of the conditions. Each party retains liability only as set out in this agreement.
366
+ Disputes are subject to the courts of the stated jurisdiction.`,
367
+ },
368
+ {
369
+ id: "hard-tabular-markdown",
370
+ path: "reports/summary.md",
371
+ expected: "tabular",
372
+ content: `| product | region | units | revenue |
373
+ | ------- | ------ | ----- | ------- |
374
+ | Widget A | EMEA | 1500 | 15000 |
375
+ | Widget B | APAC | 2200 | 25000 |
376
+ | Widget C | AMER | 900 | 9000 |`,
377
+ },
378
+ ];
379
+ //# sourceMappingURL=labeledSamples.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"labeledSamples.js","sourceRoot":"","sources":["../../../src/evaluation/classifier/labeledSamples.ts"],"names":[],"mappings":";;;AAoBa,QAAA,eAAe,GAAoB;IAC9C,0EAA0E;IAC1E;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;;;;EAUX;KACC;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;;;;;;gCAYmB;KAC7B;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;kFAEqE;KAC/E;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,8BAA8B;QACpC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;yFAE4E;KACtF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,0BAA0B;QAChC,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;+EAEkE;KAC5E;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,2BAA2B;QACjC,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;wFAE2E;KACrF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,eAAe;QACrB,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;mGAEsF;KAChG;IACD;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,wCAAwC;QAC9C,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;sEAEyD;KACnE;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;;iFAGoE;KAC9E;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,+BAA+B;QACrC,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;;8FAGiF;KAC3F;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,yBAAyB;QAC/B,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;;;yCAK4B;KACtC;IACD;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,oCAAoC;QAC1C,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;;4CAI+B;KACzC;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,qBAAqB;QAC3B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;;;8BAIiB;KAC3B;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;;;yBAIY;KACtB;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;KAOR;KACF;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,2BAA2B;QACjC,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;kCAIqB;KAC/B;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,WAAW;QACjB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;;;;iFAUoE;K
AC9E;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;;;;;;iFAOoE;KAC9E;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,cAAc;QACpB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;+DAGkD;KAC5D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;;;;;WAOF;KACR;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;6FAEgF;KAC1F;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;2FAE8E;KACxF;IAED,0EAA0E;IAC1E;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;+DAGkD;KAC5D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,wBAAwB;QAC9B,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;wFAG2E;KACrF;IAED,2EAA2E;IAC3E,0EAA0E;IAC1E,6EAA6E;IAC7E,6EAA6E;IAC7E,yEAAyE;IACzE;QACE,EAAE,EAAE,qBAAqB;QACzB,IAAI,EAAE,sBAAsB;QAC5B,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE;;oFAEuE;KACjF;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,4BAA4B;QAClC,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;0FAE6E;KACvF;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;sFAEyE;KACnF;IACD;QACE,EAAE,EAAE,qBAAqB;QACzB,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;4DAG+C;KACzD;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;;;sEAIyD;KACnE;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE;;;;;;;OAON;KACJ;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,YAAY;QAClB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE;;qEAEwD;KAClE;IACD;QACE,EAAE,EAAE,uBAAuB;QAC3B,IAAI,EAAE,qBAAqB;QAC3B,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE;;;6CAGgC;KAC1C;IACD;QACE,EAAE,EAAE,oBAAoB;QACxB,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE;;kFAEqE;KAC/E;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE;;8DAEiD;KAC3D;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE;;+DAEkD;KAC5D;IACD;QACE,EAAE,EAAE,uBAAuB;QAC3B,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,SAAS;QA
CnB,OAAO,EAAE;;;;iCAIoB;KAC9B;CACF,CAAC"}
@@ -0,0 +1,126 @@
1
+ "use strict";
2
+ // Shared scoring core for the gold-labeled two-way comparisons (wanshi vs KGGen).
3
+ //
4
+ // Dataset-agnostic: CrossRE (sentence-level, 6 domains), SemEval (sentence-level,
5
+ // no domains), Re-DocRED (document-level, Wikidata schema + Ign-F1). The CLI
6
+ // (scripts/gold-compare.ts) handles loading/extraction/caching; this module owns
7
+ // the metric math so there is ONE source of truth across datasets.
8
+ //
9
+ // HEADLINE = node entity-capture (semantic): every graph node → a self-triplet →
10
+ // matched against gold entities. Fair across open-predicate tools whose free
11
+ // predicates won't string/embed-match a dataset's abstract gold predicate vocab.
12
+ // Triplet-level entity/relation/triple F1 are also reported (understate uniformly).
13
+ //
14
+ // Optional per-call extras:
15
+ // - domainById → per-domain node-capture F1 (CrossRE).
16
+ // - ignoreKeys → Re-DocRED Ign-F1: drop train-seen (subj|pred|obj) triples from
17
+ // BOTH gold and predictions before the triplet-level metrics, so a
18
+ // tool gets no credit for memorized training facts.
19
// Downlevel async/await helper: drives a generator function as if it were an
// async function and returns a promise for its final return value.
//   thisArg / _arguments - forwarded to the generator function via apply()
//   P                    - promise constructor to use (defaults to Promise)
//   generator            - generator function whose `yield`s act as `await`s
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap a yielded value in P unless it already is an instance of P.
    function toPromise(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) {
            resolve(value);
        });
    }
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // Settle the outer promise when the generator finishes; otherwise wait
        // on the yielded value and resume the generator with its outcome.
        function advance(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        }
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        function onRejected(value) {
            try {
                advance(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        // Instantiate the generator (reusing the `generator` binding) and start it.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
28
+ Object.defineProperty(exports, "__esModule", { value: true });
29
+ exports.tripleKey = tripleKey;
30
+ exports.scoreGraph = scoreGraph;
31
+ exports.loadJsonl = loadJsonl;
32
+ exports.appendJsonl = appendJsonl;
33
+ const TripleMetrics_1 = require("../metrics/TripleMetrics");
34
+ const compareScoring_1 = require("../crossre/compareScoring");
35
/** Add b's tp/fp/fn counts onto a, mutating a in place. Returns nothing. */
const addTally = (a, b) => {
    for (const field of ['tp', 'fp', 'fn']) {
        a[field] += b[field];
    }
};
40
/**
 * Build the normalized `subject|predicate|object` key used to exclude
 * train-seen triples (Ign-F1). Each part is trimmed and lower-cased.
 */
function tripleKey(t) {
    const normalize = (part) => part.trim().toLowerCase();
    return [t.subject, t.predicate, t.object].map((part) => normalize(part)).join('|');
}
44
/**
 * Score predicted graphs against gold triples for the given sample ids.
 *
 * For each id it computes triplet-level exact and semantic metrics, tallies
 * node entity-capture (every graph node as a self-triplet matched against
 * gold entities), and optionally:
 *   - opts.domainById: groups the semantic node tallies per domain
 *     (ids without a domain are grouped under 'unknown');
 *   - opts.ignoreKeys: re-scores triplets after dropping every predicted and
 *     gold triple whose tripleKey() is in the set (Ign-F1).
 *
 * Ids missing from graphById are scored as an empty graph; ids missing from
 * goldById are scored against an empty gold list.
 *
 * @param ids        sample ids to score (iterated in order; `length` is used for the averages)
 * @param graphById  map-like: id -> graph with `entities` and `relations`
 * @param goldById   map-like: id -> array of gold triples
 * @param exact      matcher exposing a synchronous matchEntities(nodes, gold)
 * @param semantic   matcher exposing matchEntities(nodes, gold) whose result is awaited
 * @param opts       optional { domainById, ignoreKeys }
 * @returns promise of the aggregate report; perDomainNode / ignTriplet* are
 *          undefined when the corresponding option is not supplied
 */
function scoreGraph(ids, graphById, goldById, exact, semantic, opts) {
    return __awaiter(this, void 0, void 0, function* () {
        const domainById = opts == null ? undefined : opts.domainById;
        const ignoreKeys = opts == null ? undefined : opts.ignoreKeys;
        const newTally = () => ({ tp: 0, fp: 0, fn: 0 });
        const tallyToMetrics = (tally) => (0, TripleMetrics_1.computeMetrics)(tally.tp, tally.fp, tally.fn);
        // Per-sample triplet metrics, micro-averaged at the end.
        const exactPerSample = [];
        const semanticPerSample = [];
        const ignExactPerSample = [];
        const ignSemanticPerSample = [];
        // Node entity-capture counts, micro-averaged across samples.
        const nodeSemTally = newTally();
        const nodeExactTally = newTally();
        const domainTallies = new Map();
        let tripleTotal = 0;
        let entityTotal = 0;
        for (const id of ids) {
            const storedGraph = graphById.get(id);
            const kg = storedGraph == null ? { entities: [], relations: [] } : storedGraph;
            const storedGold = goldById.get(id);
            const gold = storedGold == null ? [] : storedGold;
            const predicted = (0, compareScoring_1.kgToTriplets)(kg);
            const nodes = (0, compareScoring_1.nodeTriplets)(kg);
            tripleTotal += predicted.length;
            entityTotal += kg.entities.length;
            exactPerSample.push((0, TripleMetrics_1.computeExactMetrics)(predicted, gold, exact));
            semanticPerSample.push(yield (0, TripleMetrics_1.computeSemanticMetrics)(predicted, gold, semantic));
            // Ign-F1: drop train-seen triples from both sides before scoring.
            if (ignoreKeys) {
                const isUnseen = (t) => !ignoreKeys.has(tripleKey(t));
                const predictedUnseen = predicted.filter(isUnseen);
                const goldUnseen = gold.filter(isUnseen);
                ignExactPerSample.push((0, TripleMetrics_1.computeExactMetrics)(predictedUnseen, goldUnseen, exact));
                ignSemanticPerSample.push(yield (0, TripleMetrics_1.computeSemanticMetrics)(predictedUnseen, goldUnseen, semantic));
            }
            // Node entity-capture: match the full node set against gold entities.
            const nodeSemMatch = yield semantic.matchEntities(nodes, gold);
            addTally(nodeSemTally, nodeSemMatch);
            addTally(nodeExactTally, exact.matchEntities(nodes, gold));
            if (domainById) {
                const storedDomain = domainById.get(id);
                const domain = storedDomain == null ? 'unknown' : storedDomain;
                if (!domainTallies.has(domain)) {
                    domainTallies.set(domain, newTally());
                }
                addTally(domainTallies.get(domain), nodeSemMatch);
            }
        }
        let perDomainNode;
        if (domainById) {
            perDomainNode = new Map();
            domainTallies.forEach((tally, domain) => {
                perDomainNode.set(domain, tallyToMetrics(tally));
            });
        }
        const sampleCount = ids.length;
        return {
            nodeEntitySem: tallyToMetrics(nodeSemTally),
            nodeEntityExact: tallyToMetrics(nodeExactTally),
            tripletSem: (0, TripleMetrics_1.microAverage)(semanticPerSample),
            tripletExact: (0, TripleMetrics_1.microAverage)(exactPerSample),
            perDomainNode,
            ignTripletSem: ignoreKeys ? (0, TripleMetrics_1.microAverage)(ignSemanticPerSample) : undefined,
            ignTripletExact: ignoreKeys ? (0, TripleMetrics_1.microAverage)(ignExactPerSample) : undefined,
            triplesPer: sampleCount ? tripleTotal / sampleCount : 0,
            entsPer: sampleCount ? entityTotal / sampleCount : 0,
        };
    });
}
103
+ // ─── JSONL cache (append + load + truncation-tolerant; the CheckpointService idiom) ──
104
/**
 * Load a JSONL cache file into a Map keyed by each record's `id`.
 *
 * @param file path of the JSONL file
 * @param fs   object providing existsSync(file) and readFileSync(file, encoding)
 * @returns Map of id -> record; empty when the file does not exist. Blank
 *          lines, lines that fail to parse (e.g. a truncated final line from
 *          an interrupted write), and records without an `id` are skipped.
 *          A later record with the same id replaces the earlier one.
 */
function loadJsonl(file, fs) {
    const records = new Map();
    if (fs.existsSync(file)) {
        const rows = fs.readFileSync(file, 'utf-8').split('\n');
        rows.forEach((row) => {
            const text = row.trim();
            if (text.length === 0) {
                return;
            }
            let parsed;
            try {
                parsed = JSON.parse(text);
            }
            catch (_err) {
                /* tolerate a truncated final line from an interrupted write */
                return;
            }
            if (parsed && parsed.id !== undefined) {
                records.set(parsed.id, parsed);
            }
        });
    }
    return records;
}
123
/**
 * Append one record to a JSONL cache file as a single line.
 *
 * @param file path of the JSONL file
 * @param rec  record to serialize with JSON.stringify
 * @param fs   object providing appendFileSync(file, data, encoding)
 * @throws {TypeError} when `rec` has no JSON representation (undefined, a
 *         function or a symbol). JSON.stringify returns undefined for those,
 *         and writing it would append the literal text "undefined", a line
 *         that cannot be parsed back and silently loses the record.
 */
function appendJsonl(file, rec, fs) {
    const line = JSON.stringify(rec);
    if (line === undefined) {
        throw new TypeError('appendJsonl: record is not JSON-serializable');
    }
    fs.appendFileSync(file, `${line}\n`, 'utf-8');
}
126
+ //# sourceMappingURL=goldCompare.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"goldCompare.js","sourceRoot":"","sources":["../../../src/evaluation/compare/goldCompare.ts"],"names":[],"mappings":";AAAA,kFAAkF;AAClF,EAAE;AACF,kFAAkF;AAClF,6EAA6E;AAC7E,iFAAiF;AACjF,mEAAmE;AACnE,EAAE;AACF,iFAAiF;AACjF,6EAA6E;AAC7E,iFAAiF;AACjF,oFAAoF;AACpF,EAAE;AACF,4BAA4B;AAC5B,0DAA0D;AAC1D,mFAAmF;AACnF,qFAAqF;AACrF,sEAAsE;;;;;;;;;;;AA+BtE,8BAEC;AAED,gCAoEC;AAGD,8BAcC;AAED,kCAEC;AAvHD,4DAAqH;AAErH,8DAAuE;AAmBvE,MAAM,QAAQ,GAAG,CAAC,CAAQ,EAAE,CAAyC,EAAE,EAAE;IACvE,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;IAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;IAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC;AAC3C,CAAC,CAAC;AAEF,0FAA0F;AAC1F,SAAgB,SAAS,CAAC,CAAU;IAClC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;AAClH,CAAC;AAED,SAAsB,UAAU,CAC9B,GAAa,EACb,SAAsC,EACtC,QAAgC,EAChC,KAAmB,EACnB,QAAyB,EACzB,IAAqE;;;QAErE,MAAM,UAAU,GAAG,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC;QACpC,MAAM,UAAU,GAAG,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC;QAEpC,MAAM,SAAS,GAAmB,EAAE,CAAC;QACrC,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,YAAY,GAAmB,EAAE,CAAC;QACxC,MAAM,UAAU,GAAmB,EAAE,CAAC;QACtC,MAAM,OAAO,GAAU,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAG,gCAAgC;QAClF,MAAM,SAAS,GAAU,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;QACjD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAiB,CAAC;QAC9C,IAAI,OAAO,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,CAAC;QAE1B,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,MAAM,EAAE,GAAG,MAAA,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YAChE,MAAM,IAAI,GAAG,MAAA,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,IAAA,6BAAY,EAAC,EAAE,CAAC,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAA,6BAAY,EAAC,EAAE,CAAC,CAAC;YAC/B,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC;YACvB,IAAI,IAAI,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAE3B,SAAS,CAAC,IAAI,CAAC,IAAA,mCAAmB,EAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;YACvD,OAAO,CAAC,IAA
I,CAAC,MAAM,IAAA,sCAAsB,EAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;YAEjE,kEAAkE;YAClE,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAChE,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAChE,YAAY,CAAC,IAAI,CAAC,IAAA,mCAAmB,EAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC5D,UAAU,CAAC,IAAI,CAAC,MAAM,IAAA,sCAAsB,EAAC,KAAK,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;YACxE,CAAC;YAED,sEAAsE;YACtE,MAAM,EAAE,GAAG,MAAM,QAAQ,CAAC,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;YACrD,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YACtB,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;YAEtD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,MAAA,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,mCAAI,SAAS,CAAC;gBAC1C,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;gBACvE,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,IAAI,aAAmD,CAAC;QACxD,IAAI,UAAU,EAAE,CAAC;YACf,aAAa,GAAG,IAAI,GAAG,EAAE,CAAC;YAC1B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,YAAY;gBAAE,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,IAAA,8BAAc,EAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5F,CAAC;QAED,OAAO;YACL,aAAa,EAAE,IAAA,8BAAc,EAAC,OAAO,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,CAAC;YACjE,eAAe,EAAE,IAAA,8BAAc,EAAC,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,CAAC;YACzE,UAAU,EAAE,IAAA,4BAAY,EAAC,OAAO,CAAC;YACjC,YAAY,EAAE,IAAA,4BAAY,EAAC,SAAS,CAAC;YACrC,aAAa;YACb,aAAa,EAAE,UAAU,CAAC,CAAC,CAAC,IAAA,4BAAY,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,SAAS;YAChE,eAAe,EAAE,UAAU,CAAC,CAAC,CAAC,IAAA,4BAAY,EAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YACjD,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;SAC5C,CAAC;I
ACJ,CAAC;CAAA;AAED,wFAAwF;AACxF,SAAgB,SAAS,CAAU,IAAY,EAAE,EAAuB;IACtE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAa,CAAC;IACjC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,GAAG,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC;YAAE,SAAS;QACjB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,GAAG,IAAI,GAAG,CAAC,EAAE,KAAK,SAAS;gBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QACxD,CAAC;QAAC,WAAM,CAAC;YACP,+DAA+D;QACjE,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAgB,WAAW,CAAC,IAAY,EAAE,GAAY,EAAE,EAAuB;IAC7E,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAC/D,CAAC"}
@@ -0,0 +1,30 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.kgToTriplets = kgToTriplets;
4
+ exports.nodeTriplets = nodeTriplets;
5
/**
 * Flatten a KnowledgeGraph's relations into triplets — one per relationType label.
 *
 * A relation whose relationType is missing (null/undefined), an empty array, or
 * whose individual label is empty or whitespace-only falls back to `related to`,
 * so such relations still show up in the triplet metrics instead of vanishing
 * (or throwing). A bare, non-array relationType value is treated as a single label.
 * Non-blank labels are passed through unchanged (not trimmed).
 *
 * @param kg graph with a `relations` array of `{ from, to, relationType }`
 * @returns array of `{ subject, predicate, object }` triplets
 */
function kgToTriplets(kg) {
    const fallbackLabel = 'related to';
    // Blank = falsy (empty string, null, undefined) or a whitespace-only string.
    const isBlank = (label) => !label || (typeof label === 'string' && label.trim() === '');
    return kg.relations.flatMap((r) => {
        const raw = r.relationType;
        let labels;
        if (Array.isArray(raw)) {
            labels = raw;
        }
        else if (raw == null) {
            labels = [];
        }
        else {
            labels = [raw];
        }
        if (labels.length === 0) {
            labels = [fallbackLabel];
        }
        return labels.map((rel) => ({
            subject: r.from,
            predicate: isBlank(rel) ? fallbackLabel : rel,
            object: r.to,
        }));
    });
}
19
/**
 * Turn every graph NODE into a self-triplet (subject and object are both the
 * entity name, predicate is the empty string). Feeding these to a matcher's
 * matchEntities, which reads subjects ∪ objects, scores entity-capture over the
 * whole node set rather than only over relation endpoints, so a tool that
 * found an entity but drew no edge to it still gets credit.
 */
function nodeTriplets(kg) {
    const toSelfTriplet = (entity) => {
        const label = entity.name;
        return { subject: label, predicate: '', object: label };
    };
    return kg.entities.map((entity) => toSelfTriplet(entity));
}
30
+ //# sourceMappingURL=compareScoring.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"compareScoring.js","sourceRoot":"","sources":["../../../src/evaluation/crossre/compareScoring.ts"],"names":[],"mappings":";;AAWA,oCAKC;AAUD,oCAEC;AAzBD;;;;;;;GAOG;AACH,SAAgB,YAAY,CAAC,EAAkB;IAC7C,OAAO,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QACvE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,IAAI,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;GAOG;AACH,SAAgB,YAAY,CAAC,EAAkB;IAC7C,OAAO,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AACtF,CAAC"}