@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +458 -0
  3. package/dist/__tests__/helpers.js +27 -0
  4. package/dist/__tests__/helpers.js.map +1 -0
  5. package/dist/cli/commands/export.command.js +99 -0
  6. package/dist/cli/commands/export.command.js.map +1 -0
  7. package/dist/cli/commands/index.js +22 -0
  8. package/dist/cli/commands/index.js.map +1 -0
  9. package/dist/cli/commands/inspectMerges.command.js +84 -0
  10. package/dist/cli/commands/inspectMerges.command.js.map +1 -0
  11. package/dist/cli/commands/metrics.command.js +196 -0
  12. package/dist/cli/commands/metrics.command.js.map +1 -0
  13. package/dist/cli/commands/process.command.js +82 -0
  14. package/dist/cli/commands/process.command.js.map +1 -0
  15. package/dist/cli/commands/watch.command.js +91 -0
  16. package/dist/cli/commands/watch.command.js.map +1 -0
  17. package/dist/cli/index.js +269 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/cli/optionsToConfig.js +160 -0
  20. package/dist/cli/optionsToConfig.js.map +1 -0
  21. package/dist/config/index.js +59 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/legacyHints.js +113 -0
  24. package/dist/config/legacyHints.js.map +1 -0
  25. package/dist/config/schema.js +803 -0
  26. package/dist/config/schema.js.map +1 -0
  27. package/dist/config/ui.js +221 -0
  28. package/dist/config/ui.js.map +1 -0
  29. package/dist/core/DirectoryProcessor.js +725 -0
  30. package/dist/core/DirectoryProcessor.js.map +1 -0
  31. package/dist/core/adapters/IStructuredAdapter.js +3 -0
  32. package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
  33. package/dist/core/adapters/SqliteAdapter.js +267 -0
  34. package/dist/core/adapters/SqliteAdapter.js.map +1 -0
  35. package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
  36. package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
  37. package/dist/core/adapters/index.js +20 -0
  38. package/dist/core/adapters/index.js.map +1 -0
  39. package/dist/core/checkpoint/CheckpointService.js +188 -0
  40. package/dist/core/checkpoint/CheckpointService.js.map +1 -0
  41. package/dist/core/checkpoint/index.js +18 -0
  42. package/dist/core/checkpoint/index.js.map +1 -0
  43. package/dist/core/corpus/CorpusAnalyzer.js +266 -0
  44. package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
  45. package/dist/core/corpus/CorpusProfileStore.js +92 -0
  46. package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
  47. package/dist/core/corpus/index.js +21 -0
  48. package/dist/core/corpus/index.js.map +1 -0
  49. package/dist/core/corpus/normalizeGlossary.js +60 -0
  50. package/dist/core/corpus/normalizeGlossary.js.map +1 -0
  51. package/dist/core/corpus/relPath.js +52 -0
  52. package/dist/core/corpus/relPath.js.map +1 -0
  53. package/dist/core/corpus/termFrequency.js +86 -0
  54. package/dist/core/corpus/termFrequency.js.map +1 -0
  55. package/dist/core/cost/CostMeter.js +235 -0
  56. package/dist/core/cost/CostMeter.js.map +1 -0
  57. package/dist/core/cost/index.js +19 -0
  58. package/dist/core/cost/index.js.map +1 -0
  59. package/dist/core/cost/prices.js +38 -0
  60. package/dist/core/cost/prices.js.map +1 -0
  61. package/dist/core/cv/ObjectDetectionService.js +119 -0
  62. package/dist/core/cv/ObjectDetectionService.js.map +1 -0
  63. package/dist/core/di/ContainerFactory.js +670 -0
  64. package/dist/core/di/ContainerFactory.js.map +1 -0
  65. package/dist/core/di/DIContainer.js +103 -0
  66. package/dist/core/di/DIContainer.js.map +1 -0
  67. package/dist/core/di/index.js +19 -0
  68. package/dist/core/di/index.js.map +1 -0
  69. package/dist/core/errors/CustomErrors.js +342 -0
  70. package/dist/core/errors/CustomErrors.js.map +1 -0
  71. package/dist/core/errors/index.js +18 -0
  72. package/dist/core/errors/index.js.map +1 -0
  73. package/dist/core/export/KnowledgeGraphExportService.js +56 -0
  74. package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
  75. package/dist/core/export/index.js +19 -0
  76. package/dist/core/export/index.js.map +1 -0
  77. package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
  78. package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
  79. package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
  80. package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
  81. package/dist/core/export/strategies/IExportStrategy.js +3 -0
  82. package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
  83. package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
  84. package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
  85. package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
  86. package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
  87. package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
  88. package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
  89. package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
  90. package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
  91. package/dist/core/export/strategies/McpExportStrategy.js +67 -0
  92. package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
  93. package/dist/core/export/strategies/index.js +25 -0
  94. package/dist/core/export/strategies/index.js.map +1 -0
  95. package/dist/core/export/strategies/kbTriples.js +60 -0
  96. package/dist/core/export/strategies/kbTriples.js.map +1 -0
  97. package/dist/core/index.js +22 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
  100. package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
  101. package/dist/core/knowledge/MergeRecord.js +3 -0
  102. package/dist/core/knowledge/MergeRecord.js.map +1 -0
  103. package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
  104. package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
  105. package/dist/core/knowledge/canon/index.js +18 -0
  106. package/dist/core/knowledge/canon/index.js.map +1 -0
  107. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
  108. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
  109. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
  110. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
  111. package/dist/core/knowledge/contradiction/index.js +19 -0
  112. package/dist/core/knowledge/contradiction/index.js.map +1 -0
  113. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
  114. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
  115. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
  116. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
  117. package/dist/core/knowledge/grounding/index.js +20 -0
  118. package/dist/core/knowledge/grounding/index.js.map +1 -0
  119. package/dist/core/knowledge/grounding/verbalize.js +38 -0
  120. package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
  121. package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
  122. package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
  123. package/dist/core/knowledge/index.js +20 -0
  124. package/dist/core/knowledge/index.js.map +1 -0
  125. package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
  126. package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
  127. package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
  128. package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
  129. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
  130. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
  131. package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
  132. package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
  133. package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
  134. package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
  135. package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
  136. package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
  137. package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
  138. package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
  139. package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
  140. package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
  141. package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
  142. package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
  143. package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
  144. package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
  145. package/dist/core/knowledge/vocabulary.js +162 -0
  146. package/dist/core/knowledge/vocabulary.js.map +1 -0
  147. package/dist/core/llm/EmbeddingService.js +113 -0
  148. package/dist/core/llm/EmbeddingService.js.map +1 -0
  149. package/dist/core/llm/OllamaService.js +146 -0
  150. package/dist/core/llm/OllamaService.js.map +1 -0
  151. package/dist/core/llm/OpenAICompatibleService.js +190 -0
  152. package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
  153. package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
  154. package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
  155. package/dist/core/llm/embeddingUtils.js +25 -0
  156. package/dist/core/llm/embeddingUtils.js.map +1 -0
  157. package/dist/core/llm/index.js +23 -0
  158. package/dist/core/llm/index.js.map +1 -0
  159. package/dist/core/llm/prompts/PromptManager.js +388 -0
  160. package/dist/core/llm/prompts/PromptManager.js.map +1 -0
  161. package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
  162. package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
  163. package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
  164. package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
  165. package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
  166. package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
  167. package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
  168. package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
  169. package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
  170. package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
  171. package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
  172. package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
  173. package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
  174. package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
  175. package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
  176. package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
  177. package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
  178. package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
  179. package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
  180. package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
  181. package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
  182. package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
  183. package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
  184. package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
  185. package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
  186. package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
  187. package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
  188. package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
  189. package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
  190. package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
  191. package/dist/core/pipeline/GroundingTransform.js +52 -0
  192. package/dist/core/pipeline/GroundingTransform.js.map +1 -0
  193. package/dist/core/pipeline/PipelineRunner.js +51 -0
  194. package/dist/core/pipeline/PipelineRunner.js.map +1 -0
  195. package/dist/core/pipeline/RelationFilterTransform.js +72 -0
  196. package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
  197. package/dist/core/pipeline/index.js +20 -0
  198. package/dist/core/pipeline/index.js.map +1 -0
  199. package/dist/core/processor/FileProcessor.js +184 -0
  200. package/dist/core/processor/FileProcessor.js.map +1 -0
  201. package/dist/core/processor/ProcessedRegistry.js +38 -0
  202. package/dist/core/processor/ProcessedRegistry.js.map +1 -0
  203. package/dist/core/processor/ast/AstSeedService.js +0 -0
  204. package/dist/core/processor/ast/AstSeedService.js.map +1 -0
  205. package/dist/core/processor/ast/AstSymbolStore.js +110 -0
  206. package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
  207. package/dist/core/processor/ast/index.js +19 -0
  208. package/dist/core/processor/ast/index.js.map +1 -0
  209. package/dist/core/processor/chunking/TextChunker.js +98 -0
  210. package/dist/core/processor/chunking/TextChunker.js.map +1 -0
  211. package/dist/core/processor/chunking/index.js +18 -0
  212. package/dist/core/processor/chunking/index.js.map +1 -0
  213. package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
  214. package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
  215. package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
  216. package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
  217. package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
  218. package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
  219. package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
  220. package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
  221. package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
  222. package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
  223. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
  224. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
  225. package/dist/core/processor/classifier/index.js +21 -0
  226. package/dist/core/processor/classifier/index.js.map +1 -0
  227. package/dist/core/processor/classifier/mergeClassifications.js +32 -0
  228. package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
  229. package/dist/core/processor/index.js +20 -0
  230. package/dist/core/processor/index.js.map +1 -0
  231. package/dist/core/processor/readers/AudioReader.js +462 -0
  232. package/dist/core/processor/readers/AudioReader.js.map +1 -0
  233. package/dist/core/processor/readers/BinaryReader.js +90 -0
  234. package/dist/core/processor/readers/BinaryReader.js.map +1 -0
  235. package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
  236. package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
  237. package/dist/core/processor/readers/ChatExportReader.js +365 -0
  238. package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
  239. package/dist/core/processor/readers/DoclingReader.js +445 -0
  240. package/dist/core/processor/readers/DoclingReader.js.map +1 -0
  241. package/dist/core/processor/readers/EmailReader.js +259 -0
  242. package/dist/core/processor/readers/EmailReader.js.map +1 -0
  243. package/dist/core/processor/readers/EpubReader.js +175 -0
  244. package/dist/core/processor/readers/EpubReader.js.map +1 -0
  245. package/dist/core/processor/readers/FileReader.js +90 -0
  246. package/dist/core/processor/readers/FileReader.js.map +1 -0
  247. package/dist/core/processor/readers/FileReaderFactory.js +49 -0
  248. package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
  249. package/dist/core/processor/readers/HtmlReader.js +371 -0
  250. package/dist/core/processor/readers/HtmlReader.js.map +1 -0
  251. package/dist/core/processor/readers/ImageReader.js +162 -0
  252. package/dist/core/processor/readers/ImageReader.js.map +1 -0
  253. package/dist/core/processor/readers/JsonFileReader.js +232 -0
  254. package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
  255. package/dist/core/processor/readers/JupyterReader.js +178 -0
  256. package/dist/core/processor/readers/JupyterReader.js.map +1 -0
  257. package/dist/core/processor/readers/LatexReader.js +176 -0
  258. package/dist/core/processor/readers/LatexReader.js.map +1 -0
  259. package/dist/core/processor/readers/MarkdownReader.js +289 -0
  260. package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
  261. package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
  262. package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
  263. package/dist/core/processor/readers/MistralOcrReader.js +198 -0
  264. package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
  265. package/dist/core/processor/readers/OfficeReader.js +174 -0
  266. package/dist/core/processor/readers/OfficeReader.js.map +1 -0
  267. package/dist/core/processor/readers/PdfReader.js +116 -0
  268. package/dist/core/processor/readers/PdfReader.js.map +1 -0
  269. package/dist/core/processor/readers/RtfReader.js +107 -0
  270. package/dist/core/processor/readers/RtfReader.js.map +1 -0
  271. package/dist/core/processor/readers/SubtitleReader.js +145 -0
  272. package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
  273. package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
  274. package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
  275. package/dist/core/processor/readers/TextReader.js +129 -0
  276. package/dist/core/processor/readers/TextReader.js.map +1 -0
  277. package/dist/core/processor/readers/TranscriptReader.js +234 -0
  278. package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
  279. package/dist/core/processor/readers/image/imageMetadata.js +155 -0
  280. package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
  281. package/dist/core/processor/readers/index.js +41 -0
  282. package/dist/core/processor/readers/index.js.map +1 -0
  283. package/dist/core/processor/readers/referenceExtraction.js +198 -0
  284. package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
  285. package/dist/core/processor/readers/stripReferences.js +59 -0
  286. package/dist/core/processor/readers/stripReferences.js.map +1 -0
  287. package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
  288. package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
  289. package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
  290. package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
  291. package/dist/core/progress/NoopProgressEmitter.js +15 -0
  292. package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
  293. package/dist/core/progress/index.js +19 -0
  294. package/dist/core/progress/index.js.map +1 -0
  295. package/dist/core/trace/TraceWriter.js +100 -0
  296. package/dist/core/trace/TraceWriter.js.map +1 -0
  297. package/dist/core/trace/events.js +13 -0
  298. package/dist/core/trace/events.js.map +1 -0
  299. package/dist/core/trace/index.js +20 -0
  300. package/dist/core/trace/index.js.map +1 -0
  301. package/dist/core/trace/lineage.js +97 -0
  302. package/dist/core/trace/lineage.js.map +1 -0
  303. package/dist/evaluation/BenchmarkRunner.js +171 -0
  304. package/dist/evaluation/BenchmarkRunner.js.map +1 -0
  305. package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
  306. package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
  307. package/dist/evaluation/classifier/labeledSamples.js +379 -0
  308. package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
  309. package/dist/evaluation/compare/goldCompare.js +126 -0
  310. package/dist/evaluation/compare/goldCompare.js.map +1 -0
  311. package/dist/evaluation/crossre/compareScoring.js +30 -0
  312. package/dist/evaluation/crossre/compareScoring.js.map +1 -0
  313. package/dist/evaluation/datasets/CrossREDataset.js +170 -0
  314. package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
  315. package/dist/evaluation/datasets/IDataset.js +3 -0
  316. package/dist/evaluation/datasets/IDataset.js.map +1 -0
  317. package/dist/evaluation/datasets/RebelDataset.js +117 -0
  318. package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
  319. package/dist/evaluation/datasets/RedocredDataset.js +218 -0
  320. package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
  321. package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
  322. package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
  323. package/dist/evaluation/index.js +33 -0
  324. package/dist/evaluation/index.js.map +1 -0
  325. package/dist/evaluation/matching/ExactMatcher.js +75 -0
  326. package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
  327. package/dist/evaluation/matching/SemanticMatcher.js +143 -0
  328. package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
  329. package/dist/evaluation/metrics/TripleMetrics.js +64 -0
  330. package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
  331. package/dist/evaluation/mine/MineCheckpoint.js +114 -0
  332. package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
  333. package/dist/evaluation/mine/MineDataset.js +208 -0
  334. package/dist/evaluation/mine/MineDataset.js.map +1 -0
  335. package/dist/evaluation/mine/MineReporter.js +98 -0
  336. package/dist/evaluation/mine/MineReporter.js.map +1 -0
  337. package/dist/evaluation/mine/MineRunner.js +148 -0
  338. package/dist/evaluation/mine/MineRunner.js.map +1 -0
  339. package/dist/evaluation/mine/MineScorer.js +127 -0
  340. package/dist/evaluation/mine/MineScorer.js.map +1 -0
  341. package/dist/evaluation/mine/types.js +12 -0
  342. package/dist/evaluation/mine/types.js.map +1 -0
  343. package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
  344. package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
  345. package/dist/evaluation/reporters/JsonReporter.js +50 -0
  346. package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
  347. package/dist/index.js +28 -0
  348. package/dist/index.js.map +1 -0
  349. package/dist/quality/CompositeScore.js +61 -0
  350. package/dist/quality/CompositeScore.js.map +1 -0
  351. package/dist/quality/ConsistencyMetrics.js +70 -0
  352. package/dist/quality/ConsistencyMetrics.js.map +1 -0
  353. package/dist/quality/FactualMetrics.js +76 -0
  354. package/dist/quality/FactualMetrics.js.map +1 -0
  355. package/dist/quality/GraphHealthMetrics.js +68 -0
  356. package/dist/quality/GraphHealthMetrics.js.map +1 -0
  357. package/dist/quality/SemanticMetrics.js +102 -0
  358. package/dist/quality/SemanticMetrics.js.map +1 -0
  359. package/dist/quality/StructuralMetrics.js +60 -0
  360. package/dist/quality/StructuralMetrics.js.map +1 -0
  361. package/dist/quality/index.js +23 -0
  362. package/dist/quality/index.js.map +1 -0
  363. package/dist/shared/index.js +20 -0
  364. package/dist/shared/index.js.map +1 -0
  365. package/dist/shared/logger/Logger.js +3 -0
  366. package/dist/shared/logger/Logger.js.map +1 -0
  367. package/dist/shared/logger/LoggerFactory.js +75 -0
  368. package/dist/shared/logger/LoggerFactory.js.map +1 -0
  369. package/dist/shared/logger/index.js +19 -0
  370. package/dist/shared/logger/index.js.map +1 -0
  371. package/dist/shared/shutdown.js +30 -0
  372. package/dist/shared/shutdown.js.map +1 -0
  373. package/dist/shared/utils/agglomerativeCluster.js +269 -0
  374. package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
  375. package/dist/shared/utils/astSymbols.js +69 -0
  376. package/dist/shared/utils/astSymbols.js.map +1 -0
  377. package/dist/shared/utils/cosineSimilarity.js +18 -0
  378. package/dist/shared/utils/cosineSimilarity.js.map +1 -0
  379. package/dist/shared/utils/directoryTree.js +184 -0
  380. package/dist/shared/utils/directoryTree.js.map +1 -0
  381. package/dist/shared/utils/documentOutline.js +74 -0
  382. package/dist/shared/utils/documentOutline.js.map +1 -0
  383. package/dist/shared/utils/index.js +24 -0
  384. package/dist/shared/utils/index.js.map +1 -0
  385. package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
  386. package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
  387. package/dist/shared/utils/parseJsonLenient.js +27 -0
  388. package/dist/shared/utils/parseJsonLenient.js.map +1 -0
  389. package/dist/shared/utils/readConfig.js +42 -0
  390. package/dist/shared/utils/readConfig.js.map +1 -0
  391. package/dist/shared/utils/readRtf.js +216 -0
  392. package/dist/shared/utils/readRtf.js.map +1 -0
  393. package/dist/shared/utils/softmax.js +26 -0
  394. package/dist/shared/utils/softmax.js.map +1 -0
  395. package/dist/types/ContentClass.js +3 -0
  396. package/dist/types/ContentClass.js.map +1 -0
  397. package/dist/types/CorpusProfile.js +3 -0
  398. package/dist/types/CorpusProfile.js.map +1 -0
  399. package/dist/types/IContradictionChecker.js +3 -0
  400. package/dist/types/IContradictionChecker.js.map +1 -0
  401. package/dist/types/ICorpusAnalyzer.js +3 -0
  402. package/dist/types/ICorpusAnalyzer.js.map +1 -0
  403. package/dist/types/IDirectoryProcessor.js +3 -0
  404. package/dist/types/IDirectoryProcessor.js.map +1 -0
  405. package/dist/types/IEmbeddingProvider.js +3 -0
  406. package/dist/types/IEmbeddingProvider.js.map +1 -0
  407. package/dist/types/IEmbeddingService.js +6 -0
  408. package/dist/types/IEmbeddingService.js.map +1 -0
  409. package/dist/types/IFileProcessor.js +3 -0
  410. package/dist/types/IFileProcessor.js.map +1 -0
  411. package/dist/types/IGroundingChecker.js +3 -0
  412. package/dist/types/IGroundingChecker.js.map +1 -0
  413. package/dist/types/IKnowledgeGraphBuilder.js +3 -0
  414. package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
  415. package/dist/types/IKnowledgeGraphExporter.js +3 -0
  416. package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
  417. package/dist/types/IKnowledgeGraphMerger.js +3 -0
  418. package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
  419. package/dist/types/IKnowledgeGraphSearch.js +3 -0
  420. package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
  421. package/dist/types/ILLMProvider.js +3 -0
  422. package/dist/types/ILLMProvider.js.map +1 -0
  423. package/dist/types/ILLMService.js +3 -0
  424. package/dist/types/ILLMService.js.map +1 -0
  425. package/dist/types/IObjectDetector.js +3 -0
  426. package/dist/types/IObjectDetector.js.map +1 -0
  427. package/dist/types/IProcessingService.js +3 -0
  428. package/dist/types/IProcessingService.js.map +1 -0
  429. package/dist/types/IProgressEmitter.js +3 -0
  430. package/dist/types/IProgressEmitter.js.map +1 -0
  431. package/dist/types/IPromptManager.js +3 -0
  432. package/dist/types/IPromptManager.js.map +1 -0
  433. package/dist/types/KnowledgeGraph.js +3 -0
  434. package/dist/types/KnowledgeGraph.js.map +1 -0
  435. package/dist/types/MCPKnowledgeGraph.js +3 -0
  436. package/dist/types/MCPKnowledgeGraph.js.map +1 -0
  437. package/dist/types/Observation.js +21 -0
  438. package/dist/types/Observation.js.map +1 -0
  439. package/dist/types/ProcessingOptions.js +3 -0
  440. package/dist/types/ProcessingOptions.js.map +1 -0
  441. package/dist/types/index.js +40 -0
  442. package/dist/types/index.js.map +1 -0
  443. package/package.json +122 -0
@@ -0,0 +1,40 @@
1
+ ## File Information
2
+
3
+ {{#if filePath}}Path: {{filePath}}{{/if}}
4
+ {{#if totalChunks}}Chunk: {{chunkIndex}} of {{totalChunks}}{{/if}}
5
+ {{#if fileOutline}}
6
+
7
+ {{#if metadata}}
8
+ ## File Metadata
9
+
10
+
11
+ {{/if}}
12
+
13
+ ## Document Outline
14
+
15
+ ```
16
+ {{fileOutline}}
17
+ ```
18
+ {{/if}}
19
+
20
+ {{#if retrievedEntities}}
21
+ ## Existing Knowledge Context
22
+
23
+ Entities already extracted from other parts of this project (do NOT copy them — only reference or extend them):
24
+
25
+ {{#each retrievedEntities}}
26
+ - **{{name}}** ({{entityType}}): {{truncate (join observations "; ") 200}}
27
+ {{/each}}
28
+ {{/if}}
29
+
30
+ {{#if domainHints}}
31
+ ## Domain Context
32
+
33
+ {{domainHints}}
34
+
35
+ {{/if}}
36
+ ## Content to Analyze
37
+
38
+ ```
39
+ {{#if chunkContent}}{{chunkContent}}{{else}}{{fileContent}}{{/if}}
40
+ ```
@@ -0,0 +1,71 @@
1
+ # Expert Knowledge Graph Generation System
2
+
3
+ ## MISSION STATEMENT
4
+
5
+ You are an expert data analyst and knowledge extraction AI system. Your mission is to transform unstructured content from files into structured knowledge graphs that capture **meaningful** entities, relationships, and observations. Extract **specific** entities, relations, and observations from the provided text/code/documentation/image content, achieving over 90% factual accuracy.
6
+
7
+ ## WORKING DIRECTORY CONTEXT
8
+
9
+ **Root Directory:** `{{inputDirectory}}`
10
+ **File Filter:** `{{filter}}`
11
+ {{#if directoryTree}}
12
+ **Directory Structure (filtered):**
13
+
14
+ ```
15
+ {{directoryTree}}
16
+ ```
17
+
18
+ Use this directory structure to understand file relationships, project organization, and contextual connections between entities.
19
+ {{#if userDescription}}
20
+ The user provided the following description of the files in the working directory:
21
+ ```
22
+ {{userDescription}}
23
+ ```
24
+
25
+ {{/if}}
26
+ {{/if}}
27
+ ## OUTPUT SCHEMA
28
+
29
+ You **MUST** output valid JSON that follows this exact schema:
30
+
31
+ ```json
32
+ {
33
+ "entities": [
34
+ {
35
+ "name": "unique_identifier",
36
+ "entityType": "person|organization|technology|concept|method|function|class|module|file|error|event|standard|protocol|algorithm|data_structure|etc",
37
+ "observations": ["meaningful_fact_1", "meaningful_fact_2", "..."]
38
+ }
39
+ ],
40
+ "relations": [
41
+ {
42
+ "from": "entity_name",
43
+ "to": "entity_name",
44
+ "relationType": ["relationship_type_1", "relationship_type_2", "..."]
45
+ }
46
+ ]
47
+ }
48
+ ```
49
+
50
+ ## EXTRACTION INSTRUCTIONS
51
+
52
+ Given a text, extract:
53
+
54
+ 1. **Entities** — named things (people, places, organizations, concepts, events, products, etc.)
55
+ - name: short canonical identifier
56
+ - entityType: category (person, organization, location, concept, event, product, ...)
57
+ - observations: factual statements about the entity found in the text
58
+
59
+ 2. **Relations** — directed connections between entities
60
+ - from: source entity name (must match an entity name exactly)
61
+ - to: target entity name (must match an entity name exactly)
62
+ - relationType: one or more short relation labels, e.g. ["works_at"] or ["founded", "leads"]
63
+
64
+ Rules:
65
+ - Only extract information explicitly stated in the text.
66
+ - Entity names used in relations must exactly match names in the entities list.
67
+ - Omit anything you are not confident about.
68
+
69
+ ## FINAL REMINDER
70
+
71
+ Your success is measured by the **meaningfulness and accuracy** of extracted knowledge. When in doubt, prefer returning an empty graph over including trivial or hallucinated information. Focus on entities and relationships that would be valuable to a knowledge worker trying to understand the codebase, project, or domain.
@@ -0,0 +1,46 @@
1
+ ## File Information
2
+
3
+ {{#if filePath}}Path: {{filePath}}{{/if}}
4
+ {{#if totalChunks}}Chunk: {{chunkIndex}} of {{totalChunks}}{{/if}}
5
+ {{#if fileOutline}}
6
+
7
+ {{#if metadata}}
8
+ ## File Metadata
9
+
10
+ {{metadata}}
11
+ {{/if}}
12
+
13
+ ## Document Outline
14
+
15
+ ```
16
+ {{fileOutline}}
17
+ ```
18
+ {{/if}}
19
+
20
+ {{#if retrievedEntities}}
21
+ ## Existing Knowledge Context
22
+
23
+ Entities already extracted from other parts of this project (do NOT copy them — only reference or extend them):
24
+
25
+ {{#each retrievedEntities}}
26
+ - **{{name}}** ({{entityType}}): {{truncate (join observations "; ") 200}}
27
+ {{/each}}
28
+ {{/if}}
29
+
30
+ {{#if domainHints}}
31
+ ## Domain Context
32
+
33
+ {{domainHints}}
34
+
35
+ {{/if}}
36
+ {{#if corpusGlossary}}
37
+ ## Corpus Glossary
38
+
39
+ {{corpusGlossary}}
40
+
41
+ {{/if}}
42
+ ## Content to Analyze
43
+
44
+ ```
45
+ {{#if chunkContent}}{{chunkContent}}{{else}}{{fileContent}}{{/if}}
46
+ ```
@@ -0,0 +1,40 @@
1
+ You design the controlled vocabulary for knowledge-graph extraction over a
2
+ document corpus. Extraction runs file-by-file; without a shared vocabulary each
3
+ file invents its own entity types and relation predicates, and the merged graph
4
+ fragments into hundreds of one-off types. Your job is to prevent that by fixing a
5
+ small, canonical vocabulary up front.
6
+
7
+ You are given the corpus's dominant content type, its most frequent terms, and a
8
+ few representative snippets. Produce three lists:
9
+
10
+ 1. **entityNames** — the real recurring proper nouns and key concepts in this
11
+ corpus, each normalized to ONE canonical spelling. Only include names that
12
+ genuinely recur or anchor the domain; skip incidental mentions. A few dozen at
13
+ most.
14
+
15
+ 2. **entityTypes** — the categories that fit this corpus. Keep this SMALL and
16
+ reusable: aim for 8–20 types that cover the corpus, not one type per concept.
17
+ Lowercase `snake_case`. Prefer a slightly-general type that applies broadly
18
+ (`function`) over a narrow one that applies once (`cli_argument_parser`).
19
+
20
+ 3. **relationTypes** — the predicates that connect entities in this corpus.
21
+ Lowercase `snake_case`, each a single verb-like predicate (`depends_on`, not
22
+ `uses,depends_on`). Keep it small — 6–15. Include `related_to` as a catch-all.
23
+
24
+ Guidance:
25
+ - Prefer terms that actually appear in the corpus over generic ontology labels.
26
+ - Every type and predicate you list will be enforced as a closed set during
27
+ extraction, so omit anything speculative — a missing type is recoverable, a
28
+ bloated list defeats the purpose.
29
+ - Choose canonical spellings deliberately: one casing convention, no synonyms
30
+ (pick `organization` xor `org`, not both).
31
+
32
+ Return JSON only, in exactly this shape:
33
+
34
+ ```json
35
+ {
36
+ "entityNames": ["...", "..."],
37
+ "entityTypes": ["...", "..."],
38
+ "relationTypes": ["...", "..."]
39
+ }
40
+ ```
@@ -0,0 +1,11 @@
1
+ Corpus content type: {{classLine}}
2
+
3
+ Most frequent terms (with counts):
4
+ ```
5
+ {{termList}}
6
+ ```
7
+
8
+ Representative snippets:
9
+ ```
10
+ {{snippets}}
11
+ ```
@@ -0,0 +1,163 @@
1
+ # Knowledge Graph Extraction
2
+
3
+ You extract a knowledge graph from one file (or one chunk of a file). The graph
4
+ feeds downstream retrieval and knowledge-injection systems, so it must be
5
+ *consistent* across files, not just locally plausible: the same concept gets the
6
+ same name and the same type everywhere, and relations point in a predictable
7
+ direction. A locally pretty graph that uses a fresh entity type for every node is
8
+ useless once merged.
9
+
10
+ ## Output schema
11
+
12
+ Return one valid JSON object, nothing else — no prose, no markdown fences:
13
+
14
+ ```json
15
+ {
16
+ "entities": [
17
+ { "name": "...", "entityType": "...", "observations": ["...", "..."] }
18
+ ],
19
+ "relations": [
20
+ { "from": "...", "to": "...", "relationType": ["..."] }
21
+ ]
22
+ }
23
+ ```
24
+
25
+ - `name` — the canonical identifier for the thing. One spelling per concept.
26
+ - `entityType` — a single category drawn from the controlled vocabulary below.
27
+ - `observations` — short factual statements about the entity, each grounded in
28
+ the content. Not a restatement of the type.
29
+ - `relations[].from` / `to` — each must name a real entity: either one you emit
30
+ in `entities`, or one already established in the provided context (the *Already
31
+ extracted* list or the corpus vocabulary), which you reference by its canonical
32
+ name **without** re-emitting it. Never invent an endpoint that names neither.
33
+ - `relationType` — an array holding **one** canonical predicate. Use a single
34
+ predicate per edge; do not stack synonyms.
35
+
36
+ If the content carries no extractable knowledge (empty, binary garbage, a stray
37
+ fragment), return `{"entities": [], "relations": []}`. An empty graph is a correct
38
+ answer, not a failure.
39
+
40
+ ## Working directory context
41
+
42
+ **Root:** `{{inputDirectory}}`
43
+ **Filter:** `{{filter}}`
44
+ {{#if directoryTree}}
45
+ **Structure (filtered):**
46
+
47
+ ```
48
+ {{directoryTree}}
49
+ ```
50
+
51
+ Use this to resolve what a file *is* and how it relates to its neighbors — e.g. a
52
+ file under `readers/` that imports a base class is a reader implementation, not a
53
+ free-floating module.
54
+ {{#if userDescription}}
55
+
56
+ User's description of the working directory:
57
+ ```
58
+ {{userDescription}}
59
+ ```
60
+ {{/if}}
61
+ {{/if}}
62
+
63
+ ## Controlled vocabularies
64
+
65
+ {{#if openVocabulary}}
66
+ **No fixed vocabulary.** For each entity choose the most natural type, and for each
67
+ relation the single most precise predicate that names what the text says — use the
68
+ exact verb/relationship the sentence implies (`feeds_on`, `becomes`,
69
+ `lays_eggs_on`, `regulates`, …). All lowercase `snake_case`. Always prefer a
70
+ specific predicate; **never** fall back to a generic `related_to`.
71
+ {{else}}
72
+ {{#if entityTypeVocabulary}}
73
+ **Entity types — use these and only these.** If an entity does not fit any of
74
+ them, choose the closest fit. Coining a new type fragments the merged graph, so
75
+ treat it as a last resort, and when you must, use lowercase `snake_case`.
76
+
77
+ ```
78
+ {{entityTypeVocabulary}}
79
+ ```
80
+ {{else}}
81
+ **Entity types.** Prefer this base set; reach for a new type only when nothing
82
+ here fits, and write it lowercase `snake_case`. A slightly-too-general type that
83
+ is reused beats a perfect type that appears once.
84
+
85
+ ```
86
+ person, organization, location, role, event, time, metric,
87
+ concept, term, document, product, technology, standard,
88
+ class, interface, function, module, service, dependency,
89
+ data_structure, config, file
90
+ ```
91
+ {{/if}}
92
+
93
+ {{#if relationTypeVocabulary}}
94
+ **Relation predicates — use these and only these.** Pick the single best fit. If
95
+ none fits, use `related_to` rather than inventing a predicate.
96
+
97
+ ```
98
+ {{relationTypeVocabulary}}
99
+ ```
100
+ {{else}}
101
+ **Relation predicates.** Pick the single best fit from this base set; fall back to
102
+ `related_to` rather than inventing one. All lowercase `snake_case`.
103
+
104
+ ```
105
+ uses, depends_on, calls, implements, extends, contains, part_of,
106
+ produces, consumes, configures, references, defines, targets,
107
+ located_in, works_at, member_of, precedes, causes, has_attribute,
108
+ related_to
109
+ ```
110
+ {{/if}}
111
+ {{/if}}
112
+
113
+ ## Extraction rules
114
+
115
+ **Entities — extract the things that carry meaning.** Functions, classes,
116
+ services, people, organizations, named concepts, products, standards. One node
117
+ per real-world concept: if two surface strings mean the same thing, merge them
118
+ under one canonical name; if one surface string means two different things in
119
+ different contexts (e.g. `Node` the runtime vs `Node` a tree element), keep them
120
+ distinct by qualifying the name (`node_runtime`, `tree_node`).
121
+
122
+ **Names:** preserve real identifiers exactly as written — a function
123
+ `processFiles` or class `AuthService` keeps its casing, because the name is the
124
+ join key that links this entity across files. Use lowercase `snake_case` only for
125
+ abstract concepts that have no canonical symbol (`spike_detection`, `rate_limiting`).
126
+ Preserve spelling and casing for proper nouns (people, orgs, places, products).
127
+ Never put a file path or chunk number in a name or an observation.
128
+
129
+ **Don't extract:** language keywords, primitive types, literal values (a dollar
130
+ amount, a timestamp, a single number) as standalone entities, or syntax. A value
131
+ belongs *inside* an observation on the entity it describes — `"$45M Series B
132
+ raised Jan 2024"` is an observation on a company, not a node named `$45M`.
133
+
134
+ **Observations** state something the content actually says and that the type
135
+ doesn't already imply. `entityType: function` + observation `"a function"` is
136
+ zero information. Good: what it does, what it takes, what it returns, a measured
137
+ value, a constraint.
138
+
139
+ **Relations are directed and canonical.**
140
+ - One predicate per edge, from the vocabulary.
141
+ - Choose a consistent direction: actor → object, specific → general, caller →
142
+ callee (`order_service uses database`, not the reverse).
143
+ - No self-loops. `from` and `to` must differ.
144
+ - Don't assert both `A → B` and `B → A` for the same relationship; pick the one
145
+ canonical direction.
146
+ - Link across files. An endpoint may be an entity established elsewhere in the
147
+ corpus (from the provided context) — pointing this file's relations at those
148
+ names is the *purpose* of that context, not a violation. The only forbidden
149
+ endpoint is a name that exists nowhere.
150
+ - Only connect entities when the content supports the link. A weak or guessed
151
+ edge is worse than no edge.
152
+
153
+ **Ground everything in the provided content.** Do not import outside knowledge,
154
+ and do not infer facts the text doesn't state. When unsure, leave it out.
155
+
156
+ {{#if domainExamples}}
157
+ ## Worked examples for this content type ({{detectedContentClass}})
158
+
159
+ Follow the entity granularity, naming, type reuse, and single-predicate relations
160
+ shown here.
161
+
162
+ {{domainExamples}}
163
+ {{/if}}
@@ -0,0 +1,55 @@
1
+ ## File information
2
+
3
+ {{#if filePath}}Path: `{{filePath}}`{{/if}}
4
+ {{#if totalChunks}}Chunk {{chunkIndex}} of {{totalChunks}}{{/if}}
5
+
6
+ {{#if metadata}}
7
+ ## File metadata
8
+
9
+ ```
10
+ {{metadata}}
11
+ ```
12
+ {{/if}}
13
+
14
+ {{#if fileOutline}}
15
+ ## Document outline
16
+
17
+ ```
18
+ {{fileOutline}}
19
+ ```
20
+ {{/if}}
21
+
22
+ {{#if corpusGlossary}}
23
+ ## Corpus vocabulary (authoritative)
24
+
25
+ These entity names, types, and relation predicates have already been established
26
+ across this corpus. Reuse them exactly. When something you find matches one of
27
+ these, use the established spelling and type rather than coining your own.
28
+
29
+ {{corpusGlossary}}
30
+ {{/if}}
31
+
32
+ {{#if retrievedEntities}}
33
+ ## Already extracted (do not re-emit)
34
+
35
+ These entities exist from other files. Do **not** copy them into your output. You
36
+ may extend them only by emitting an entity with the *same canonical name* carrying
37
+ a genuinely new observation from this file; otherwise leave them out and just point
38
+ relations at them by name.
39
+
40
+ {{#each retrievedEntities}}
41
+ - **{{name}}** ({{entityType}}): {{truncate (join observations "; ") 200}}
42
+ {{/each}}
43
+ {{/if}}
44
+
45
+ {{#if domainHints}}
46
+ ## Domain context
47
+
48
+ {{domainHints}}
49
+ {{/if}}
50
+
51
+ ## Content to analyze
52
+
53
+ ```
54
+ {{#if chunkContent}}{{chunkContent}}{{else}}{{fileContent}}{{/if}}
55
+ ```
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // async helper: reuse `this.__awaiter` when one exists, otherwise drive `generator` to completion and return a promise of its return value
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wrap anything that is not already a P in a P resolving to it
4
+ return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when the caller passes none
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with a settled value; a throw rejects the outer promise
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // raise the rejection reason inside the generator so its own try/catch can see it
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // done → resolve with the return value; otherwise wait on the yielded value
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // instantiate the generator bound to thisArg and run it up to its first yield
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.GroundingTransform = void 0;
13
/**
 * Reports whether `endpoint` is a non-blank string that occurs inside `span`.
 *
 * @param {string} span - Source span text, already lower-cased by the caller.
 * @param {*} endpoint - Relation endpoint name as produced by extraction.
 * @returns {boolean} True only for a string endpoint whose trimmed, lower-cased
 *   form is non-empty and is a substring of `span`.
 */
function spanMentionsEndpoint(span, endpoint) {
    if (typeof endpoint !== "string")
        return false; // a missing or malformed endpoint cannot co-occur with anything
    const needle = endpoint.trim().toLowerCase();
    // An empty needle would match every span, because "".includes("") is always true.
    return needle.length > 0 && span.includes(needle);
}
/**
 * Edge co-occurrence grounding gate (canon brief §6). Drops relations whose two
 * endpoints don't both appear in the source span the edge was extracted from —
 * a cheap precision gate for high-recall/low-precision extraction.
 *
 * This is a SEAM: OFF for Experiment 1 (schema-first extraction already has
 * implicit garbage suppression). It exists and is tested now so Experiment 2 is
 * a flag flip — there it must run BEFORE canonicalization, otherwise
 * canonicalization would canonicalize junk edges. Edges only carry `sourceSpan`
 * when `pipeline.grounding.enabled` was set during extraction (see
 * KnowledgeGraphBuilder.toGraph); without a span we keep the edge
 * (conservative — can't judge what we can't see).
 */
class GroundingTransform {
    constructor() {
        this.stage = "grounding";
    }
    /**
     * @param {object} ctx - Pipeline context; reads `ctx.options.pipeline.grounding.enabled`.
     * @returns {boolean} The configured `enabled` flag.
     */
    isEnabled(ctx) {
        return ctx.options.pipeline.grounding.enabled;
    }
    /**
     * Filters `graph.relations` down to edges that are grounded in their span.
     *
     * An edge is kept when it has no `sourceSpan`, when its `sourceSpan` is not a
     * string (nothing to match against), or when both endpoints are non-blank
     * strings found case-insensitively inside the span. Edges with an empty,
     * blank or non-string endpoint are dropped instead of slipping through the
     * substring check or throwing.
     *
     * @param {{entities: Array, relations: Array}} graph - Graph to filter; not mutated.
     * @param {object} ctx - Pipeline context; reads
     *   `ctx.options.pipeline.grounding.requireCooccurrence` and logs via `ctx.logger.info`.
     * @returns {Promise<{entities: Array, relations: Array}>} The same `graph` object
     *   when `requireCooccurrence` is falsy, otherwise a new graph sharing `entities`.
     */
    apply(graph, ctx) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!ctx.options.pipeline.grounding.requireCooccurrence)
                return graph;
            const before = graph.relations.length;
            const relations = graph.relations.filter((r) => {
                if (!r.sourceSpan)
                    return true; // no span → can't judge → keep
                if (typeof r.sourceSpan !== "string")
                    return true; // span is not text → can't judge → keep
                const span = r.sourceSpan.toLowerCase();
                return spanMentionsEndpoint(span, r.from) && spanMentionsEndpoint(span, r.to);
            });
            const dropped = before - relations.length;
            if (dropped > 0) {
                ctx.logger.info(`Grounding gate dropped ${dropped}/${before} edge(s) whose endpoints don't co-occur in their source span`);
            }
            return { entities: graph.entities, relations };
        });
    }
}
51
+ exports.GroundingTransform = GroundingTransform;
52
+ //# sourceMappingURL=GroundingTransform.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GroundingTransform.js","sourceRoot":"","sources":["../../../src/core/pipeline/GroundingTransform.ts"],"names":[],"mappings":";;;;;;;;;;;;AAGA;;;;;;;;;;;GAWG;AACH,MAAa,kBAAkB;IAA/B;QACW,UAAK,GAAG,WAAW,CAAC;IA0B/B,CAAC;IAxBC,SAAS,CAAC,GAAqB;QAC7B,OAAO,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC;IAChD,CAAC;IAEK,KAAK,CAAC,KAAqB,EAAE,GAAqB;;YACtD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,mBAAmB;gBAAE,OAAO,KAAK,CAAC;YAEtE,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC;YACtC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC7C,IAAI,CAAC,CAAC,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC,CAAC,+BAA+B;gBAC/D,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC;gBACxC,OAAO,CACL,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CACzE,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;YAC1C,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;gBAChB,GAAG,CAAC,MAAM,CAAC,IAAI,CACb,0BAA0B,OAAO,IAAI,MAAM,8DAA8D,CAC1G,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC;QACjD,CAAC;KAAA;CACF;AA3BD,gDA2BC"}
@@ -0,0 +1,51 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // async helper: reuse `this.__awaiter` when one exists, otherwise drive `generator` to completion and return a promise of its return value
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wrap anything that is not already a P in a P resolving to it
4
+ return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when the caller passes none
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with a settled value; a throw rejects the outer promise
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // raise the rejection reason inside the generator so its own try/catch can see it
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // done → resolve with the return value; otherwise wait on the yielded value
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // instantiate the generator bound to thisArg and run it up to its first yield
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.PipelineRunner = exports.TRANSFORM_STAGES = void 0;
13
+ /** Stage tokens that resolve to post-extraction graph→graph transforms. */
14
+ exports.TRANSFORM_STAGES = ["grounding", "canonicalization"];
15
/**
 * Applies graph→graph transforms in the sequence listed in `pipeline.stages`.
 *
 * Only tokens that match the `stage` of a registered transform do anything here;
 * producer tokens (tf_analysis / schema_induction / extraction) and unknown
 * tokens are passed over because they are handled (or ignored) upstream.
 * Changing the order of `pipeline.stages` changes the order the transforms run
 * in (the seam Experiment 2 needs: grounding before canonicalization).
 */
class PipelineRunner {
    /**
     * @param {Array} transforms - Transform objects exposing `stage`, `isEnabled(ctx)` and `apply(graph, ctx)`.
     * @param {object} ctx - Shared context; `options.pipeline.stages` and `logger` are read from it.
     */
    constructor(transforms, ctx) {
        this.transforms = transforms;
        this.ctx = ctx;
    }
    /** Whether the config enables at least one of the registered transforms. */
    hasWork() {
        const isOn = (transform) => transform.isEnabled(this.ctx);
        return this.transforms.some(isOn);
    }
    /**
     * Threads `graph` through every enabled transform named in `pipeline.stages`.
     *
     * @param {object} graph - Input graph.
     * @returns {Promise<object>} Output of the last transform that ran, or `graph` itself if none ran.
     */
    run(graph) {
        return __awaiter(this, void 0, void 0, function* () {
            // stage token → transform; a later registration replaces an earlier one with the same stage
            const registry = new Map();
            this.transforms.forEach((transform) => {
                registry.set(transform.stage, transform);
            });
            let result = graph;
            for (const stageName of this.ctx.options.pipeline.stages) {
                const stageTransform = registry.get(stageName);
                if (stageTransform) {
                    if (stageTransform.isEnabled(this.ctx)) {
                        this.ctx.logger.info(`Pipeline stage '${stageName}' running`);
                        result = yield stageTransform.apply(result, this.ctx);
                    }
                    else {
                        this.ctx.logger.debug(`Pipeline stage '${stageName}' disabled — skipping`);
                    }
                }
                // no transform registered for this token: producer/unknown stage, nothing to do
            }
            return result;
        });
    }
}
50
+ exports.PipelineRunner = PipelineRunner;
51
+ //# sourceMappingURL=PipelineRunner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PipelineRunner.js","sourceRoot":"","sources":["../../../src/core/pipeline/PipelineRunner.ts"],"names":[],"mappings":";;;;;;;;;;;;AAgCA,2EAA2E;AAC9D,QAAA,gBAAgB,GAAG,CAAC,WAAW,EAAE,kBAAkB,CAAU,CAAC;AAE3E;;;;;;GAMG;AACH,MAAa,cAAc;IACzB,YACU,UAA4B,EAC5B,GAAqB;QADrB,eAAU,GAAV,UAAU,CAAkB;QAC5B,QAAG,GAAH,GAAG,CAAkB;IAC5B,CAAC;IAEJ,4EAA4E;IAC5E,OAAO;QACL,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IAEK,GAAG,CAAC,KAAqB;;YAC7B,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAClE,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACrD,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACrC,IAAI,CAAC,SAAS;oBAAE,SAAS,CAAC,gDAAgD;gBAC1E,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,mBAAmB,KAAK,uBAAuB,CAAC,CAAC;oBACvE,SAAS;gBACX,CAAC;gBACD,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,mBAAmB,KAAK,WAAW,CAAC,CAAC;gBAC1D,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;YACrD,CAAC;YACD,OAAO,OAAO,CAAC;QACjB,CAAC;KAAA;CACF;AA1BD,wCA0BC"}
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // async helper: reuse `this.__awaiter` when one exists, otherwise drive `generator` to completion and return a promise of its return value
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wrap anything that is not already a P in a P resolving to it
4
+ return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when the caller passes none
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with a settled value; a throw rejects the outer promise
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // raise the rejection reason inside the generator so its own try/catch can see it
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // done → resolve with the return value; otherwise wait on the yielded value
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // instantiate the generator bound to thisArg and run it up to its first yield
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.RelationFilterTransform = void 0;
13
/**
 * Reports whether an edge carries nothing but the `related_to` catch-all.
 *
 * A non-array `relationType` is treated as a one-element list. An empty list
 * yields false.
 */
function isRelatedToOnly(r) {
    const predicates = Array.isArray(r.relationType) ? r.relationType : [r.relationType];
    if (predicates.length === 0)
        return false;
    const hasOtherPredicate = predicates.some((predicate) => predicate !== "related_to");
    return !hasOtherPredicate;
}
18
/** Builds the lookup key for an ordered endpoint pair: both names joined by the ␟ character. */
const pairKey = (a, b) => "".concat(a, "␟", b);
19
/**
 * `related_to` pruning gate (canon brief / NR-4). `related_to` is the catch-all
 * of the relation layer, and on prose corpora it makes up a large, low-value
 * share of the edges. The gate runs AFTER canonicalization, so endpoint names
 * are already canonical, and prunes according to `pipeline.relationFilter.mode`:
 *   - off (default) — graph is returned untouched
 *   - redundant     — a `related_to` edge is dropped only when the same unordered
 *                     endpoint pair also has a typed (non-`related_to`) edge:
 *                     pure redundancy, no information lost
 *   - all           — every `related_to` edge is dropped (typed-only graphs)
 *
 * Re-typing ungrounded `related_to` edges to real predicates needs an LLM pass
 * and is intentionally out of scope here (a future mode).
 */
class RelationFilterTransform {
    constructor() {
        this.stage = "relationFilter";
    }
    /** Enabled for every mode except "off". */
    isEnabled(ctx) {
        const { mode } = ctx.options.pipeline.relationFilter;
        return mode !== "off";
    }
    /**
     * Prunes `related_to`-only edges according to the configured mode.
     *
     * @param {{entities: Array, relations: Array}} graph - Input graph; not mutated.
     * @param {object} ctx - Reads `options.pipeline.relationFilter.mode`; logs via `logger.info`.
     * @returns {Promise<object>} `graph` itself for mode "off", otherwise a new graph sharing `entities`.
     */
    apply(graph, ctx) {
        return __awaiter(this, void 0, void 0, function* () {
            const { mode } = ctx.options.pipeline.relationFilter;
            if (mode === "off")
                return graph;
            // Endpoint pairs, recorded in both directions, that have at least one typed edge.
            const typedPairs = new Set();
            if (mode === "redundant") {
                for (const edge of graph.relations) {
                    if (!isRelatedToOnly(edge)) {
                        typedPairs.add(pairKey(edge.from, edge.to));
                        typedPairs.add(pairKey(edge.to, edge.from));
                    }
                }
            }
            const before = graph.relations.length;
            const relations = graph.relations.filter((edge) => {
                const catchAllOnly = isRelatedToOnly(edge);
                if (catchAllOnly && mode === "all")
                    return false;
                // typed edges always survive; a catch-all edge survives only without a typed twin
                return !catchAllOnly || !typedPairs.has(pairKey(edge.from, edge.to));
            });
            const dropped = before - relations.length;
            if (dropped > 0) {
                ctx.logger.info(`Relation filter (${mode}) dropped ${dropped}/${before} 'related_to' edge(s)`);
            }
            return { entities: graph.entities, relations };
        });
    }
}
71
+ exports.RelationFilterTransform = RelationFilterTransform;
72
+ //# sourceMappingURL=RelationFilterTransform.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"RelationFilterTransform.js","sourceRoot":"","sources":["../../../src/core/pipeline/RelationFilterTransform.ts"],"names":[],"mappings":";;;;;;;;;;;;AAGA,8EAA8E;AAC9E,SAAS,eAAe,CAAC,CAAW;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAChF,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,OAAO,GAAG,CAAC,CAAS,EAAE,CAAS,EAAU,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;AAE9D;;;;;;;;;;;;;GAaG;AACH,MAAa,uBAAuB;IAApC;QACW,UAAK,GAAG,gBAAgB,CAAC;IAmCpC,CAAC;IAjCC,SAAS,CAAC,GAAqB;QAC7B,OAAO,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,KAAK,KAAK,CAAC;IAC5D,CAAC;IAEK,KAAK,CAAC,KAAqB,EAAE,GAAqB;;YACtD,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC;YACtD,IAAI,IAAI,KAAK,KAAK;gBAAE,OAAO,KAAK,CAAC;YAEjC,+DAA+D;YAC/D,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;YACrC,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;oBAChC,IAAI,eAAe,CAAC,CAAC,CAAC;wBAAE,SAAS;oBACjC,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBACtC,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBACxC,CAAC;YACH,CAAC;YAED,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC;YACtC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC7C,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;oBAAE,OAAO,IAAI,CAAC;gBACrC,IAAI,IAAI,KAAK,KAAK;oBAAE,OAAO,KAAK,CAAC;gBACjC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,wCAAwC;YACzF,CAAC,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;YAC1C,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;gBAChB,GAAG,CAAC,MAAM,CAAC,IAAI,CACb,oBAAoB,IAAI,aAAa,OAAO,IAAI,MAAM,uBAAuB,CAC9E,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC;QACjD,CAAC;KAAA;CACF;AApCD,0DAoCC"}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // helper: expose property k of m on o under the name k2; this branch is chosen when Object.create exists
3
+ if (k2 === undefined) k2 = k; // target name defaults to the source name
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no own descriptor, an accessor on an m not flagged __esModule, or a writable/configurable data property
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // substitute a getter so o[k2] reads m[k] on every access
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // fallback branch when Object.create is missing: plain value copy
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) { // helper: re-export the members of m onto exports; reuses this.__exportStar when one exists
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); // skips "default" and any name exports already owns, so existing bindings win
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./PipelineRunner"), exports);
18
+ __exportStar(require("./GroundingTransform"), exports);
19
+ __exportStar(require("./RelationFilterTransform"), exports);
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/core/pipeline/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,mDAAiC;AACjC,uDAAqC;AACrC,4DAA0C"}