@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443):
  1. package/LICENSE +21 -0
  2. package/README.md +458 -0
  3. package/dist/__tests__/helpers.js +27 -0
  4. package/dist/__tests__/helpers.js.map +1 -0
  5. package/dist/cli/commands/export.command.js +99 -0
  6. package/dist/cli/commands/export.command.js.map +1 -0
  7. package/dist/cli/commands/index.js +22 -0
  8. package/dist/cli/commands/index.js.map +1 -0
  9. package/dist/cli/commands/inspectMerges.command.js +84 -0
  10. package/dist/cli/commands/inspectMerges.command.js.map +1 -0
  11. package/dist/cli/commands/metrics.command.js +196 -0
  12. package/dist/cli/commands/metrics.command.js.map +1 -0
  13. package/dist/cli/commands/process.command.js +82 -0
  14. package/dist/cli/commands/process.command.js.map +1 -0
  15. package/dist/cli/commands/watch.command.js +91 -0
  16. package/dist/cli/commands/watch.command.js.map +1 -0
  17. package/dist/cli/index.js +269 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/cli/optionsToConfig.js +160 -0
  20. package/dist/cli/optionsToConfig.js.map +1 -0
  21. package/dist/config/index.js +59 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/legacyHints.js +113 -0
  24. package/dist/config/legacyHints.js.map +1 -0
  25. package/dist/config/schema.js +803 -0
  26. package/dist/config/schema.js.map +1 -0
  27. package/dist/config/ui.js +221 -0
  28. package/dist/config/ui.js.map +1 -0
  29. package/dist/core/DirectoryProcessor.js +725 -0
  30. package/dist/core/DirectoryProcessor.js.map +1 -0
  31. package/dist/core/adapters/IStructuredAdapter.js +3 -0
  32. package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
  33. package/dist/core/adapters/SqliteAdapter.js +267 -0
  34. package/dist/core/adapters/SqliteAdapter.js.map +1 -0
  35. package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
  36. package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
  37. package/dist/core/adapters/index.js +20 -0
  38. package/dist/core/adapters/index.js.map +1 -0
  39. package/dist/core/checkpoint/CheckpointService.js +188 -0
  40. package/dist/core/checkpoint/CheckpointService.js.map +1 -0
  41. package/dist/core/checkpoint/index.js +18 -0
  42. package/dist/core/checkpoint/index.js.map +1 -0
  43. package/dist/core/corpus/CorpusAnalyzer.js +266 -0
  44. package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
  45. package/dist/core/corpus/CorpusProfileStore.js +92 -0
  46. package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
  47. package/dist/core/corpus/index.js +21 -0
  48. package/dist/core/corpus/index.js.map +1 -0
  49. package/dist/core/corpus/normalizeGlossary.js +60 -0
  50. package/dist/core/corpus/normalizeGlossary.js.map +1 -0
  51. package/dist/core/corpus/relPath.js +52 -0
  52. package/dist/core/corpus/relPath.js.map +1 -0
  53. package/dist/core/corpus/termFrequency.js +86 -0
  54. package/dist/core/corpus/termFrequency.js.map +1 -0
  55. package/dist/core/cost/CostMeter.js +235 -0
  56. package/dist/core/cost/CostMeter.js.map +1 -0
  57. package/dist/core/cost/index.js +19 -0
  58. package/dist/core/cost/index.js.map +1 -0
  59. package/dist/core/cost/prices.js +38 -0
  60. package/dist/core/cost/prices.js.map +1 -0
  61. package/dist/core/cv/ObjectDetectionService.js +119 -0
  62. package/dist/core/cv/ObjectDetectionService.js.map +1 -0
  63. package/dist/core/di/ContainerFactory.js +670 -0
  64. package/dist/core/di/ContainerFactory.js.map +1 -0
  65. package/dist/core/di/DIContainer.js +103 -0
  66. package/dist/core/di/DIContainer.js.map +1 -0
  67. package/dist/core/di/index.js +19 -0
  68. package/dist/core/di/index.js.map +1 -0
  69. package/dist/core/errors/CustomErrors.js +342 -0
  70. package/dist/core/errors/CustomErrors.js.map +1 -0
  71. package/dist/core/errors/index.js +18 -0
  72. package/dist/core/errors/index.js.map +1 -0
  73. package/dist/core/export/KnowledgeGraphExportService.js +56 -0
  74. package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
  75. package/dist/core/export/index.js +19 -0
  76. package/dist/core/export/index.js.map +1 -0
  77. package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
  78. package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
  79. package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
  80. package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
  81. package/dist/core/export/strategies/IExportStrategy.js +3 -0
  82. package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
  83. package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
  84. package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
  85. package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
  86. package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
  87. package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
  88. package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
  89. package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
  90. package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
  91. package/dist/core/export/strategies/McpExportStrategy.js +67 -0
  92. package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
  93. package/dist/core/export/strategies/index.js +25 -0
  94. package/dist/core/export/strategies/index.js.map +1 -0
  95. package/dist/core/export/strategies/kbTriples.js +60 -0
  96. package/dist/core/export/strategies/kbTriples.js.map +1 -0
  97. package/dist/core/index.js +22 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
  100. package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
  101. package/dist/core/knowledge/MergeRecord.js +3 -0
  102. package/dist/core/knowledge/MergeRecord.js.map +1 -0
  103. package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
  104. package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
  105. package/dist/core/knowledge/canon/index.js +18 -0
  106. package/dist/core/knowledge/canon/index.js.map +1 -0
  107. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
  108. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
  109. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
  110. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
  111. package/dist/core/knowledge/contradiction/index.js +19 -0
  112. package/dist/core/knowledge/contradiction/index.js.map +1 -0
  113. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
  114. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
  115. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
  116. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
  117. package/dist/core/knowledge/grounding/index.js +20 -0
  118. package/dist/core/knowledge/grounding/index.js.map +1 -0
  119. package/dist/core/knowledge/grounding/verbalize.js +38 -0
  120. package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
  121. package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
  122. package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
  123. package/dist/core/knowledge/index.js +20 -0
  124. package/dist/core/knowledge/index.js.map +1 -0
  125. package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
  126. package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
  127. package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
  128. package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
  129. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
  130. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
  131. package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
  132. package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
  133. package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
  134. package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
  135. package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
  136. package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
  137. package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
  138. package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
  139. package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
  140. package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
  141. package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
  142. package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
  143. package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
  144. package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
  145. package/dist/core/knowledge/vocabulary.js +162 -0
  146. package/dist/core/knowledge/vocabulary.js.map +1 -0
  147. package/dist/core/llm/EmbeddingService.js +113 -0
  148. package/dist/core/llm/EmbeddingService.js.map +1 -0
  149. package/dist/core/llm/OllamaService.js +146 -0
  150. package/dist/core/llm/OllamaService.js.map +1 -0
  151. package/dist/core/llm/OpenAICompatibleService.js +190 -0
  152. package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
  153. package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
  154. package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
  155. package/dist/core/llm/embeddingUtils.js +25 -0
  156. package/dist/core/llm/embeddingUtils.js.map +1 -0
  157. package/dist/core/llm/index.js +23 -0
  158. package/dist/core/llm/index.js.map +1 -0
  159. package/dist/core/llm/prompts/PromptManager.js +388 -0
  160. package/dist/core/llm/prompts/PromptManager.js.map +1 -0
  161. package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
  162. package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
  163. package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
  164. package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
  165. package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
  166. package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
  167. package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
  168. package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
  169. package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
  170. package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
  171. package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
  172. package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
  173. package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
  174. package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
  175. package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
  176. package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
  177. package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
  178. package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
  179. package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
  180. package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
  181. package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
  182. package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
  183. package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
  184. package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
  185. package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
  186. package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
  187. package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
  188. package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
  189. package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
  190. package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
  191. package/dist/core/pipeline/GroundingTransform.js +52 -0
  192. package/dist/core/pipeline/GroundingTransform.js.map +1 -0
  193. package/dist/core/pipeline/PipelineRunner.js +51 -0
  194. package/dist/core/pipeline/PipelineRunner.js.map +1 -0
  195. package/dist/core/pipeline/RelationFilterTransform.js +72 -0
  196. package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
  197. package/dist/core/pipeline/index.js +20 -0
  198. package/dist/core/pipeline/index.js.map +1 -0
  199. package/dist/core/processor/FileProcessor.js +184 -0
  200. package/dist/core/processor/FileProcessor.js.map +1 -0
  201. package/dist/core/processor/ProcessedRegistry.js +38 -0
  202. package/dist/core/processor/ProcessedRegistry.js.map +1 -0
  203. package/dist/core/processor/ast/AstSeedService.js +0 -0
  204. package/dist/core/processor/ast/AstSeedService.js.map +1 -0
  205. package/dist/core/processor/ast/AstSymbolStore.js +110 -0
  206. package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
  207. package/dist/core/processor/ast/index.js +19 -0
  208. package/dist/core/processor/ast/index.js.map +1 -0
  209. package/dist/core/processor/chunking/TextChunker.js +98 -0
  210. package/dist/core/processor/chunking/TextChunker.js.map +1 -0
  211. package/dist/core/processor/chunking/index.js +18 -0
  212. package/dist/core/processor/chunking/index.js.map +1 -0
  213. package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
  214. package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
  215. package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
  216. package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
  217. package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
  218. package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
  219. package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
  220. package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
  221. package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
  222. package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
  223. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
  224. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
  225. package/dist/core/processor/classifier/index.js +21 -0
  226. package/dist/core/processor/classifier/index.js.map +1 -0
  227. package/dist/core/processor/classifier/mergeClassifications.js +32 -0
  228. package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
  229. package/dist/core/processor/index.js +20 -0
  230. package/dist/core/processor/index.js.map +1 -0
  231. package/dist/core/processor/readers/AudioReader.js +462 -0
  232. package/dist/core/processor/readers/AudioReader.js.map +1 -0
  233. package/dist/core/processor/readers/BinaryReader.js +90 -0
  234. package/dist/core/processor/readers/BinaryReader.js.map +1 -0
  235. package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
  236. package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
  237. package/dist/core/processor/readers/ChatExportReader.js +365 -0
  238. package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
  239. package/dist/core/processor/readers/DoclingReader.js +445 -0
  240. package/dist/core/processor/readers/DoclingReader.js.map +1 -0
  241. package/dist/core/processor/readers/EmailReader.js +259 -0
  242. package/dist/core/processor/readers/EmailReader.js.map +1 -0
  243. package/dist/core/processor/readers/EpubReader.js +175 -0
  244. package/dist/core/processor/readers/EpubReader.js.map +1 -0
  245. package/dist/core/processor/readers/FileReader.js +90 -0
  246. package/dist/core/processor/readers/FileReader.js.map +1 -0
  247. package/dist/core/processor/readers/FileReaderFactory.js +49 -0
  248. package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
  249. package/dist/core/processor/readers/HtmlReader.js +371 -0
  250. package/dist/core/processor/readers/HtmlReader.js.map +1 -0
  251. package/dist/core/processor/readers/ImageReader.js +162 -0
  252. package/dist/core/processor/readers/ImageReader.js.map +1 -0
  253. package/dist/core/processor/readers/JsonFileReader.js +232 -0
  254. package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
  255. package/dist/core/processor/readers/JupyterReader.js +178 -0
  256. package/dist/core/processor/readers/JupyterReader.js.map +1 -0
  257. package/dist/core/processor/readers/LatexReader.js +176 -0
  258. package/dist/core/processor/readers/LatexReader.js.map +1 -0
  259. package/dist/core/processor/readers/MarkdownReader.js +289 -0
  260. package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
  261. package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
  262. package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
  263. package/dist/core/processor/readers/MistralOcrReader.js +198 -0
  264. package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
  265. package/dist/core/processor/readers/OfficeReader.js +174 -0
  266. package/dist/core/processor/readers/OfficeReader.js.map +1 -0
  267. package/dist/core/processor/readers/PdfReader.js +116 -0
  268. package/dist/core/processor/readers/PdfReader.js.map +1 -0
  269. package/dist/core/processor/readers/RtfReader.js +107 -0
  270. package/dist/core/processor/readers/RtfReader.js.map +1 -0
  271. package/dist/core/processor/readers/SubtitleReader.js +145 -0
  272. package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
  273. package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
  274. package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
  275. package/dist/core/processor/readers/TextReader.js +129 -0
  276. package/dist/core/processor/readers/TextReader.js.map +1 -0
  277. package/dist/core/processor/readers/TranscriptReader.js +234 -0
  278. package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
  279. package/dist/core/processor/readers/image/imageMetadata.js +155 -0
  280. package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
  281. package/dist/core/processor/readers/index.js +41 -0
  282. package/dist/core/processor/readers/index.js.map +1 -0
  283. package/dist/core/processor/readers/referenceExtraction.js +198 -0
  284. package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
  285. package/dist/core/processor/readers/stripReferences.js +59 -0
  286. package/dist/core/processor/readers/stripReferences.js.map +1 -0
  287. package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
  288. package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
  289. package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
  290. package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
  291. package/dist/core/progress/NoopProgressEmitter.js +15 -0
  292. package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
  293. package/dist/core/progress/index.js +19 -0
  294. package/dist/core/progress/index.js.map +1 -0
  295. package/dist/core/trace/TraceWriter.js +100 -0
  296. package/dist/core/trace/TraceWriter.js.map +1 -0
  297. package/dist/core/trace/events.js +13 -0
  298. package/dist/core/trace/events.js.map +1 -0
  299. package/dist/core/trace/index.js +20 -0
  300. package/dist/core/trace/index.js.map +1 -0
  301. package/dist/core/trace/lineage.js +97 -0
  302. package/dist/core/trace/lineage.js.map +1 -0
  303. package/dist/evaluation/BenchmarkRunner.js +171 -0
  304. package/dist/evaluation/BenchmarkRunner.js.map +1 -0
  305. package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
  306. package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
  307. package/dist/evaluation/classifier/labeledSamples.js +379 -0
  308. package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
  309. package/dist/evaluation/compare/goldCompare.js +126 -0
  310. package/dist/evaluation/compare/goldCompare.js.map +1 -0
  311. package/dist/evaluation/crossre/compareScoring.js +30 -0
  312. package/dist/evaluation/crossre/compareScoring.js.map +1 -0
  313. package/dist/evaluation/datasets/CrossREDataset.js +170 -0
  314. package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
  315. package/dist/evaluation/datasets/IDataset.js +3 -0
  316. package/dist/evaluation/datasets/IDataset.js.map +1 -0
  317. package/dist/evaluation/datasets/RebelDataset.js +117 -0
  318. package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
  319. package/dist/evaluation/datasets/RedocredDataset.js +218 -0
  320. package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
  321. package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
  322. package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
  323. package/dist/evaluation/index.js +33 -0
  324. package/dist/evaluation/index.js.map +1 -0
  325. package/dist/evaluation/matching/ExactMatcher.js +75 -0
  326. package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
  327. package/dist/evaluation/matching/SemanticMatcher.js +143 -0
  328. package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
  329. package/dist/evaluation/metrics/TripleMetrics.js +64 -0
  330. package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
  331. package/dist/evaluation/mine/MineCheckpoint.js +114 -0
  332. package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
  333. package/dist/evaluation/mine/MineDataset.js +208 -0
  334. package/dist/evaluation/mine/MineDataset.js.map +1 -0
  335. package/dist/evaluation/mine/MineReporter.js +98 -0
  336. package/dist/evaluation/mine/MineReporter.js.map +1 -0
  337. package/dist/evaluation/mine/MineRunner.js +148 -0
  338. package/dist/evaluation/mine/MineRunner.js.map +1 -0
  339. package/dist/evaluation/mine/MineScorer.js +127 -0
  340. package/dist/evaluation/mine/MineScorer.js.map +1 -0
  341. package/dist/evaluation/mine/types.js +12 -0
  342. package/dist/evaluation/mine/types.js.map +1 -0
  343. package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
  344. package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
  345. package/dist/evaluation/reporters/JsonReporter.js +50 -0
  346. package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
  347. package/dist/index.js +28 -0
  348. package/dist/index.js.map +1 -0
  349. package/dist/quality/CompositeScore.js +61 -0
  350. package/dist/quality/CompositeScore.js.map +1 -0
  351. package/dist/quality/ConsistencyMetrics.js +70 -0
  352. package/dist/quality/ConsistencyMetrics.js.map +1 -0
  353. package/dist/quality/FactualMetrics.js +76 -0
  354. package/dist/quality/FactualMetrics.js.map +1 -0
  355. package/dist/quality/GraphHealthMetrics.js +68 -0
  356. package/dist/quality/GraphHealthMetrics.js.map +1 -0
  357. package/dist/quality/SemanticMetrics.js +102 -0
  358. package/dist/quality/SemanticMetrics.js.map +1 -0
  359. package/dist/quality/StructuralMetrics.js +60 -0
  360. package/dist/quality/StructuralMetrics.js.map +1 -0
  361. package/dist/quality/index.js +23 -0
  362. package/dist/quality/index.js.map +1 -0
  363. package/dist/shared/index.js +20 -0
  364. package/dist/shared/index.js.map +1 -0
  365. package/dist/shared/logger/Logger.js +3 -0
  366. package/dist/shared/logger/Logger.js.map +1 -0
  367. package/dist/shared/logger/LoggerFactory.js +75 -0
  368. package/dist/shared/logger/LoggerFactory.js.map +1 -0
  369. package/dist/shared/logger/index.js +19 -0
  370. package/dist/shared/logger/index.js.map +1 -0
  371. package/dist/shared/shutdown.js +30 -0
  372. package/dist/shared/shutdown.js.map +1 -0
  373. package/dist/shared/utils/agglomerativeCluster.js +269 -0
  374. package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
  375. package/dist/shared/utils/astSymbols.js +69 -0
  376. package/dist/shared/utils/astSymbols.js.map +1 -0
  377. package/dist/shared/utils/cosineSimilarity.js +18 -0
  378. package/dist/shared/utils/cosineSimilarity.js.map +1 -0
  379. package/dist/shared/utils/directoryTree.js +184 -0
  380. package/dist/shared/utils/directoryTree.js.map +1 -0
  381. package/dist/shared/utils/documentOutline.js +74 -0
  382. package/dist/shared/utils/documentOutline.js.map +1 -0
  383. package/dist/shared/utils/index.js +24 -0
  384. package/dist/shared/utils/index.js.map +1 -0
  385. package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
  386. package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
  387. package/dist/shared/utils/parseJsonLenient.js +27 -0
  388. package/dist/shared/utils/parseJsonLenient.js.map +1 -0
  389. package/dist/shared/utils/readConfig.js +42 -0
  390. package/dist/shared/utils/readConfig.js.map +1 -0
  391. package/dist/shared/utils/readRtf.js +216 -0
  392. package/dist/shared/utils/readRtf.js.map +1 -0
  393. package/dist/shared/utils/softmax.js +26 -0
  394. package/dist/shared/utils/softmax.js.map +1 -0
  395. package/dist/types/ContentClass.js +3 -0
  396. package/dist/types/ContentClass.js.map +1 -0
  397. package/dist/types/CorpusProfile.js +3 -0
  398. package/dist/types/CorpusProfile.js.map +1 -0
  399. package/dist/types/IContradictionChecker.js +3 -0
  400. package/dist/types/IContradictionChecker.js.map +1 -0
  401. package/dist/types/ICorpusAnalyzer.js +3 -0
  402. package/dist/types/ICorpusAnalyzer.js.map +1 -0
  403. package/dist/types/IDirectoryProcessor.js +3 -0
  404. package/dist/types/IDirectoryProcessor.js.map +1 -0
  405. package/dist/types/IEmbeddingProvider.js +3 -0
  406. package/dist/types/IEmbeddingProvider.js.map +1 -0
  407. package/dist/types/IEmbeddingService.js +6 -0
  408. package/dist/types/IEmbeddingService.js.map +1 -0
  409. package/dist/types/IFileProcessor.js +3 -0
  410. package/dist/types/IFileProcessor.js.map +1 -0
  411. package/dist/types/IGroundingChecker.js +3 -0
  412. package/dist/types/IGroundingChecker.js.map +1 -0
  413. package/dist/types/IKnowledgeGraphBuilder.js +3 -0
  414. package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
  415. package/dist/types/IKnowledgeGraphExporter.js +3 -0
  416. package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
  417. package/dist/types/IKnowledgeGraphMerger.js +3 -0
  418. package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
  419. package/dist/types/IKnowledgeGraphSearch.js +3 -0
  420. package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
  421. package/dist/types/ILLMProvider.js +3 -0
  422. package/dist/types/ILLMProvider.js.map +1 -0
  423. package/dist/types/ILLMService.js +3 -0
  424. package/dist/types/ILLMService.js.map +1 -0
  425. package/dist/types/IObjectDetector.js +3 -0
  426. package/dist/types/IObjectDetector.js.map +1 -0
  427. package/dist/types/IProcessingService.js +3 -0
  428. package/dist/types/IProcessingService.js.map +1 -0
  429. package/dist/types/IProgressEmitter.js +3 -0
  430. package/dist/types/IProgressEmitter.js.map +1 -0
  431. package/dist/types/IPromptManager.js +3 -0
  432. package/dist/types/IPromptManager.js.map +1 -0
  433. package/dist/types/KnowledgeGraph.js +3 -0
  434. package/dist/types/KnowledgeGraph.js.map +1 -0
  435. package/dist/types/MCPKnowledgeGraph.js +3 -0
  436. package/dist/types/MCPKnowledgeGraph.js.map +1 -0
  437. package/dist/types/Observation.js +21 -0
  438. package/dist/types/Observation.js.map +1 -0
  439. package/dist/types/ProcessingOptions.js +3 -0
  440. package/dist/types/ProcessingOptions.js.map +1 -0
  441. package/dist/types/index.js +40 -0
  442. package/dist/types/index.js.map +1 -0
  443. package/package.json +122 -0
@@ -0,0 +1,261 @@
1
+ "use strict";
2
/**
 * Compiler-emitted helper that runs a generator function as an async function.
 *
 * Reuses `this.__awaiter` when one is already defined; otherwise uses the
 * implementation below.
 *
 * @param thisArg - `this` value the generator function is invoked with.
 * @param _arguments - Argument list for the generator function (treated as [] when falsy).
 * @param P - Promise constructor to use; defaults to the global `Promise` when falsy.
 * @param generator - Generator function whose yielded values are awaited one by one.
 * @returns A promise settled with the generator's return value, or rejected
 *          with any error the generator lets escape.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap anything that is not already a P instance in a P resolved with it.
    const asPromise = (value) => (value instanceof P ? value : new P((resolve) => { resolve(value); }));
    return new (P || (P = Promise))((resolve, reject) => {
        // Finish when the generator is done; otherwise wait on the yielded value.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            asPromise(result.value).then(onFulfilled, onRejected);
        };
        // Feed a fulfilled value back into the generator.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Throw a rejection reason into the generator so its try/catch can see it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
11
/**
 * Compiler-emitted interop helper for default imports.
 *
 * Reuses `this.__importDefault` when one is already defined.
 *
 * @param mod - The value returned by `require`.
 * @returns `mod` itself when it is truthy and flagged `__esModule`; otherwise
 *          a new object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.KnowledgeGraphSearch = void 0;
16
+ const path_1 = __importDefault(require("path"));
17
+ const utils_1 = require("../../../shared/utils");
18
+ const types_1 = require("../../../types");
19
+ // Enhanced search with multiple strategies
20
+ class KnowledgeGraphSearch {
21
+ constructor(embeddingService, logger) {
22
+ this.embeddingService = embeddingService;
23
+ this.logger = logger;
24
+ }
25
+ // Strategy 1: Content-based search using file content directly
26
+ searchByFileContent(fileContent_1, fileName_1, graphs_1) {
27
+ return __awaiter(this, arguments, void 0, function* (fileContent, fileName, graphs, options = {}) {
28
+ this.logger.debug(`Searching knowledge graphs for context relevant to: ${fileName}`);
29
+ // Extract key terms from file content (simple but effective)
30
+ const keyTerms = this.extractKeyTerms(fileContent, fileName);
31
+ // Multi-strategy search
32
+ const results = yield Promise.all([
33
+ this.searchByKeyTerms(keyTerms, graphs, options),
34
+ this.searchByFileRelationship(fileName, graphs, options),
35
+ this.searchByEmbeddings(fileContent, graphs, options)
36
+ ]);
37
+ // Merge and rank results
38
+ return this.mergeSearchResults(results, options.limit || 10);
39
+ });
40
+ }
41
+ // Strategy 2: Extract meaningful terms without LLM
42
+ extractKeyTerms(content, fileName) {
43
+ const terms = new Set();
44
+ // Add filename-based terms
45
+ const fileBaseName = path_1.default.basename(fileName, path_1.default.extname(fileName));
46
+ terms.add(fileBaseName);
47
+ // Extract programming language keywords
48
+ const codeTerms = this.extractCodeTerms(content);
49
+ codeTerms.forEach(term => terms.add(term));
50
+ // Extract camelCase/snake_case identifiers
51
+ const identifiers = content.match(/[a-zA-Z_][a-zA-Z0-9_]*[A-Z][a-zA-Z0-9_]*/g) || [];
52
+ identifiers.slice(0, 20).forEach(term => terms.add(term)); // Limit to prevent noise
53
+ // Extract quoted strings (might be important concepts)
54
+ const quotes = content.match(/"([^"]+)"|'([^']+)'/g) || [];
55
+ quotes.slice(0, 10).forEach(quote => {
56
+ const cleaned = quote.replace(/['"]/g, '');
57
+ if (cleaned.length > 3 && cleaned.length < 30) {
58
+ terms.add(cleaned);
59
+ }
60
+ });
61
+ // Extract URLs/imports
62
+ const imports = content.match(/(?:import|from|require)\s+["']([^"']+)["']/g) || [];
63
+ imports.forEach(imp => {
64
+ var _a;
65
+ const module = (_a = imp.match(/["']([^"']+)["']/)) === null || _a === void 0 ? void 0 : _a[1];
66
+ if (module)
67
+ terms.add(module);
68
+ });
69
+ return Array.from(terms);
70
+ }
71
+ extractCodeTerms(content) {
72
+ const terms = [];
73
+ // Function/class names
74
+ const functions = content.match(/(?:function|class|def|fn)\s+([a-zA-Z_][a-zA-Z0-9_]*)/g) || [];
75
+ functions.forEach(match => {
76
+ const name = match.split(/\s+/).pop();
77
+ if (name)
78
+ terms.push(name);
79
+ });
80
+ // Variable declarations
81
+ const variables = content.match(/(?:const|let|var|final)\s+([a-zA-Z_][a-zA-Z0-9_]*)/g) || [];
82
+ variables.forEach(match => {
83
+ const name = match.split(/\s+/).pop();
84
+ if (name)
85
+ terms.push(name);
86
+ });
87
+ return terms;
88
+ }
89
+ // Strategy 3: Search by extracted key terms
90
+ searchByKeyTerms(keyTerms, graphs, options) {
91
+ return __awaiter(this, void 0, void 0, function* () {
92
+ const allEntities = [];
93
+ const allRelations = [];
94
+ const entityNames = new Set();
95
+ for (const graph of graphs) {
96
+ for (const entity of graph.entities) {
97
+ let relevanceScore = 0;
98
+ // Check entity name against key terms
99
+ for (const term of keyTerms) {
100
+ if ((0, utils_1.jaroWinklerSimilarity)(entity.name, term) > 0.7) {
101
+ relevanceScore += 1;
102
+ }
103
+ if (entity.name.toLowerCase().includes(term.toLowerCase())) {
104
+ relevanceScore += 0.5;
105
+ }
106
+ }
107
+ // Check entity type
108
+ for (const term of keyTerms) {
109
+ if ((0, utils_1.jaroWinklerSimilarity)(entity.entityType, term) > 0.7) {
110
+ relevanceScore += 0.8;
111
+ }
112
+ }
113
+ // Check observations if enabled
114
+ if (options.includeObservations !== false) {
115
+ for (const obs of entity.observations || []) {
116
+ for (const term of keyTerms) {
117
+ if ((0, types_1.obsText)(obs).toLowerCase().includes(term.toLowerCase())) {
118
+ relevanceScore += 0.3;
119
+ }
120
+ }
121
+ }
122
+ }
123
+ if (relevanceScore > 0.5) {
124
+ allEntities.push(Object.assign(Object.assign({}, entity), { relevanceScore }));
125
+ entityNames.add(entity.name);
126
+ }
127
+ }
128
+ // Add relations between relevant entities
129
+ for (const relation of graph.relations) {
130
+ if (entityNames.has(relation.from) && entityNames.has(relation.to)) {
131
+ allRelations.push(relation);
132
+ }
133
+ }
134
+ }
135
+ // Sort by relevance and limit
136
+ allEntities.sort((a, b) => (b.relevanceScore || 0) - (a.relevanceScore || 0));
137
+ return {
138
+ entities: allEntities.slice(0, options.limit || 10),
139
+ relations: allRelations
140
+ };
141
+ });
142
+ }
143
+ // Strategy 4: Search by file relationship
144
+ searchByFileRelationship(fileName, graphs, options) {
145
+ return __awaiter(this, void 0, void 0, function* () {
146
+ const fileBaseName = path_1.default.basename(fileName, path_1.default.extname(fileName));
147
+ const fileDirName = path_1.default.dirname(fileName);
148
+ const relatedEntities = [];
149
+ const relatedRelations = [];
150
+ const entityNames = new Set();
151
+ for (const graph of graphs) {
152
+ for (const entity of graph.entities) {
153
+ let isRelated = false;
154
+ // Same file
155
+ if (entity.files[0] === fileName) {
156
+ isRelated = true;
157
+ }
158
+ // Same directory
159
+ if (entity.files[0] && path_1.default.dirname(entity.files[0]) === fileDirName) {
160
+ isRelated = true;
161
+ }
162
+ // Similar file name
163
+ if (entity.files[0] && (0, utils_1.jaroWinklerSimilarity)(path_1.default.basename(entity.files[0], path_1.default.extname(entity.files[0])), fileBaseName) > 0.6) {
164
+ isRelated = true;
165
+ }
166
+ if (isRelated) {
167
+ relatedEntities.push(entity);
168
+ entityNames.add(entity.name);
169
+ }
170
+ }
171
+ // Add relations between related entities
172
+ for (const relation of graph.relations) {
173
+ if (entityNames.has(relation.from) && entityNames.has(relation.to)) {
174
+ relatedRelations.push(relation);
175
+ }
176
+ }
177
+ }
178
+ return {
179
+ entities: relatedEntities.slice(0, options.limit || 5),
180
+ relations: relatedRelations
181
+ };
182
+ });
183
+ }
184
+ // Strategy 5: Embedding-based search (optional, with caching)
185
+ searchByEmbeddings(content, graphs, options) {
186
+ return __awaiter(this, void 0, void 0, function* () {
187
+ try {
188
+ // Create embedding for file content (truncate if too long)
189
+ const truncatedContent = content.slice(0, 2000); // Prevent context overflow
190
+ const contentEmbedding = yield this.embeddingService.embed(truncatedContent);
191
+ const scoredEntities = [];
192
+ for (const graph of graphs) {
193
+ for (const entity of graph.entities) {
194
+ // Create entity text for embedding
195
+ const entityText = `${entity.name} ${entity.entityType} ${(entity.observations || []).map(types_1.obsText).join(' ')}`;
196
+ const entityEmbedding = yield this.embeddingService.embed(entityText);
197
+ const similarity = (0, utils_1.cosineSimilarity)(contentEmbedding, entityEmbedding);
198
+ if (similarity > 0.3) { // Threshold for relevance
199
+ scoredEntities.push(Object.assign(Object.assign({}, entity), { similarityScore: similarity }));
200
+ }
201
+ }
202
+ }
203
+ // Sort by similarity and take top results
204
+ scoredEntities.sort((a, b) => b.similarityScore - a.similarityScore);
205
+ const topEntities = scoredEntities.slice(0, options.limit || 5);
206
+ const entityNames = new Set(topEntities.map(e => e.name));
207
+ const relatedRelations = [];
208
+ for (const graph of graphs) {
209
+ for (const relation of graph.relations) {
210
+ if (entityNames.has(relation.from) && entityNames.has(relation.to)) {
211
+ relatedRelations.push(relation);
212
+ }
213
+ }
214
+ }
215
+ return {
216
+ entities: topEntities,
217
+ relations: relatedRelations
218
+ };
219
+ }
220
+ catch (error) {
221
+ this.logger.warn(`Embedding search failed: ${error}`);
222
+ return { entities: [], relations: [] };
223
+ }
224
+ });
225
+ }
226
+ // Merge and deduplicate results from multiple strategies
227
+ mergeSearchResults(results, limit) {
228
+ const entityMap = new Map();
229
+ const relationSet = new Set();
230
+ const relations = [];
231
+ // Merge entities, preferring higher relevance scores
232
+ for (const result of results) {
233
+ for (const entity of result.entities) {
234
+ const existing = entityMap.get(entity.name);
235
+ if (!existing || (entity.relevanceScore || 0) > (existing.relevanceScore || 0)) {
236
+ entityMap.set(entity.name, entity);
237
+ }
238
+ }
239
+ // Merge relations
240
+ for (const relation of result.relations) {
241
+ const relationKey = `${relation.from}->${relation.to}:${JSON.stringify(relation.relationType)}`;
242
+ if (!relationSet.has(relationKey)) {
243
+ relationSet.add(relationKey);
244
+ relations.push(relation);
245
+ }
246
+ }
247
+ }
248
+ // Sort entities by relevance and limit
249
+ const sortedEntities = Array.from(entityMap.values())
250
+ .sort((a, b) => (b.relevanceScore || 0) - (a.relevanceScore || 0))
251
+ .slice(0, limit);
252
+ const finalEntityNames = new Set(sortedEntities.map(e => e.name));
253
+ const finalRelations = relations.filter(r => finalEntityNames.has(r.from) && finalEntityNames.has(r.to));
254
+ return {
255
+ entities: sortedEntities,
256
+ relations: finalRelations
257
+ };
258
+ }
259
+ }
260
+ exports.KnowledgeGraphSearch = KnowledgeGraphSearch;
261
+ //# sourceMappingURL=KnowledgeGraphSearch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"KnowledgeGraphSearch.js","sourceRoot":"","sources":["../../../../src/core/knowledge/search/KnowledgeGraphSearch.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,gDAAwB;AACxB,iDAAgF;AAChF,0CAAsH;AAGtH,2CAA2C;AAC3C,MAAa,oBAAoB;IAE/B,YACU,gBAAoC,EACpC,MAAc;QADd,qBAAgB,GAAhB,gBAAgB,CAAoB;QACpC,WAAM,GAAN,MAAM,CAAQ;IAExB,CAAC;IAED,+DAA+D;IACzD,mBAAmB;6DACvB,WAAmB,EACnB,QAAgB,EAChB,MAAwB,EACxB,UAII,EAAE;YAEN,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,uDAAuD,QAAQ,EAAE,CAAC,CAAC;YAErF,6DAA6D;YAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAE7D,wBAAwB;YACxB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBAChC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC;gBAChD,IAAI,CAAC,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC;gBACxD,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC;aACtD,CAAC,CAAC;YAEH,yBAAyB;YACzB,OAAO,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAC/D,CAAC;KAAA;IAED,mDAAmD;IAC3C,eAAe,CAAC,OAAe,EAAE,QAAgB;QACvD,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAEhC,2BAA2B;QAC3B,MAAM,YAAY,GAAG,cAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,cAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,KAAK,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAExB,wCAAwC;QACxC,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QACjD,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;QAE3C,2CAA2C;QAC3C,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,IAAI,EAAE,CAAC;QACrF,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,yBAAyB;QAEpF,uDAAuD;QACvD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC;QAC3D,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBAC9C,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,uBAAuB;QACvB,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,6CAA6C,CAAC,IAAI,EAAE,CAAC;QACnF,OAAO,CAAC,OAAO,CAAC,
GAAG,CAAC,EAAE;;YACpB,MAAM,MAAM,GAAG,MAAA,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,0CAAG,CAAC,CAAC,CAAC;YAClD,IAAI,MAAM;gBAAE,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,CAAC;IAEO,gBAAgB,CAAC,OAAe;QACtC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,uBAAuB;QACvB,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,IAAI,EAAE,CAAC;QAC/F,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACxB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;YACtC,IAAI,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,wBAAwB;QACxB,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,qDAAqD,CAAC,IAAI,EAAE,CAAC;QAC7F,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACxB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;YACtC,IAAI,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC;IACf,CAAC;IAED,4CAA4C;IAC9B,gBAAgB,CAC5B,QAAkB,EAClB,MAAwB,EACxB,OAAY;;YAEZ,MAAM,WAAW,GAA4C,EAAE,CAAC;YAChE,MAAM,YAAY,GAAe,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;YAEtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBACpC,IAAI,cAAc,GAAG,CAAC,CAAC;oBAEvB,sCAAsC;oBACtC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;wBAC5B,IAAI,IAAA,6BAAqB,EAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;4BACnD,cAAc,IAAI,CAAC,CAAC;wBACtB,CAAC;wBACD,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;4BAC3D,cAAc,IAAI,GAAG,CAAC;wBACxB,CAAC;oBACH,CAAC;oBAED,oBAAoB;oBACpB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;wBAC5B,IAAI,IAAA,6BAAqB,EAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;4BACzD,cAAc,IAAI,GAAG,CAAC;wBACxB,CAAC;oBACH,CAAC;oBAED,gCAAgC;oBAChC,IAAI,OAAO,CAAC,mBAAmB,KAAK,KAAK,EAAE,CAAC;wBAC1C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,YAAY,IAAI,EAAE,EAAE,CAAC;4BAC5C,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;gCAC5B,IAAI,IAAA,eAAO,EAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;oCAC5D,cAAc,IAAI,GAAG,CAAC;gCACxB,CAAC;4BACH,CAAC;wBACH,CAAC;oBACH,CAAC;oBAED,IAAI,cAAc,GAA
G,GAAG,EAAE,CAAC;wBACzB,WAAW,CAAC,IAAI,iCAAM,MAAM,KAAE,cAAc,IAAG,CAAC;wBAChD,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/B,CAAC;gBACH,CAAC;gBAED,0CAA0C;gBAC1C,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;oBACvC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;wBACnE,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC;YACH,CAAC;YAED,8BAA8B;YAC9B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC,CAAC;YAE9E,OAAO;gBACL,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;gBACnD,SAAS,EAAE,YAAY;aACxB,CAAC;QACJ,CAAC;KAAA;IAED,0CAA0C;IAC5B,wBAAwB,CACpC,QAAgB,EAChB,MAAwB,EACxB,OAAY;;YAEZ,MAAM,YAAY,GAAG,cAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,cAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,WAAW,GAAG,cAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YAE3C,MAAM,eAAe,GAAa,EAAE,CAAC;YACrC,MAAM,gBAAgB,GAAe,EAAE,CAAC;YACxC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;YAEtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBACpC,IAAI,SAAS,GAAG,KAAK,CAAC;oBAEtB,YAAY;oBACZ,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;wBACjC,SAAS,GAAG,IAAI,CAAC;oBACnB,CAAC;oBAED,iBAAiB;oBACjB,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,cAAI,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,WAAW,EAAE,CAAC;wBACrE,SAAS,GAAG,IAAI,CAAC;oBACnB,CAAC;oBAED,oBAAoB;oBACpB,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,IAAA,6BAAqB,EAC1C,cAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,cAAI,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAC7D,YAAY,CACb,GAAG,GAAG,EAAE,CAAC;wBACR,SAAS,GAAG,IAAI,CAAC;oBACnB,CAAC;oBAED,IAAI,SAAS,EAAE,CAAC;wBACd,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;wBAC7B,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/B,CAAC;gBACH,CAAC;gBAED,yCAAyC;gBACzC,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;oBACvC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;wBACnE,gBAA
gB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAClC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO;gBACL,QAAQ,EAAE,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC;gBACtD,SAAS,EAAE,gBAAgB;aAC5B,CAAC;QACJ,CAAC;KAAA;IAED,8DAA8D;IAChD,kBAAkB,CAC9B,OAAe,EACf,MAAwB,EACxB,OAAY;;YAEZ,IAAI,CAAC;gBACH,2DAA2D;gBAC3D,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,2BAA2B;gBAC5E,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;gBAE7E,MAAM,cAAc,GAAgD,EAAE,CAAC;gBAEvE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;wBACpC,mCAAmC;wBACnC,MAAM,UAAU,GAAG,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,UAAU,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,eAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC/G,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;wBAEtE,MAAM,UAAU,GAAG,IAAA,wBAAgB,EAAC,gBAAgB,EAAE,eAAe,CAAC,CAAC;wBAEvE,IAAI,UAAU,GAAG,GAAG,EAAE,CAAC,CAAC,0BAA0B;4BAChD,cAAc,CAAC,IAAI,iCAAM,MAAM,KAAE,eAAe,EAAE,UAAU,IAAG,CAAC;wBAClE,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,0CAA0C;gBAC1C,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC;gBACrE,MAAM,WAAW,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;gBAEhE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBAC1D,MAAM,gBAAgB,GAAe,EAAE,CAAC;gBAExC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;wBACvC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;4BACnE,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;wBAClC,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,OAAO;oBACL,QAAQ,EAAE,WAAW;oBACrB,SAAS,EAAE,gBAAgB;iBAC5B,CAAC;YAEJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,4BAA4B,KAAK,EAAE,CAAC,CAAC;gBACtD,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YACzC,CAAC;QACH,CAAC;KAAA;IAED,yDAAyD;IACjD,kBAAkB,CAAC,OAAyB,EAAE,KAAa;QACjE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB
,CAAC;QAC5C,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;QACtC,MAAM,SAAS,GAAe,EAAE,CAAC;QAEjC,qDAAqD;QACrD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBAC5C,IAAI,CAAC,QAAQ,IAAI,CAAE,MAAc,CAAC,cAAc,IAAI,CAAC,CAAC,GAAG,CAAE,QAAgB,CAAC,cAAc,IAAI,CAAC,CAAC,EAAE,CAAC;oBACjG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;YAED,kBAAkB;YAClB,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;gBACxC,MAAM,WAAW,GAAG,GAAG,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,EAAE,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;gBAChG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;oBAClC,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;oBAC7B,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC3B,CAAC;YACH,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;aAClD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAE,CAAS,CAAC,cAAc,IAAI,CAAC,CAAC,GAAG,CAAE,CAAS,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC;aACnF,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QAEnB,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAClE,MAAM,cAAc,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAC1C,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAC3D,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,cAAc;YACxB,SAAS,EAAE,cAAc;SAC1B,CAAC;IACJ,CAAC;CACF;AA/SD,oDA+SC"}
@@ -0,0 +1,162 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_MIXED_DOMAIN_THRESHOLD = exports.DEFAULT_LOW_CONFIDENCE_THRESHOLD = exports.RELATION_TYPE_ESCAPE = exports.ENTITY_TYPE_ESCAPE = exports.BASE_RELATION_TYPES = exports.BASE_ENTITY_TYPES = void 0;
4
+ exports.configureDomainGate = configureDomainGate;
5
+ exports.domainGateThresholds = domainGateThresholds;
6
+ exports.resetDomainGate = resetDomainGate;
7
+ exports.activeDomainClasses = activeDomainClasses;
8
+ exports.domainVocabulary = domainVocabulary;
9
+ exports.allowedEntityTypes = allowedEntityTypes;
10
+ exports.allowedRelationTypes = allowedRelationTypes;
11
+ const NER_DOMAIN_EXAMPLES_1 = require("../processor/classifier/NER_DOMAIN_EXAMPLES");
12
+ /**
13
+ * Single source of truth for the v5 closed vocabularies (KG-05).
14
+ *
15
+ * The base sets mirror the `{{else}}` lists in `templates/v5/system.hbs`
16
+ * (a test in `vocabulary.test.ts` asserts they stay equal), and the same
17
+ * `allowed*` helpers feed *both* the Zod enum (`KnowledgeGraphBuilder`) and the
18
+ * prompt hints (`PromptManager.buildDomainHints`) — so the enum, the hints, and
19
+ * the gold examples can never drift into the three-way disagreement KG-05
20
+ * describes (entity enum scoped to a domain, relation enum not).
21
+ */
22
+ /**
23
+ * Domain-agnostic entity types, always offered alongside any detected domain's
24
+ * vocabulary, plus an `other` escape hatch so the model is never forced to
25
+ * mislabel when nothing fits.
26
+ */
27
// Note: the `other` escape value is not an element of this array; it is
// appended separately (as ENTITY_TYPE_ESCAPE) by allowedEntityTypes().
exports.BASE_ENTITY_TYPES = [
    "person", "organization", "location", "role", "event", "time", "metric",
    "concept", "term", "document", "product", "technology", "standard",
    "class", "interface", "function", "module", "service", "dependency",
    "data_structure", "config", "file",
];
33
+ /** Base relation predicates, always offered alongside any detected domain. */
34
// Note: "related_to" (the last entry) is also the value of
// RELATION_TYPE_ESCAPE; allowedRelationTypes() de-duplicates through a Set.
exports.BASE_RELATION_TYPES = [
    "uses", "depends_on", "calls", "implements", "extends", "contains", "part_of",
    "produces", "consumes", "configures", "references", "defines", "targets",
    "located_in", "works_at", "member_of", "precedes", "causes", "has_attribute",
    "related_to",
];
40
+ /** Escape hatches: keep the model from being forced to invent a one-off label. */
41
// Appended as the final candidate by allowedEntityTypes().
exports.ENTITY_TYPE_ESCAPE = "other";
// Appended as the final candidate by allowedRelationTypes().
exports.RELATION_TYPE_ESCAPE = "related_to";
43
+ /**
44
+ * Default minimum top-1 confidence to treat a classification as a domain signal.
45
+ *
46
+ * Calibrated for the softmax-probability confidences both classifiers now emit
47
+ * (S2): ~3× the 1/12 uniform baseline (≈0.083). A clearly-dominant-but-weak class
48
+ * (e.g. financial prose with no `$` smoking-gun lands at ~0.31, next at ~0.07)
49
+ * still routes; a flat/uniform distribution (garbage or empty content, p1 ≲ 0.15)
50
+ * abstains. The pre-S2 value (0.3) was tuned for the old *independent* tanh scores.
51
+ */
52
// Initial value of the module-level `lowConfidenceThreshold`; resetDomainGate() restores it.
exports.DEFAULT_LOW_CONFIDENCE_THRESHOLD = 0.25;
53
+ /**
54
+ * Default max top1−top2 probability gap that still counts as a tie → activate both.
55
+ * Conservative on purpose: combined with the floor on the *second* class, multi
56
+ * only fires when two domains genuinely co-dominate (each ≥ floor and within this
57
+ * margin), which prevents the "doubling the enum" over-activation S2 flagged. The
58
+ * pre-S2 value (0.2) was tuned for tanh scores.
59
+ */
60
// Initial value of the module-level `mixedDomainThreshold`; resetDomainGate() restores it.
exports.DEFAULT_MIXED_DOMAIN_THRESHOLD = 0.15;
61
+ /**
62
+ * Run-global domain-gate thresholds (A1). A module singleton — like
63
+ * `shared/shutdown.ts` — set once per run from
64
+ * `classifier.{lowConfidenceThreshold,mixedDomainThreshold}` by `ContainerFactory`.
65
+ *
66
+ * Keeping it module-global (rather than threading config through every pure gate
67
+ * function) guarantees the Zod enum path, the prompt hints, the cascade, and the
68
+ * eval harness all gate on **identical** thresholds — the KG-05 single-source
69
+ * invariant that would otherwise be one missed caller away from divergence.
70
+ */
71
// Mutable module state: written by configureDomainGate() and resetDomainGate(),
// read by activeDomainClasses() and domainGateThresholds().
let lowConfidenceThreshold = exports.DEFAULT_LOW_CONFIDENCE_THRESHOLD;
let mixedDomainThreshold = exports.DEFAULT_MIXED_DOMAIN_THRESHOLD;
73
+ /** Override the gate thresholds for this run; `undefined` values keep the current. */
74
function configureDomainGate(opts) {
    // Parameters: `opts.lowConfidence` and `opts.mixedDomain`, both optional.
    // A missing `opts` is treated as "change nothing".
    const { lowConfidence, mixedDomain } = opts || {};
    // Number.isFinite rejects non-numbers without coercion and, unlike a
    // `typeof` check, also rejects NaN and +/-Infinity. A NaN threshold would
    // make every comparison in activeDomainClasses() false, so the gate would
    // never abstain and never report a mixed domain.
    if (Number.isFinite(lowConfidence)) {
        lowConfidenceThreshold = lowConfidence;
    }
    if (Number.isFinite(mixedDomain)) {
        mixedDomainThreshold = mixedDomain;
    }
}
80
+ /** The active gate thresholds (read by `activeDomainClasses` and `getTopClass`). */
81
function domainGateThresholds() {
    // Snapshot of the current module-level thresholds; a fresh object is
    // built on every call.
    const snapshot = {};
    snapshot.lowConfidence = lowConfidenceThreshold;
    snapshot.mixedDomain = mixedDomainThreshold;
    return snapshot;
}
84
+ /** Restore the default thresholds — for tests that exercise a custom gate config. */
85
function resetDomainGate() {
    // Put both module-level thresholds back to the exported defaults.
    const defaultFloor = exports.DEFAULT_LOW_CONFIDENCE_THRESHOLD;
    const defaultMargin = exports.DEFAULT_MIXED_DOMAIN_THRESHOLD;
    lowConfidenceThreshold = defaultFloor;
    mixedDomainThreshold = defaultMargin;
}
89
+ /**
90
+ * The deterministic confidence cascade (S2/S3): the domain class(es) a
91
+ * classification activates from a calibrated softmax distribution —
92
+ *
93
+ * - **abstain** (`[]`) when the top class is below the low-confidence floor;
94
+ * - **single** (`[c1]`) when one class clears the floor and dominates;
95
+ * - **multi** (`[c1, c2]`) when a close second also clears the floor, within
96
+ * the mixed-domain margin.
97
+ *
98
+ * Thresholds come from the run-global {@link domainGateThresholds}. This is the
99
+ * *one* selection both the Zod enum and the prompt hints use, so they can't
100
+ * disagree about which domain is active. (Phase B inserts an LLM tie-break into
101
+ * the "close" branch before falling through to multi.)
102
+ */
103
function activeDomainClasses(contentClasses) {
    // No classification at all: abstain.
    if (!contentClasses || contentClasses.length === 0) {
        return [];
    }
    // Rank a copy, highest confidence first, so the caller's array is untouched.
    const ranked = [...contentClasses].sort((left, right) => right.confidence - left.confidence);
    const best = ranked[0];
    // Best class below the floor: abstain.
    if (best.confidence < lowConfidenceThreshold) {
        return [];
    }
    // Only one candidate: single-domain result.
    if (ranked.length < 2) {
        return [best.class];
    }
    // The runner-up joins only when it clears the floor itself AND sits
    // within the mixed-domain margin of the best class.
    const runnerUp = ranked[1];
    const clearsFloor = runnerUp.confidence >= lowConfidenceThreshold;
    const isClose = clearsFloor && best.confidence - runnerUp.confidence <= mixedDomainThreshold;
    return isClose ? [best.class, runnerUp.class] : [best.class];
}
118
+ /**
119
+ * The union of primary entity/relation types across the active domain class(es),
120
+ * in active-class order. Empty when no class clears the threshold.
121
+ */
122
function domainVocabulary(contentClasses) {
    // Accumulates primary types of each active class, in active-class order.
    // Duplicates are NOT removed here; the allowed*Types helpers de-duplicate.
    const vocabulary = { entityTypes: [], relationTypes: [] };
    const examplesByClass = NER_DOMAIN_EXAMPLES_1.NER_DOMAIN_EXAMPLES;
    activeDomainClasses(contentClasses).forEach((activeClass) => {
        const example = examplesByClass[activeClass];
        if (example) {
            for (const entityType of example.primaryEntityTypes) {
                vocabulary.entityTypes.push(entityType);
            }
            for (const relationType of example.primaryRelationTypes) {
                vocabulary.relationTypes.push(relationType);
            }
        }
        // Classes with no example entry contribute nothing.
    });
    return vocabulary;
}
134
+ /**
135
+ * Closed entity-type set for the Zod enum: active-domain primary types ∪ corpus
136
+ * glossary types ∪ base set ∪ `other`. Always non-empty (the base set is the
137
+ * floor), so `entityType` is an enforced enum even with no class and no glossary.
138
+ */
139
function allowedEntityTypes(contentClasses, glossaryTypes = []) {
    // A Set keeps first-seen order, so the result lists domain types, then
    // glossary types, then base types, then the escape value — each once.
    const allowed = new Set();
    for (const domainType of domainVocabulary(contentClasses).entityTypes) {
        allowed.add(domainType);
    }
    for (const glossaryType of glossaryTypes) {
        allowed.add(glossaryType);
    }
    for (const baseType of exports.BASE_ENTITY_TYPES) {
        allowed.add(baseType);
    }
    allowed.add(exports.ENTITY_TYPE_ESCAPE);
    return [...allowed];
}
147
+ /**
148
+ * Closed relation-predicate set for the Zod enum: active-domain primary
149
+ * predicates ∪ corpus glossary predicates ∪ base set ∪ `related_to`. Unlike the
150
+ * pre-Phase-2 resolver this DOES include the domain predicates, closing the
151
+ * KG-05 gap where the relation enum excluded exactly the predicates the hints
152
+ * and gold examples taught.
153
+ */
154
function allowedRelationTypes(contentClasses, glossaryTypes = []) {
    // A Set keeps first-seen order, so the result lists domain predicates,
    // then glossary predicates, then base predicates, then the escape value —
    // each once.
    const allowed = new Set();
    for (const domainPredicate of domainVocabulary(contentClasses).relationTypes) {
        allowed.add(domainPredicate);
    }
    for (const glossaryPredicate of glossaryTypes) {
        allowed.add(glossaryPredicate);
    }
    for (const basePredicate of exports.BASE_RELATION_TYPES) {
        allowed.add(basePredicate);
    }
    allowed.add(exports.RELATION_TYPE_ESCAPE);
    return [...allowed];
}
162
+ //# sourceMappingURL=vocabulary.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocabulary.js","sourceRoot":"","sources":["../../../src/core/knowledge/vocabulary.ts"],"names":[],"mappings":";;;AAwEA,kDAMC;AAGD,oDAKC;AAGD,0CAGC;AAgBD,kDAgBC;AAMD,4CAYC;AAOD,gDAYC;AASD,oDAYC;AArLD,qFAAkF;AAElF;;;;;;;;;GASG;AAEH;;;;GAIG;AACU,QAAA,iBAAiB,GAAG;IAC/B,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ;IACvE,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU;IAClE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY;IACnE,gBAAgB,EAAE,QAAQ,EAAE,MAAM;CACnC,CAAC;AAEF,8EAA8E;AACjE,QAAA,mBAAmB,GAAG;IACjC,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,UAAU,EAAE,SAAS;IAC7E,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,SAAS,EAAE,SAAS;IACxE,YAAY,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,eAAe;IAC5E,YAAY;CACb,CAAC;AAEF,kFAAkF;AACrE,QAAA,kBAAkB,GAAG,OAAO,CAAC;AAC7B,QAAA,oBAAoB,GAAG,YAAY,CAAC;AAEjD;;;;;;;;GAQG;AACU,QAAA,gCAAgC,GAAG,IAAI,CAAC;AAErD;;;;;;GAMG;AACU,QAAA,8BAA8B,GAAG,IAAI,CAAC;AAEnD;;;;;;;;;GASG;AACH,IAAI,sBAAsB,GAAG,wCAAgC,CAAC;AAC9D,IAAI,oBAAoB,GAAG,sCAA8B,CAAC;AAE1D,sFAAsF;AACtF,SAAgB,mBAAmB,CAAC,IAGnC;IACC,IAAI,OAAO,IAAI,CAAC,aAAa,KAAK,QAAQ;QAAE,sBAAsB,GAAG,IAAI,CAAC,aAAa,CAAC;IACxF,IAAI,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ;QAAE,oBAAoB,GAAG,IAAI,CAAC,WAAW,CAAC;AACpF,CAAC;AAED,oFAAoF;AACpF,SAAgB,oBAAoB;IAIlC,OAAO,EAAE,aAAa,EAAE,sBAAsB,EAAE,WAAW,EAAE,oBAAoB,EAAE,CAAC;AACtF,CAAC;AAED,qFAAqF;AACrF,SAAgB,eAAe;IAC7B,sBAAsB,GAAG,wCAAgC,CAAC;IAC1D,oBAAoB,GAAG,sCAA8B,CAAC;AACxD,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,SAAgB,mBAAmB,CACjC,cAAuC;IAEvC,IAAI,CAAC,cAAc,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC9D,MAAM,MAAM,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IAC/E,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,IAAI,GAAG,CAAC,UAAU,GAAG,sBAAsB;QAAE,OAAO,EAAE,CAAC;IACvD,MAAM,MAAM,GAAmB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC3C,IACE,MAAM,CAAC,MAAM,GAAG,CAAC;QACjB,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,IAAI,sBAAsB;QAC9C,GAAG,CAAC,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,U
AAU,IAAI,oBAAoB,EAC7D,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAgB,gBAAgB,CAC9B,cAAuC;IAEvC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,KAAK,MAAM,GAAG,IAAI,mBAAmB,CAAC,cAAc,CAAC,EAAE,CAAC;QACtD,MAAM,GAAG,GAAG,yCAAmB,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,WAAW,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,kBAAkB,CAAC,CAAC;QAC5C,aAAa,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC;AACxC,CAAC;AAED;;;;GAIG;AACH,SAAgB,kBAAkB,CAChC,cAAuC,EACvC,gBAA0B,EAAE;IAE5B,OAAO,KAAK,CAAC,IAAI,CACf,IAAI,GAAG,CAAC;QACN,GAAG,gBAAgB,CAAC,cAAc,CAAC,CAAC,WAAW;QAC/C,GAAG,aAAa;QAChB,GAAG,yBAAiB;QACpB,0BAAkB;KACnB,CAAC,CACH,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,oBAAoB,CAClC,cAAuC,EACvC,gBAA0B,EAAE;IAE5B,OAAO,KAAK,CAAC,IAAI,CACf,IAAI,GAAG,CAAC;QACN,GAAG,gBAAgB,CAAC,cAAc,CAAC,CAAC,aAAa;QACjD,GAAG,aAAa;QAChB,GAAG,2BAAmB;QACtB,4BAAoB;KACrB,CAAC,CACH,CAAC;AACJ,CAAC"}
@@ -0,0 +1,113 @@
1
+ "use strict";
2
// Compiler-emitted helper that runs a generator function as an asynchronous
// routine: every yielded value is awaited and its settled result is fed back
// into the generator. An existing `this.__awaiter` is reused when present.
//   thisArg / _arguments: receiver and arguments applied to `generator`.
//   P: promise constructor to use; falls back to the global Promise.
// Returns a P that resolves with the generator's return value or rejects with
// the first error that escapes it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap anything that is not already a P so `.then` is always available.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with a fulfilled value; a thrown error rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Throw a rejection reason into the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        // Finish when the generator is done; otherwise wait on the yielded value.
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        // Create the generator and advance it to its first yield.
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.EmbeddingService = void 0;
13
+ const ollama_1 = require("ollama");
14
+ const embeddingUtils_1 = require("./embeddingUtils");
15
+ /**
16
+ * Service for generating text embeddings
17
+ */
18
class EmbeddingService {
    /**
     * @param {object} options Reads `model`, `host` and the optional
     *   `maxInputChars` (character budget applied before embedding).
     * @param {object} logger Object exposing `debug`, `warn` and `error`.
     */
    constructor(options, logger) {
        this.options = Object.assign({}, options);
        // Apply the default AFTER copying: an explicit `maxInputChars: undefined`
        // would otherwise overwrite it and silently disable truncation.
        if (this.options.maxInputChars === undefined) {
            this.options.maxInputChars = embeddingUtils_1.DEFAULT_MAX_EMBED_CHARS;
        }
        this.cache = new Map();
        this.logger = logger;
        this.ollama = new ollama_1.Ollama({ host: options.host });
    }
    /**
     * Trim input to the configured char budget so long observations/entities
     * don't exceed the embedding model's context window. A falsy or
     * non-positive budget leaves the text unchanged.
     *
     * @param {string} text Text about to be embedded.
     * @returns {string} `text`, or its first `maxInputChars` characters.
     */
    truncate(text) {
        const max = this.options.maxInputChars;
        if (max && max > 0 && text.length > max) {
            this.logger.debug(`Truncating embedding input from ${text.length} to ${max} chars`);
            return text.slice(0, max);
        }
        return text;
    }
    /**
     * Generate the embedding for a single text, using the cache when possible.
     *
     * @param {string} text Text to embed.
     * @returns {Promise<*>} The `embedding` value returned by the model.
     * @throws {Error} When the model call fails (see `embedWithFallback`).
     */
    embed(text) {
        return __awaiter(this, void 0, void 0, function* () {
            // The model only ever sees the truncated input, so that is the
            // cache key: texts sharing a truncated prefix share one entry, and
            // arbitrarily long strings are never retained as keys.
            const input = this.truncate(text);
            const cached = this.cache.get(input);
            if (cached) {
                this.logger.debug(`Using cached embedding for text: ${text.substring(0, 50)}...`);
                return cached;
            }
            this.logger.debug(`Generating embedding for text: ${text.substring(0, 50)}...`);
            const embedding = yield this.embedWithFallback(input);
            this.cache.set(input, embedding);
            return embedding;
        });
    }
    /**
     * Embed `input`, halving it and retrying if the model rejects it for being
     * too long. The char cap can't know any given model's exact token limit, so
     * this self-corrects for dense content (JSON/code) on small-context models.
     * Halving stops at `MIN_EMBED_CHARS`.
     *
     * @param {string} input Already-truncated text.
     * @returns {Promise<*>} The `embedding` field of the model response.
     * @throws {Error} Wrapping any other failure; the original error is kept
     *   on `cause`.
     */
    embedWithFallback(input) {
        return __awaiter(this, void 0, void 0, function* () {
            let current = input;
            while (true) {
                try {
                    const response = yield this.ollama.embeddings({
                        model: this.options.model,
                        prompt: current,
                    });
                    return response.embedding;
                }
                catch (error) {
                    if ((0, embeddingUtils_1.isContextLengthError)(error) && current.length > embeddingUtils_1.MIN_EMBED_CHARS) {
                        const next = Math.max(embeddingUtils_1.MIN_EMBED_CHARS, Math.floor(current.length / 2));
                        this.logger.warn(`Embedding input too long (${current.length} chars); retrying with ${next}`);
                        current = current.slice(0, next);
                        continue;
                    }
                    this.logger.error(`Failed to generate embedding: ${error}`);
                    // Keep the original error (stack, code, ...) reachable for callers.
                    throw new Error(`Failed to generate embedding: ${error}`, { cause: error });
                }
            }
        });
    }
    /**
     * Generate embeddings for multiple texts, at most 10 concurrently.
     *
     * @param {string[]} texts Texts to embed.
     * @returns {Promise<Array>} Embeddings in the same order as `texts`.
     */
    embedBatch(texts) {
        return __awaiter(this, void 0, void 0, function* () {
            this.logger.debug(`Generating embeddings for ${texts.length} texts`);
            const embeddings = [];
            // Process in batches to avoid overwhelming the model
            const batchSize = 10;
            for (let i = 0; i < texts.length; i += batchSize) {
                const batch = texts.slice(i, i + batchSize);
                const batchEmbeddings = yield Promise.all(batch.map(text => this.embed(text)));
                embeddings.push(...batchEmbeddings);
            }
            return embeddings;
        });
    }
    /**
     * Clear the embedding cache
     */
    clearCache() {
        this.cache.clear();
        this.logger.debug('Embedding cache cleared');
    }
    /**
     * Get cache size
     *
     * @returns {number} Number of cached embeddings.
     */
    getCacheSize() {
        return this.cache.size;
    }
}
112
+ exports.EmbeddingService = EmbeddingService;
113
+ //# sourceMappingURL=EmbeddingService.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmbeddingService.js","sourceRoot":"","sources":["../../../src/core/llm/EmbeddingService.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mCAAgC;AAGhC,qDAAkG;AAUlG;;GAEG;AACH,MAAa,gBAAgB;IAM3B,YAAY,OAAyB,EAAE,MAAc;QACnD,IAAI,CAAC,OAAO,mBAAK,aAAa,EAAE,wCAAuB,IAAK,OAAO,CAAE,CAAC;QACtE,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACnD,CAAC;IAED;4DACwD;IAChD,QAAQ,CAAC,IAAY;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC;QACvC,IAAI,GAAG,IAAI,GAAG,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACxC,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,mCAAmC,IAAI,CAAC,MAAM,OAAO,GAAG,QAAQ,CACjE,CAAC;YACF,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC5B,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACG,KAAK,CAAC,IAAY;;YACtB,oBAAoB;YACpB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,MAAM,EAAE,CAAC;gBACX,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,oCAAoC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;gBAClF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,kCAAkC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YACpE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;KAAA;IAED;;;;OAIG;IACW,iBAAiB,CAAC,KAAa;;YAC3C,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,OAAO,IAAI,EAAE,CAAC;gBACZ,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;wBAC5C,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;wBACzB,MAAM,EAAE,OAAO;qBAChB,CAAC,CAAC;oBACH,OAAO,QAAQ,CAAC,SAAS,CAAC;gBAC5B,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,IAAI,IAAA,qCAAoB,EAAC,KAAK,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,gCAAe,EAAE,CAAC;wBACpE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,gCAAe,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;wBACvE,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,6BAA6B,OAAO,CAAC,MAAM,0BAA0B,IAAI,EAAE,CAC5E,CAAC;wBACF,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC
,EAAE,IAAI,CAAC,CAAC;wBACjC,SAAS;oBACX,CAAC;oBACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,iCAAiC,KAAK,EAAE,CAAC,CAAC;oBAC5D,MAAM,IAAI,KAAK,CAAC,iCAAiC,KAAK,EAAE,CAAC,CAAC;gBAC5D,CAAC;YACH,CAAC;QACH,CAAC;KAAA;IAED;;OAEG;IACG,UAAU,CAAC,KAAe;;YAC9B,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,6BAA6B,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;YAErE,MAAM,UAAU,GAAe,EAAE,CAAC;YAElC,qDAAqD;YACrD,MAAM,SAAS,GAAG,EAAE,CAAC;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACjD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBAC5C,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CACvC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CACpC,CAAC;gBACF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,UAAU,CAAC;QACpB,CAAC;KAAA;IAED;;OAEG;IACH,UAAU;QACR,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACzB,CAAC;CACF;AA5GD,4CA4GC"}