@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +458 -0
  3. package/dist/__tests__/helpers.js +27 -0
  4. package/dist/__tests__/helpers.js.map +1 -0
  5. package/dist/cli/commands/export.command.js +99 -0
  6. package/dist/cli/commands/export.command.js.map +1 -0
  7. package/dist/cli/commands/index.js +22 -0
  8. package/dist/cli/commands/index.js.map +1 -0
  9. package/dist/cli/commands/inspectMerges.command.js +84 -0
  10. package/dist/cli/commands/inspectMerges.command.js.map +1 -0
  11. package/dist/cli/commands/metrics.command.js +196 -0
  12. package/dist/cli/commands/metrics.command.js.map +1 -0
  13. package/dist/cli/commands/process.command.js +82 -0
  14. package/dist/cli/commands/process.command.js.map +1 -0
  15. package/dist/cli/commands/watch.command.js +91 -0
  16. package/dist/cli/commands/watch.command.js.map +1 -0
  17. package/dist/cli/index.js +269 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/cli/optionsToConfig.js +160 -0
  20. package/dist/cli/optionsToConfig.js.map +1 -0
  21. package/dist/config/index.js +59 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/legacyHints.js +113 -0
  24. package/dist/config/legacyHints.js.map +1 -0
  25. package/dist/config/schema.js +803 -0
  26. package/dist/config/schema.js.map +1 -0
  27. package/dist/config/ui.js +221 -0
  28. package/dist/config/ui.js.map +1 -0
  29. package/dist/core/DirectoryProcessor.js +725 -0
  30. package/dist/core/DirectoryProcessor.js.map +1 -0
  31. package/dist/core/adapters/IStructuredAdapter.js +3 -0
  32. package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
  33. package/dist/core/adapters/SqliteAdapter.js +267 -0
  34. package/dist/core/adapters/SqliteAdapter.js.map +1 -0
  35. package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
  36. package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
  37. package/dist/core/adapters/index.js +20 -0
  38. package/dist/core/adapters/index.js.map +1 -0
  39. package/dist/core/checkpoint/CheckpointService.js +188 -0
  40. package/dist/core/checkpoint/CheckpointService.js.map +1 -0
  41. package/dist/core/checkpoint/index.js +18 -0
  42. package/dist/core/checkpoint/index.js.map +1 -0
  43. package/dist/core/corpus/CorpusAnalyzer.js +266 -0
  44. package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
  45. package/dist/core/corpus/CorpusProfileStore.js +92 -0
  46. package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
  47. package/dist/core/corpus/index.js +21 -0
  48. package/dist/core/corpus/index.js.map +1 -0
  49. package/dist/core/corpus/normalizeGlossary.js +60 -0
  50. package/dist/core/corpus/normalizeGlossary.js.map +1 -0
  51. package/dist/core/corpus/relPath.js +52 -0
  52. package/dist/core/corpus/relPath.js.map +1 -0
  53. package/dist/core/corpus/termFrequency.js +86 -0
  54. package/dist/core/corpus/termFrequency.js.map +1 -0
  55. package/dist/core/cost/CostMeter.js +235 -0
  56. package/dist/core/cost/CostMeter.js.map +1 -0
  57. package/dist/core/cost/index.js +19 -0
  58. package/dist/core/cost/index.js.map +1 -0
  59. package/dist/core/cost/prices.js +38 -0
  60. package/dist/core/cost/prices.js.map +1 -0
  61. package/dist/core/cv/ObjectDetectionService.js +119 -0
  62. package/dist/core/cv/ObjectDetectionService.js.map +1 -0
  63. package/dist/core/di/ContainerFactory.js +670 -0
  64. package/dist/core/di/ContainerFactory.js.map +1 -0
  65. package/dist/core/di/DIContainer.js +103 -0
  66. package/dist/core/di/DIContainer.js.map +1 -0
  67. package/dist/core/di/index.js +19 -0
  68. package/dist/core/di/index.js.map +1 -0
  69. package/dist/core/errors/CustomErrors.js +342 -0
  70. package/dist/core/errors/CustomErrors.js.map +1 -0
  71. package/dist/core/errors/index.js +18 -0
  72. package/dist/core/errors/index.js.map +1 -0
  73. package/dist/core/export/KnowledgeGraphExportService.js +56 -0
  74. package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
  75. package/dist/core/export/index.js +19 -0
  76. package/dist/core/export/index.js.map +1 -0
  77. package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
  78. package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
  79. package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
  80. package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
  81. package/dist/core/export/strategies/IExportStrategy.js +3 -0
  82. package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
  83. package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
  84. package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
  85. package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
  86. package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
  87. package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
  88. package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
  89. package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
  90. package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
  91. package/dist/core/export/strategies/McpExportStrategy.js +67 -0
  92. package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
  93. package/dist/core/export/strategies/index.js +25 -0
  94. package/dist/core/export/strategies/index.js.map +1 -0
  95. package/dist/core/export/strategies/kbTriples.js +60 -0
  96. package/dist/core/export/strategies/kbTriples.js.map +1 -0
  97. package/dist/core/index.js +22 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
  100. package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
  101. package/dist/core/knowledge/MergeRecord.js +3 -0
  102. package/dist/core/knowledge/MergeRecord.js.map +1 -0
  103. package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
  104. package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
  105. package/dist/core/knowledge/canon/index.js +18 -0
  106. package/dist/core/knowledge/canon/index.js.map +1 -0
  107. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
  108. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
  109. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
  110. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
  111. package/dist/core/knowledge/contradiction/index.js +19 -0
  112. package/dist/core/knowledge/contradiction/index.js.map +1 -0
  113. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
  114. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
  115. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
  116. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
  117. package/dist/core/knowledge/grounding/index.js +20 -0
  118. package/dist/core/knowledge/grounding/index.js.map +1 -0
  119. package/dist/core/knowledge/grounding/verbalize.js +38 -0
  120. package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
  121. package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
  122. package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
  123. package/dist/core/knowledge/index.js +20 -0
  124. package/dist/core/knowledge/index.js.map +1 -0
  125. package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
  126. package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
  127. package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
  128. package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
  129. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
  130. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
  131. package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
  132. package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
  133. package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
  134. package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
  135. package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
  136. package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
  137. package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
  138. package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
  139. package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
  140. package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
  141. package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
  142. package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
  143. package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
  144. package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
  145. package/dist/core/knowledge/vocabulary.js +162 -0
  146. package/dist/core/knowledge/vocabulary.js.map +1 -0
  147. package/dist/core/llm/EmbeddingService.js +113 -0
  148. package/dist/core/llm/EmbeddingService.js.map +1 -0
  149. package/dist/core/llm/OllamaService.js +146 -0
  150. package/dist/core/llm/OllamaService.js.map +1 -0
  151. package/dist/core/llm/OpenAICompatibleService.js +190 -0
  152. package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
  153. package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
  154. package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
  155. package/dist/core/llm/embeddingUtils.js +25 -0
  156. package/dist/core/llm/embeddingUtils.js.map +1 -0
  157. package/dist/core/llm/index.js +23 -0
  158. package/dist/core/llm/index.js.map +1 -0
  159. package/dist/core/llm/prompts/PromptManager.js +388 -0
  160. package/dist/core/llm/prompts/PromptManager.js.map +1 -0
  161. package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
  162. package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
  163. package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
  164. package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
  165. package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
  166. package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
  167. package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
  168. package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
  169. package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
  170. package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
  171. package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
  172. package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
  173. package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
  174. package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
  175. package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
  176. package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
  177. package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
  178. package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
  179. package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
  180. package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
  181. package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
  182. package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
  183. package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
  184. package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
  185. package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
  186. package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
  187. package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
  188. package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
  189. package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
  190. package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
  191. package/dist/core/pipeline/GroundingTransform.js +52 -0
  192. package/dist/core/pipeline/GroundingTransform.js.map +1 -0
  193. package/dist/core/pipeline/PipelineRunner.js +51 -0
  194. package/dist/core/pipeline/PipelineRunner.js.map +1 -0
  195. package/dist/core/pipeline/RelationFilterTransform.js +72 -0
  196. package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
  197. package/dist/core/pipeline/index.js +20 -0
  198. package/dist/core/pipeline/index.js.map +1 -0
  199. package/dist/core/processor/FileProcessor.js +184 -0
  200. package/dist/core/processor/FileProcessor.js.map +1 -0
  201. package/dist/core/processor/ProcessedRegistry.js +38 -0
  202. package/dist/core/processor/ProcessedRegistry.js.map +1 -0
  203. package/dist/core/processor/ast/AstSeedService.js +0 -0
  204. package/dist/core/processor/ast/AstSeedService.js.map +1 -0
  205. package/dist/core/processor/ast/AstSymbolStore.js +110 -0
  206. package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
  207. package/dist/core/processor/ast/index.js +19 -0
  208. package/dist/core/processor/ast/index.js.map +1 -0
  209. package/dist/core/processor/chunking/TextChunker.js +98 -0
  210. package/dist/core/processor/chunking/TextChunker.js.map +1 -0
  211. package/dist/core/processor/chunking/index.js +18 -0
  212. package/dist/core/processor/chunking/index.js.map +1 -0
  213. package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
  214. package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
  215. package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
  216. package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
  217. package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
  218. package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
  219. package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
  220. package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
  221. package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
  222. package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
  223. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
  224. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
  225. package/dist/core/processor/classifier/index.js +21 -0
  226. package/dist/core/processor/classifier/index.js.map +1 -0
  227. package/dist/core/processor/classifier/mergeClassifications.js +32 -0
  228. package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
  229. package/dist/core/processor/index.js +20 -0
  230. package/dist/core/processor/index.js.map +1 -0
  231. package/dist/core/processor/readers/AudioReader.js +462 -0
  232. package/dist/core/processor/readers/AudioReader.js.map +1 -0
  233. package/dist/core/processor/readers/BinaryReader.js +90 -0
  234. package/dist/core/processor/readers/BinaryReader.js.map +1 -0
  235. package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
  236. package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
  237. package/dist/core/processor/readers/ChatExportReader.js +365 -0
  238. package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
  239. package/dist/core/processor/readers/DoclingReader.js +445 -0
  240. package/dist/core/processor/readers/DoclingReader.js.map +1 -0
  241. package/dist/core/processor/readers/EmailReader.js +259 -0
  242. package/dist/core/processor/readers/EmailReader.js.map +1 -0
  243. package/dist/core/processor/readers/EpubReader.js +175 -0
  244. package/dist/core/processor/readers/EpubReader.js.map +1 -0
  245. package/dist/core/processor/readers/FileReader.js +90 -0
  246. package/dist/core/processor/readers/FileReader.js.map +1 -0
  247. package/dist/core/processor/readers/FileReaderFactory.js +49 -0
  248. package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
  249. package/dist/core/processor/readers/HtmlReader.js +371 -0
  250. package/dist/core/processor/readers/HtmlReader.js.map +1 -0
  251. package/dist/core/processor/readers/ImageReader.js +162 -0
  252. package/dist/core/processor/readers/ImageReader.js.map +1 -0
  253. package/dist/core/processor/readers/JsonFileReader.js +232 -0
  254. package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
  255. package/dist/core/processor/readers/JupyterReader.js +178 -0
  256. package/dist/core/processor/readers/JupyterReader.js.map +1 -0
  257. package/dist/core/processor/readers/LatexReader.js +176 -0
  258. package/dist/core/processor/readers/LatexReader.js.map +1 -0
  259. package/dist/core/processor/readers/MarkdownReader.js +289 -0
  260. package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
  261. package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
  262. package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
  263. package/dist/core/processor/readers/MistralOcrReader.js +198 -0
  264. package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
  265. package/dist/core/processor/readers/OfficeReader.js +174 -0
  266. package/dist/core/processor/readers/OfficeReader.js.map +1 -0
  267. package/dist/core/processor/readers/PdfReader.js +116 -0
  268. package/dist/core/processor/readers/PdfReader.js.map +1 -0
  269. package/dist/core/processor/readers/RtfReader.js +107 -0
  270. package/dist/core/processor/readers/RtfReader.js.map +1 -0
  271. package/dist/core/processor/readers/SubtitleReader.js +145 -0
  272. package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
  273. package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
  274. package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
  275. package/dist/core/processor/readers/TextReader.js +129 -0
  276. package/dist/core/processor/readers/TextReader.js.map +1 -0
  277. package/dist/core/processor/readers/TranscriptReader.js +234 -0
  278. package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
  279. package/dist/core/processor/readers/image/imageMetadata.js +155 -0
  280. package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
  281. package/dist/core/processor/readers/index.js +41 -0
  282. package/dist/core/processor/readers/index.js.map +1 -0
  283. package/dist/core/processor/readers/referenceExtraction.js +198 -0
  284. package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
  285. package/dist/core/processor/readers/stripReferences.js +59 -0
  286. package/dist/core/processor/readers/stripReferences.js.map +1 -0
  287. package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
  288. package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
  289. package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
  290. package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
  291. package/dist/core/progress/NoopProgressEmitter.js +15 -0
  292. package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
  293. package/dist/core/progress/index.js +19 -0
  294. package/dist/core/progress/index.js.map +1 -0
  295. package/dist/core/trace/TraceWriter.js +100 -0
  296. package/dist/core/trace/TraceWriter.js.map +1 -0
  297. package/dist/core/trace/events.js +13 -0
  298. package/dist/core/trace/events.js.map +1 -0
  299. package/dist/core/trace/index.js +20 -0
  300. package/dist/core/trace/index.js.map +1 -0
  301. package/dist/core/trace/lineage.js +97 -0
  302. package/dist/core/trace/lineage.js.map +1 -0
  303. package/dist/evaluation/BenchmarkRunner.js +171 -0
  304. package/dist/evaluation/BenchmarkRunner.js.map +1 -0
  305. package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
  306. package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
  307. package/dist/evaluation/classifier/labeledSamples.js +379 -0
  308. package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
  309. package/dist/evaluation/compare/goldCompare.js +126 -0
  310. package/dist/evaluation/compare/goldCompare.js.map +1 -0
  311. package/dist/evaluation/crossre/compareScoring.js +30 -0
  312. package/dist/evaluation/crossre/compareScoring.js.map +1 -0
  313. package/dist/evaluation/datasets/CrossREDataset.js +170 -0
  314. package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
  315. package/dist/evaluation/datasets/IDataset.js +3 -0
  316. package/dist/evaluation/datasets/IDataset.js.map +1 -0
  317. package/dist/evaluation/datasets/RebelDataset.js +117 -0
  318. package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
  319. package/dist/evaluation/datasets/RedocredDataset.js +218 -0
  320. package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
  321. package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
  322. package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
  323. package/dist/evaluation/index.js +33 -0
  324. package/dist/evaluation/index.js.map +1 -0
  325. package/dist/evaluation/matching/ExactMatcher.js +75 -0
  326. package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
  327. package/dist/evaluation/matching/SemanticMatcher.js +143 -0
  328. package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
  329. package/dist/evaluation/metrics/TripleMetrics.js +64 -0
  330. package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
  331. package/dist/evaluation/mine/MineCheckpoint.js +114 -0
  332. package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
  333. package/dist/evaluation/mine/MineDataset.js +208 -0
  334. package/dist/evaluation/mine/MineDataset.js.map +1 -0
  335. package/dist/evaluation/mine/MineReporter.js +98 -0
  336. package/dist/evaluation/mine/MineReporter.js.map +1 -0
  337. package/dist/evaluation/mine/MineRunner.js +148 -0
  338. package/dist/evaluation/mine/MineRunner.js.map +1 -0
  339. package/dist/evaluation/mine/MineScorer.js +127 -0
  340. package/dist/evaluation/mine/MineScorer.js.map +1 -0
  341. package/dist/evaluation/mine/types.js +12 -0
  342. package/dist/evaluation/mine/types.js.map +1 -0
  343. package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
  344. package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
  345. package/dist/evaluation/reporters/JsonReporter.js +50 -0
  346. package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
  347. package/dist/index.js +28 -0
  348. package/dist/index.js.map +1 -0
  349. package/dist/quality/CompositeScore.js +61 -0
  350. package/dist/quality/CompositeScore.js.map +1 -0
  351. package/dist/quality/ConsistencyMetrics.js +70 -0
  352. package/dist/quality/ConsistencyMetrics.js.map +1 -0
  353. package/dist/quality/FactualMetrics.js +76 -0
  354. package/dist/quality/FactualMetrics.js.map +1 -0
  355. package/dist/quality/GraphHealthMetrics.js +68 -0
  356. package/dist/quality/GraphHealthMetrics.js.map +1 -0
  357. package/dist/quality/SemanticMetrics.js +102 -0
  358. package/dist/quality/SemanticMetrics.js.map +1 -0
  359. package/dist/quality/StructuralMetrics.js +60 -0
  360. package/dist/quality/StructuralMetrics.js.map +1 -0
  361. package/dist/quality/index.js +23 -0
  362. package/dist/quality/index.js.map +1 -0
  363. package/dist/shared/index.js +20 -0
  364. package/dist/shared/index.js.map +1 -0
  365. package/dist/shared/logger/Logger.js +3 -0
  366. package/dist/shared/logger/Logger.js.map +1 -0
  367. package/dist/shared/logger/LoggerFactory.js +75 -0
  368. package/dist/shared/logger/LoggerFactory.js.map +1 -0
  369. package/dist/shared/logger/index.js +19 -0
  370. package/dist/shared/logger/index.js.map +1 -0
  371. package/dist/shared/shutdown.js +30 -0
  372. package/dist/shared/shutdown.js.map +1 -0
  373. package/dist/shared/utils/agglomerativeCluster.js +269 -0
  374. package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
  375. package/dist/shared/utils/astSymbols.js +69 -0
  376. package/dist/shared/utils/astSymbols.js.map +1 -0
  377. package/dist/shared/utils/cosineSimilarity.js +18 -0
  378. package/dist/shared/utils/cosineSimilarity.js.map +1 -0
  379. package/dist/shared/utils/directoryTree.js +184 -0
  380. package/dist/shared/utils/directoryTree.js.map +1 -0
  381. package/dist/shared/utils/documentOutline.js +74 -0
  382. package/dist/shared/utils/documentOutline.js.map +1 -0
  383. package/dist/shared/utils/index.js +24 -0
  384. package/dist/shared/utils/index.js.map +1 -0
  385. package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
  386. package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
  387. package/dist/shared/utils/parseJsonLenient.js +27 -0
  388. package/dist/shared/utils/parseJsonLenient.js.map +1 -0
  389. package/dist/shared/utils/readConfig.js +42 -0
  390. package/dist/shared/utils/readConfig.js.map +1 -0
  391. package/dist/shared/utils/readRtf.js +216 -0
  392. package/dist/shared/utils/readRtf.js.map +1 -0
  393. package/dist/shared/utils/softmax.js +26 -0
  394. package/dist/shared/utils/softmax.js.map +1 -0
  395. package/dist/types/ContentClass.js +3 -0
  396. package/dist/types/ContentClass.js.map +1 -0
  397. package/dist/types/CorpusProfile.js +3 -0
  398. package/dist/types/CorpusProfile.js.map +1 -0
  399. package/dist/types/IContradictionChecker.js +3 -0
  400. package/dist/types/IContradictionChecker.js.map +1 -0
  401. package/dist/types/ICorpusAnalyzer.js +3 -0
  402. package/dist/types/ICorpusAnalyzer.js.map +1 -0
  403. package/dist/types/IDirectoryProcessor.js +3 -0
  404. package/dist/types/IDirectoryProcessor.js.map +1 -0
  405. package/dist/types/IEmbeddingProvider.js +3 -0
  406. package/dist/types/IEmbeddingProvider.js.map +1 -0
  407. package/dist/types/IEmbeddingService.js +6 -0
  408. package/dist/types/IEmbeddingService.js.map +1 -0
  409. package/dist/types/IFileProcessor.js +3 -0
  410. package/dist/types/IFileProcessor.js.map +1 -0
  411. package/dist/types/IGroundingChecker.js +3 -0
  412. package/dist/types/IGroundingChecker.js.map +1 -0
  413. package/dist/types/IKnowledgeGraphBuilder.js +3 -0
  414. package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
  415. package/dist/types/IKnowledgeGraphExporter.js +3 -0
  416. package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
  417. package/dist/types/IKnowledgeGraphMerger.js +3 -0
  418. package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
  419. package/dist/types/IKnowledgeGraphSearch.js +3 -0
  420. package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
  421. package/dist/types/ILLMProvider.js +3 -0
  422. package/dist/types/ILLMProvider.js.map +1 -0
  423. package/dist/types/ILLMService.js +3 -0
  424. package/dist/types/ILLMService.js.map +1 -0
  425. package/dist/types/IObjectDetector.js +3 -0
  426. package/dist/types/IObjectDetector.js.map +1 -0
  427. package/dist/types/IProcessingService.js +3 -0
  428. package/dist/types/IProcessingService.js.map +1 -0
  429. package/dist/types/IProgressEmitter.js +3 -0
  430. package/dist/types/IProgressEmitter.js.map +1 -0
  431. package/dist/types/IPromptManager.js +3 -0
  432. package/dist/types/IPromptManager.js.map +1 -0
  433. package/dist/types/KnowledgeGraph.js +3 -0
  434. package/dist/types/KnowledgeGraph.js.map +1 -0
  435. package/dist/types/MCPKnowledgeGraph.js +3 -0
  436. package/dist/types/MCPKnowledgeGraph.js.map +1 -0
  437. package/dist/types/Observation.js +21 -0
  438. package/dist/types/Observation.js.map +1 -0
  439. package/dist/types/ProcessingOptions.js +3 -0
  440. package/dist/types/ProcessingOptions.js.map +1 -0
  441. package/dist/types/index.js +40 -0
  442. package/dist/types/index.js.map +1 -0
  443. package/package.json +122 -0
@@ -0,0 +1,153 @@
1
+ ### Example 1: Service agreement (date → observation on the document)
2
+
3
+ Input:
4
+
5
+ ## File information
6
+
7
+ Path: `contracts/services-001.txt`
8
+
9
+ ## Content to analyze
10
+ ```
11
+ This Agreement ("Agreement") is made and entered into on February 10, 2022
12
+ ("Effective Date") by and between John Smith ("John") and Jane Doe ("Jane") for
13
+ the provision of services.
14
+ ```
15
+
16
+ Output:
17
+
18
+ ```json
19
+ {
20
+ "entities": [
21
+ {
22
+ "name": "Services Agreement",
23
+ "entityType": "document",
24
+ "observations": [
25
+ "Service-provision contract effective 2022-02-10",
26
+ "Between John Smith and Jane Doe"
27
+ ]
28
+ },
29
+ {
30
+ "name": "John Smith",
31
+ "entityType": "person",
32
+ "observations": ["Signatory to the Services Agreement"]
33
+ },
34
+ {
35
+ "name": "Jane Doe",
36
+ "entityType": "person",
37
+ "observations": ["Signatory to the Services Agreement"]
38
+ }
39
+ ],
40
+ "relations": [
41
+ { "from": "John Smith", "to": "Services Agreement", "relationType": ["signed"] },
42
+ { "from": "Jane Doe", "to": "Services Agreement", "relationType": ["signed"] }
43
+ ]
44
+ }
45
+ ```
46
+
47
+ Note: the effective date is an observation on the document, not a `February 10, 2022`
48
+ node. The relationship between the two parties is captured by each being a signatory
49
+ to the shared document — no need for a separate symmetric `counterparties` edge in
50
+ both directions. The entity type is `person`, not `Person` (types are lowercase and reused).
51
+
52
+ ---
53
+
54
+ ### Example 2: Court filing
55
+
56
+ Input:
57
+
58
+ ## File information
59
+
60
+ Path: `cases/acme-vs-globex.txt`
61
+
62
+ ## Content to analyze
63
+ ```
64
+ Case No. 22-CV-0148, Acme Corp. v. Globex Inc., filed in the Northern District of
65
+ California. Acme alleges breach of the Master Supply Agreement.
66
+ ```
67
+
68
+ Output:
69
+
70
+ ```json
71
+ {
72
+ "entities": [
73
+ {
74
+ "name": "Acme Corp. v. Globex Inc.",
75
+ "entityType": "document",
76
+ "observations": [
77
+ "Civil case No. 22-CV-0148",
78
+ "Alleges breach of the Master Supply Agreement"
79
+ ]
80
+ },
81
+ {
82
+ "name": "Acme Corp.",
83
+ "entityType": "organization",
84
+ "observations": ["Plaintiff alleging breach of contract"]
85
+ },
86
+ {
87
+ "name": "Globex Inc.",
88
+ "entityType": "organization",
89
+ "observations": ["Defendant"]
90
+ },
91
+ {
92
+ "name": "Northern District of California",
93
+ "entityType": "court",
94
+ "observations": ["Venue for case 22-CV-0148"]
95
+ },
96
+ {
97
+ "name": "Master Supply Agreement",
98
+ "entityType": "document",
99
+ "observations": ["Contract alleged to have been breached"]
100
+ }
101
+ ],
102
+ "relations": [
103
+ { "from": "Acme Corp.", "to": "Acme Corp. v. Globex Inc.", "relationType": ["party_to"] },
104
+ { "from": "Globex Inc.", "to": "Acme Corp. v. Globex Inc.", "relationType": ["party_to"] },
105
+ { "from": "Acme Corp. v. Globex Inc.", "to": "Northern District of California", "relationType": ["filed_in"] },
106
+ { "from": "Acme Corp. v. Globex Inc.", "to": "Master Supply Agreement", "relationType": ["references"] }
107
+ ]
108
+ }
109
+ ```
110
+
111
+ ---
112
+
113
+ ### Example 3: Statute definition
114
+
115
+ Input:
116
+
117
+ ## File information
118
+
119
+ Path: `statutes/privacy-act-s4.txt`
120
+
121
+ ## Content to analyze
122
+ ```
123
+ Section 4 of the Privacy Act defines "personal data" as any information relating to
124
+ an identified or identifiable natural person.
125
+ ```
126
+
127
+ Output:
128
+
129
+ ```json
130
+ {
131
+ "entities": [
132
+ {
133
+ "name": "Privacy Act",
134
+ "entityType": "statute",
135
+ "observations": ["Section 4 defines the term 'personal data'"]
136
+ },
137
+ {
138
+ "name": "personal data",
139
+ "entityType": "term",
140
+ "observations": ["Any information relating to an identified or identifiable natural person"]
141
+ }
142
+ ],
143
+ "relations": [
144
+ { "from": "Privacy Act", "to": "personal data", "relationType": ["defines"] }
145
+ ]
146
+ }
147
+ ```
148
+
149
+ Note: the type set is small and reused (`document`, `person`, `organization`, `court`,
150
+ `statute`, `term`). Every predicate is one canonical verb from the legal set (`signed`,
151
+ `party_to`, `filed_in`, `references`, `defines`) — none restates endpoint types, none
152
+ is a two-synonym array, and direction is consistent (party → document, document →
153
+ court).
@@ -0,0 +1,127 @@
1
+ ### Example 1: Apache access log (timestamp → observation, real predicate)
2
+
3
+ Input:
4
+
5
+ ## File information
6
+
7
+ Path: `logs/apache_access.log`
8
+
9
+ ## Content to analyze
10
+ ```
11
+ 192.168.1.100 - - [10/Oct/2022:14:30:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"
12
+ ```
13
+
14
+ Output:
15
+
16
+ ```json
17
+ {
18
+ "entities": [
19
+ {
20
+ "name": "192.168.1.100",
21
+ "entityType": "ip_address",
22
+ "observations": ["Client that issued a GET request", "User agent Mozilla/5.0"]
23
+ },
24
+ {
25
+ "name": "GET /index.html",
26
+ "entityType": "request",
27
+ "observations": [
28
+ "HTTP/1.1 GET for /index.html at 2022-10-10 14:30:00 UTC",
29
+ "Responded 200 with 1234 bytes"
30
+ ]
31
+ }
32
+ ],
33
+ "relations": [
34
+ { "from": "192.168.1.100", "to": "GET /index.html", "relationType": ["requests"] }
35
+ ]
36
+ }
37
+ ```
38
+
39
+ Note: the timestamp is an observation on the request, not a `TIMESTAMP` hub node that
40
+ every edge points at. The relation is one real interaction with a verb-like predicate
41
+ (`requests`) — never `["SOURCE_IP", "REQUEST_METHOD"]`, which just restates the two
42
+ endpoint types and is not a predicate at all. Types are lowercase and reused.
43
+
44
+ ---
45
+
46
+ ### Example 2: Auth failure (actors + service; failure → observation)
47
+
48
+ Input:
49
+
50
+ ## File information
51
+
52
+ Path: `logs/auth.log`
53
+
54
+ ## Content to analyze
55
+ ```
56
+ 2022-10-10 14:32:11 sshd[2451]: Failed password for invalid user admin from 203.0.113.7 port 52344
57
+ ```
58
+
59
+ Output:
60
+
61
+ ```json
62
+ {
63
+ "entities": [
64
+ {
65
+ "name": "203.0.113.7",
66
+ "entityType": "ip_address",
67
+ "observations": ["Source of a failed SSH login attempt at 2022-10-10 14:32:11"]
68
+ },
69
+ {
70
+ "name": "admin",
71
+ "entityType": "user",
72
+ "observations": ["Invalid user targeted in the failed login"]
73
+ },
74
+ {
75
+ "name": "sshd",
76
+ "entityType": "service",
77
+ "observations": ["SSH daemon (pid 2451) that rejected the login"]
78
+ }
79
+ ],
80
+ "relations": [
81
+ { "from": "203.0.113.7", "to": "admin", "relationType": ["authenticates_as"] },
82
+ { "from": "203.0.113.7", "to": "sshd", "relationType": ["targets"] }
83
+ ]
84
+ }
85
+ ```
86
+
87
+ ---
88
+
89
+ ### Example 3: Application error
90
+
91
+ Input:
92
+
93
+ ## File information
94
+
95
+ Path: `logs/app.log`
96
+
97
+ ## Content to analyze
98
+ ```
99
+ 2022-10-10 14:35:02 ERROR OrderService - NullPointerException while processing order 8841
100
+ ```
101
+
102
+ Output:
103
+
104
+ ```json
105
+ {
106
+ "entities": [
107
+ {
108
+ "name": "OrderService",
109
+ "entityType": "service",
110
+ "observations": ["Raised a NullPointerException while processing order 8841 at 2022-10-10 14:35:02"]
111
+ },
112
+ {
113
+ "name": "NullPointerException",
114
+ "entityType": "error",
115
+ "observations": ["Thrown during order processing in OrderService"]
116
+ }
117
+ ],
118
+ "relations": [
119
+ { "from": "OrderService", "to": "NullPointerException", "relationType": ["reports"] }
120
+ ]
121
+ }
122
+ ```
123
+
124
+ Note: across the file the type set stays small (`ip_address, user, service, request,
125
+ error`) and every relation is a single real interaction — `requests`,
126
+ `authenticates_as`, `targets`, `reports`. No timestamp nodes, no type-pair
127
+ "predicates", no self-loops, consistent actor → object direction.
@@ -0,0 +1,218 @@
1
+ ### Example 1: Patient record with vitals (literals → observations)
2
+
3
+ Input:
4
+
5
+ ## File information
6
+
7
+ Path: `records/john-smith.txt`
8
+
9
+ ## Content to analyze
10
+ ```
11
+ Patient Name: John Smith, Date: 2022-01-01, Test Result: Blood Pressure 120/80 mmHg,
12
+ Heart Rate 70 bpm.
13
+ ```
14
+
15
+ Output:
16
+
17
+ ```json
18
+ {
19
+ "entities": [
20
+ {
21
+ "name": "John Smith",
22
+ "entityType": "person",
23
+ "observations": [
24
+ "Patient; record dated 2022-01-01",
25
+ "Blood pressure 120/80 mmHg",
26
+ "Heart rate 70 bpm"
27
+ ]
28
+ }
29
+ ],
30
+ "relations": []
31
+ }
32
+ ```
33
+
34
+ Note: `120/80 mmHg` and `70 bpm` are measured values, not nodes. They live as
35
+ observations on the patient. The type is `person`, not `Patient Name` — the name is
36
+ the patient's name, the type is the category. One real entity → empty `relations`.
37
+
38
+ ---
39
+
40
+ ### Example 2: Prescription
41
+
42
+ Input:
43
+
44
+ ## File information
45
+
46
+ Path: `rx/jane-doe.txt`
47
+
48
+ ## Content to analyze
49
+ ```
50
+ Patient Name: Jane Doe, Medication: Amoxicillin 500mg, Dosage: 2 tablets,
51
+ Frequency: 3 times a day.
52
+ ```
53
+
54
+ Output:
55
+
56
+ ```json
57
+ {
58
+ "entities": [
59
+ {
60
+ "name": "Jane Doe",
61
+ "entityType": "person",
62
+ "observations": ["Patient prescribed Amoxicillin"]
63
+ },
64
+ {
65
+ "name": "Amoxicillin",
66
+ "entityType": "medication",
67
+ "observations": ["Prescribed at 500mg, 2 tablets, 3 times a day"]
68
+ }
69
+ ],
70
+ "relations": [
71
+ { "from": "Jane Doe", "to": "Amoxicillin", "relationType": ["prescribed"] }
72
+ ]
73
+ }
74
+ ```
75
+
76
+ Note: dosage and frequency are observations on the medication, not `2 tablets` /
77
+ `3 times a day` nodes. The medication name is canonical (`Amoxicillin`), with the
78
+ specific strength in the observation so the same drug merges across records.
79
+
80
+ ---
81
+
82
+ ### Example 3: Medical record (diagnosis + medication)
83
+
84
+ Input:
85
+
86
+ ## File information
87
+
88
+ Path: `records/bob-johnson.txt`
89
+
90
+ ## Content to analyze
91
+ ```
92
+ Patient Name: Bob Johnson, Date: 2020-01-01, Diagnosis: Diabetes Type 2,
93
+ Medication: Metformin 1000mg.
94
+ ```
95
+
96
+ Output:
97
+
98
+ ```json
99
+ {
100
+ "entities": [
101
+ {
102
+ "name": "Bob Johnson",
103
+ "entityType": "person",
104
+ "observations": ["Patient; record dated 2020-01-01"]
105
+ },
106
+ {
107
+ "name": "Diabetes Type 2",
108
+ "entityType": "condition",
109
+ "observations": ["Chronic metabolic condition"]
110
+ },
111
+ {
112
+ "name": "Metformin",
113
+ "entityType": "medication",
114
+ "observations": ["Prescribed at 1000mg"]
115
+ }
116
+ ],
117
+ "relations": [
118
+ { "from": "Bob Johnson", "to": "Diabetes Type 2", "relationType": ["diagnosed_with"] },
119
+ { "from": "Bob Johnson", "to": "Metformin", "relationType": ["prescribed"] }
120
+ ]
121
+ }
122
+ ```
123
+
124
+ ---
125
+
126
+ ### Example 4: Lab report (result → observation)
127
+
128
+ Input:
129
+
130
+ ## File information
131
+
132
+ Path: `labs/rachel-kim.txt`
133
+
134
+ ## Content to analyze
135
+ ```
136
+ Patient Name: Rachel Kim, Date: 2022-10-01, Test: Blood Test, Result: Abnormal —
137
+ elevated white cell count.
138
+ ```
139
+
140
+ Output:
141
+
142
+ ```json
143
+ {
144
+ "entities": [
145
+ {
146
+ "name": "Rachel Kim",
147
+ "entityType": "person",
148
+ "observations": ["Patient; blood test dated 2022-10-01"]
149
+ },
150
+ {
151
+ "name": "Blood Test",
152
+ "entityType": "test",
153
+ "observations": ["Result abnormal: elevated white cell count"]
154
+ }
155
+ ],
156
+ "relations": [
157
+ { "from": "Rachel Kim", "to": "Blood Test", "relationType": ["underwent"] }
158
+ ]
159
+ }
160
+ ```
161
+
162
+ Note: `Abnormal` is not a `result` node — the finding is an observation on the test.
163
+ A bare result value with no further entity to anchor it is just an observation.
164
+
165
+ ---
166
+
167
+ ### Example 5: Allergy + procedure with provider
168
+
169
+ Input:
170
+
171
+ ## File information
172
+
173
+ Path: `records/laura-davis.txt`
174
+
175
+ ## Content to analyze
176
+ ```
177
+ Patient Name: Laura Davis, Date: 2022-08-01, Allergy: Penicillin (causes rash),
178
+ Procedure: Appendectomy performed by Dr. Johnson.
179
+ ```
180
+
181
+ Output:
182
+
183
+ ```json
184
+ {
185
+ "entities": [
186
+ {
187
+ "name": "Laura Davis",
188
+ "entityType": "person",
189
+ "observations": ["Patient; record dated 2022-08-01", "Allergic to penicillin (rash)"]
190
+ },
191
+ {
192
+ "name": "Penicillin",
193
+ "entityType": "medication",
194
+ "observations": ["Causes a rash in Laura Davis"]
195
+ },
196
+ {
197
+ "name": "Appendectomy",
198
+ "entityType": "procedure",
199
+ "observations": ["Surgical removal of the appendix"]
200
+ },
201
+ {
202
+ "name": "Dr. Johnson",
203
+ "entityType": "provider",
204
+ "observations": ["Surgeon who performed the appendectomy"]
205
+ }
206
+ ],
207
+ "relations": [
208
+ { "from": "Laura Davis", "to": "Penicillin", "relationType": ["allergic_to"] },
209
+ { "from": "Laura Davis", "to": "Appendectomy", "relationType": ["underwent"] },
210
+ { "from": "Laura Davis", "to": "Dr. Johnson", "relationType": ["treated_by"] }
211
+ ]
212
+ }
213
+ ```
214
+
215
+ Note: the type set stays small and reused (`person, medication, condition, test,
216
+ procedure, provider`). Every predicate is a single verb-like label from the medical
217
+ set; none restates the endpoint types (no `has Blood Pressure`-style labels). Direction is
218
+ consistent: patient → thing.
@@ -0,0 +1,201 @@
1
+ ### Example 1
2
+
3
+ Input:
4
+
5
+ ## File Information
6
+
7
+ Path: `docs/api-glossary.md`
8
+
9
+ ## Content to Analyze
10
+
11
+ ```
12
+ # REST API Glossary
13
+
14
+ ## Idempotency
15
+ An operation is **idempotent** if performing it multiple times produces the same result as
16
+ performing it once. In REST, PUT and DELETE are required to be idempotent by the HTTP spec;
17
+ POST is not. Clients should retry idempotent requests on network failure.
18
+ Related: safe method, retry semantics, PUT vs PATCH.
19
+
20
+ ## Rate Limiting
21
+ The server enforces a maximum number of requests per time window per API key. Exceeded
22
+ limits return HTTP 429 Too Many Requests with a `Retry-After` header.
23
+ Standards: IETF RFC 6585 (additional HTTP status codes).
24
+ Related: quota, throttling, backoff strategy.
25
+
26
+ ## Pagination (Cursor-based)
27
+ Results are returned in pages using an opaque cursor token rather than page numbers.
28
+ The response includes a `next_cursor` field; pass it as `?cursor=<value>` in the next
29
+ request. Cursors are stable against concurrent inserts.
30
+ Related: offset pagination, keyset pagination, infinite scroll.
31
+
32
+ ## Webhook
33
+ An HTTP callback registered by the client to receive event notifications from the server.
34
+ The server POSTs a signed JSON payload to the client's endpoint when the event fires.
35
+ Clients should verify the `X-Signature-256` header to authenticate payloads.
36
+ Related: polling, event streaming, idempotency key.
37
+ ```
38
+
39
+ Output:
40
+
41
+ ```json
42
+ {
43
+ "entities": [
44
+ {
45
+ "name": "Idempotency",
46
+ "entityType": "concept",
47
+ "observations": [
48
+ "Property where repeated operations produce the same result as one execution",
49
+ "Required by HTTP spec for PUT and DELETE methods",
50
+ "Enables safe client retries on network failures"
51
+ ]
52
+ },
53
+ {
54
+ "name": "Rate Limiting",
55
+ "entityType": "concept",
56
+ "observations": [
57
+ "Server-enforced cap on requests per time window per API key",
58
+ "Returns HTTP 429 with Retry-After header when exceeded",
59
+ "Governed by IETF RFC 6585"
60
+ ]
61
+ },
62
+ {
63
+ "name": "Cursor-based Pagination",
64
+ "entityType": "concept",
65
+ "observations": [
66
+ "Uses opaque cursor token instead of page numbers",
67
+ "Cursor passed via ?cursor= query parameter",
68
+ "Stable against concurrent inserts unlike offset pagination"
69
+ ]
70
+ },
71
+ {
72
+ "name": "Webhook",
73
+ "entityType": "concept",
74
+ "observations": [
75
+ "HTTP callback for server-to-client event notifications",
76
+ "Server POSTs signed JSON payload to registered client endpoint",
77
+ "Signature verified via X-Signature-256 header"
78
+ ]
79
+ },
80
+ {
81
+ "name": "IETF RFC 6585",
82
+ "entityType": "standard",
83
+ "observations": ["Defines additional HTTP status codes including 429 Too Many Requests"]
84
+ }
85
+ ],
86
+ "relations": [
87
+ { "from": "Rate Limiting", "to": "IETF RFC 6585", "relationType": ["governed_by"] },
88
+ { "from": "Idempotency", "to": "Webhook", "relationType": ["related_to"] }
89
+ ]
90
+ }
91
+ ```
92
+
93
+ ---
94
+
95
+ ### Example 2
96
+
97
+ Input:
98
+
99
+ ## File Information
100
+
101
+ Path: `docs/feature-catalog.md`
102
+
103
+ ## Content to Analyze
104
+
105
+ ```
106
+ # Product Feature Catalog — Nexus Platform
107
+
108
+ ## Real-time Collaboration
109
+ Simultaneous editing with conflict-free replicated data types (CRDTs).
110
+ Tiers: Pro, Enterprise.
111
+ Requires: WebSocket support in client environment.
112
+ Compatible with: Document Editor, Whiteboard, Code Review modules.
113
+
114
+ ## SSO / SAML 2.0
115
+ Single sign-on via SAML 2.0 identity providers (Okta, Azure AD, Google Workspace).
116
+ Tiers: Enterprise only.
117
+ Requires: Admin configuration of IdP metadata URL and certificate.
118
+ Compatible with: All modules. Incompatible with: Legacy API key auth (cannot be used simultaneously).
119
+
120
+ ## Audit Log
121
+ Immutable, tamper-evident log of all user actions and configuration changes.
122
+ Exported as JSON or CSV. Retention: 90 days (Pro), 2 years (Enterprise).
123
+ Tiers: Pro, Enterprise.
124
+ Compatible with: SIEM integrations (Splunk, Datadog).
125
+
126
+ ## AI Summarization
127
+ One-click summary of documents, threads, and activity feeds using LLM backend.
128
+ Tiers: Pro (10 summaries/day), Enterprise (unlimited).
129
+ Requires: Nexus AI add-on license.
130
+ Compatible with: Document Editor, Inbox modules.
131
+ ```
132
+
133
+ Output:
134
+
135
+ ```json
136
+ {
137
+ "entities": [
138
+ {
139
+ "name": "Real-time Collaboration",
140
+ "entityType": "feature",
141
+ "observations": [
142
+ "Uses CRDTs for conflict-free simultaneous editing",
143
+ "Available on Pro and Enterprise tiers",
144
+ "Requires WebSocket support in client environment"
145
+ ]
146
+ },
147
+ {
148
+ "name": "SSO / SAML 2.0",
149
+ "entityType": "feature",
150
+ "observations": [
151
+ "Enterprise-only feature",
152
+ "Supports Okta, Azure AD, Google Workspace as identity providers",
153
+ "Incompatible with legacy API key authentication when enabled"
154
+ ]
155
+ },
156
+ {
157
+ "name": "Audit Log",
158
+ "entityType": "feature",
159
+ "observations": [
160
+ "Immutable, tamper-evident action log",
161
+ "Retention: 90 days (Pro), 2 years (Enterprise)",
162
+ "Exportable as JSON or CSV; compatible with SIEM tools"
163
+ ]
164
+ },
165
+ {
166
+ "name": "AI Summarization",
167
+ "entityType": "feature",
168
+ "observations": [
169
+ "LLM-backed document and thread summarization",
170
+ "Pro tier: 10 summaries/day; Enterprise: unlimited",
171
+ "Requires Nexus AI add-on license"
172
+ ]
173
+ },
174
+ {
175
+ "name": "Pro",
176
+ "entityType": "tier",
177
+ "observations": ["Mid-tier plan; includes Real-time Collaboration, Audit Log, AI Summarization (limited)"]
178
+ },
179
+ {
180
+ "name": "Enterprise",
181
+ "entityType": "tier",
182
+ "observations": ["Top-tier plan; includes all features with extended limits"]
183
+ },
184
+ {
185
+ "name": "Nexus AI add-on",
186
+ "entityType": "dependency",
187
+ "observations": ["Required license for AI Summarization feature"]
188
+ }
189
+ ],
190
+ "relations": [
191
+ { "from": "Real-time Collaboration", "to": "Pro", "relationType": ["included_in"] },
192
+ { "from": "Real-time Collaboration", "to": "Enterprise", "relationType": ["included_in"] },
193
+ { "from": "SSO / SAML 2.0", "to": "Enterprise", "relationType": ["included_in"] },
194
+ { "from": "Audit Log", "to": "Pro", "relationType": ["included_in"] },
195
+ { "from": "Audit Log", "to": "Enterprise", "relationType": ["included_in"] },
196
+ { "from": "AI Summarization", "to": "Pro", "relationType": ["included_in"] },
197
+ { "from": "AI Summarization", "to": "Enterprise", "relationType": ["included_in"] },
198
+ { "from": "AI Summarization", "to": "Nexus AI add-on", "relationType": ["requires"] }
199
+ ]
200
+ }
201
+ ```