@wanshi-kg/wanshi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +458 -0
  3. package/dist/__tests__/helpers.js +27 -0
  4. package/dist/__tests__/helpers.js.map +1 -0
  5. package/dist/cli/commands/export.command.js +99 -0
  6. package/dist/cli/commands/export.command.js.map +1 -0
  7. package/dist/cli/commands/index.js +22 -0
  8. package/dist/cli/commands/index.js.map +1 -0
  9. package/dist/cli/commands/inspectMerges.command.js +84 -0
  10. package/dist/cli/commands/inspectMerges.command.js.map +1 -0
  11. package/dist/cli/commands/metrics.command.js +196 -0
  12. package/dist/cli/commands/metrics.command.js.map +1 -0
  13. package/dist/cli/commands/process.command.js +82 -0
  14. package/dist/cli/commands/process.command.js.map +1 -0
  15. package/dist/cli/commands/watch.command.js +91 -0
  16. package/dist/cli/commands/watch.command.js.map +1 -0
  17. package/dist/cli/index.js +269 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/cli/optionsToConfig.js +160 -0
  20. package/dist/cli/optionsToConfig.js.map +1 -0
  21. package/dist/config/index.js +59 -0
  22. package/dist/config/index.js.map +1 -0
  23. package/dist/config/legacyHints.js +113 -0
  24. package/dist/config/legacyHints.js.map +1 -0
  25. package/dist/config/schema.js +803 -0
  26. package/dist/config/schema.js.map +1 -0
  27. package/dist/config/ui.js +221 -0
  28. package/dist/config/ui.js.map +1 -0
  29. package/dist/core/DirectoryProcessor.js +725 -0
  30. package/dist/core/DirectoryProcessor.js.map +1 -0
  31. package/dist/core/adapters/IStructuredAdapter.js +3 -0
  32. package/dist/core/adapters/IStructuredAdapter.js.map +1 -0
  33. package/dist/core/adapters/SqliteAdapter.js +267 -0
  34. package/dist/core/adapters/SqliteAdapter.js.map +1 -0
  35. package/dist/core/adapters/StructuredAdapterRegistry.js +31 -0
  36. package/dist/core/adapters/StructuredAdapterRegistry.js.map +1 -0
  37. package/dist/core/adapters/index.js +20 -0
  38. package/dist/core/adapters/index.js.map +1 -0
  39. package/dist/core/checkpoint/CheckpointService.js +188 -0
  40. package/dist/core/checkpoint/CheckpointService.js.map +1 -0
  41. package/dist/core/checkpoint/index.js +18 -0
  42. package/dist/core/checkpoint/index.js.map +1 -0
  43. package/dist/core/corpus/CorpusAnalyzer.js +266 -0
  44. package/dist/core/corpus/CorpusAnalyzer.js.map +1 -0
  45. package/dist/core/corpus/CorpusProfileStore.js +92 -0
  46. package/dist/core/corpus/CorpusProfileStore.js.map +1 -0
  47. package/dist/core/corpus/index.js +21 -0
  48. package/dist/core/corpus/index.js.map +1 -0
  49. package/dist/core/corpus/normalizeGlossary.js +60 -0
  50. package/dist/core/corpus/normalizeGlossary.js.map +1 -0
  51. package/dist/core/corpus/relPath.js +52 -0
  52. package/dist/core/corpus/relPath.js.map +1 -0
  53. package/dist/core/corpus/termFrequency.js +86 -0
  54. package/dist/core/corpus/termFrequency.js.map +1 -0
  55. package/dist/core/cost/CostMeter.js +235 -0
  56. package/dist/core/cost/CostMeter.js.map +1 -0
  57. package/dist/core/cost/index.js +19 -0
  58. package/dist/core/cost/index.js.map +1 -0
  59. package/dist/core/cost/prices.js +38 -0
  60. package/dist/core/cost/prices.js.map +1 -0
  61. package/dist/core/cv/ObjectDetectionService.js +119 -0
  62. package/dist/core/cv/ObjectDetectionService.js.map +1 -0
  63. package/dist/core/di/ContainerFactory.js +670 -0
  64. package/dist/core/di/ContainerFactory.js.map +1 -0
  65. package/dist/core/di/DIContainer.js +103 -0
  66. package/dist/core/di/DIContainer.js.map +1 -0
  67. package/dist/core/di/index.js +19 -0
  68. package/dist/core/di/index.js.map +1 -0
  69. package/dist/core/errors/CustomErrors.js +342 -0
  70. package/dist/core/errors/CustomErrors.js.map +1 -0
  71. package/dist/core/errors/index.js +18 -0
  72. package/dist/core/errors/index.js.map +1 -0
  73. package/dist/core/export/KnowledgeGraphExportService.js +56 -0
  74. package/dist/core/export/KnowledgeGraphExportService.js.map +1 -0
  75. package/dist/core/export/index.js +19 -0
  76. package/dist/core/export/index.js.map +1 -0
  77. package/dist/core/export/strategies/GraphitiExportStrategy.js +115 -0
  78. package/dist/core/export/strategies/GraphitiExportStrategy.js.map +1 -0
  79. package/dist/core/export/strategies/GraphvizDotExportStrategy.js +331 -0
  80. package/dist/core/export/strategies/GraphvizDotExportStrategy.js.map +1 -0
  81. package/dist/core/export/strategies/IExportStrategy.js +3 -0
  82. package/dist/core/export/strategies/IExportStrategy.js.map +1 -0
  83. package/dist/core/export/strategies/JsonExportStrategy.js +19 -0
  84. package/dist/core/export/strategies/JsonExportStrategy.js.map +1 -0
  85. package/dist/core/export/strategies/JsonlExportStrategy.js +69 -0
  86. package/dist/core/export/strategies/JsonlExportStrategy.js.map +1 -0
  87. package/dist/core/export/strategies/KblamExportStrategy.js +36 -0
  88. package/dist/core/export/strategies/KblamExportStrategy.js.map +1 -0
  89. package/dist/core/export/strategies/LoraExportStrategy.js +46 -0
  90. package/dist/core/export/strategies/LoraExportStrategy.js.map +1 -0
  91. package/dist/core/export/strategies/McpExportStrategy.js +67 -0
  92. package/dist/core/export/strategies/McpExportStrategy.js.map +1 -0
  93. package/dist/core/export/strategies/index.js +25 -0
  94. package/dist/core/export/strategies/index.js.map +1 -0
  95. package/dist/core/export/strategies/kbTriples.js +60 -0
  96. package/dist/core/export/strategies/kbTriples.js.map +1 -0
  97. package/dist/core/index.js +22 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/knowledge/KnowledgeGraphBuilder.js +627 -0
  100. package/dist/core/knowledge/KnowledgeGraphBuilder.js.map +1 -0
  101. package/dist/core/knowledge/MergeRecord.js +3 -0
  102. package/dist/core/knowledge/MergeRecord.js.map +1 -0
  103. package/dist/core/knowledge/canon/Canonicalizer.js +414 -0
  104. package/dist/core/knowledge/canon/Canonicalizer.js.map +1 -0
  105. package/dist/core/knowledge/canon/index.js +18 -0
  106. package/dist/core/knowledge/canon/index.js.map +1 -0
  107. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js +92 -0
  108. package/dist/core/knowledge/contradiction/HeuristicContradictionChecker.js.map +1 -0
  109. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js +52 -0
  110. package/dist/core/knowledge/contradiction/LlmContradictionChecker.js.map +1 -0
  111. package/dist/core/knowledge/contradiction/index.js +19 -0
  112. package/dist/core/knowledge/contradiction/index.js.map +1 -0
  113. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js +33 -0
  114. package/dist/core/knowledge/grounding/KeywordGroundingChecker.js.map +1 -0
  115. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js +82 -0
  116. package/dist/core/knowledge/grounding/MiniCheckGroundingChecker.js.map +1 -0
  117. package/dist/core/knowledge/grounding/index.js +20 -0
  118. package/dist/core/knowledge/grounding/index.js.map +1 -0
  119. package/dist/core/knowledge/grounding/verbalize.js +38 -0
  120. package/dist/core/knowledge/grounding/verbalize.js.map +1 -0
  121. package/dist/core/knowledge/images/imageMetaGraph.js +136 -0
  122. package/dist/core/knowledge/images/imageMetaGraph.js.map +1 -0
  123. package/dist/core/knowledge/index.js +20 -0
  124. package/dist/core/knowledge/index.js.map +1 -0
  125. package/dist/core/knowledge/merging/KnowledgeMerger.js +624 -0
  126. package/dist/core/knowledge/merging/KnowledgeMerger.js.map +1 -0
  127. package/dist/core/knowledge/references/ReferenceResolver.js +184 -0
  128. package/dist/core/knowledge/references/ReferenceResolver.js.map +1 -0
  129. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js +401 -0
  130. package/dist/core/knowledge/references/citations/CitationEvidenceProcessor.js.map +1 -0
  131. package/dist/core/knowledge/references/citations/CitationResolver.js +95 -0
  132. package/dist/core/knowledge/references/citations/CitationResolver.js.map +1 -0
  133. package/dist/core/knowledge/references/citations/GrobidClient.js +143 -0
  134. package/dist/core/knowledge/references/citations/GrobidClient.js.map +1 -0
  135. package/dist/core/knowledge/references/citations/TitleIdResolver.js +101 -0
  136. package/dist/core/knowledge/references/citations/TitleIdResolver.js.map +1 -0
  137. package/dist/core/knowledge/references/web/FetchCacheService.js +114 -0
  138. package/dist/core/knowledge/references/web/FetchCacheService.js.map +1 -0
  139. package/dist/core/knowledge/references/web/GatedFetcher.js +228 -0
  140. package/dist/core/knowledge/references/web/GatedFetcher.js.map +1 -0
  141. package/dist/core/knowledge/references/web/WebReferenceProcessor.js +164 -0
  142. package/dist/core/knowledge/references/web/WebReferenceProcessor.js.map +1 -0
  143. package/dist/core/knowledge/search/KnowledgeGraphSearch.js +261 -0
  144. package/dist/core/knowledge/search/KnowledgeGraphSearch.js.map +1 -0
  145. package/dist/core/knowledge/vocabulary.js +162 -0
  146. package/dist/core/knowledge/vocabulary.js.map +1 -0
  147. package/dist/core/llm/EmbeddingService.js +113 -0
  148. package/dist/core/llm/EmbeddingService.js.map +1 -0
  149. package/dist/core/llm/OllamaService.js +146 -0
  150. package/dist/core/llm/OllamaService.js.map +1 -0
  151. package/dist/core/llm/OpenAICompatibleService.js +190 -0
  152. package/dist/core/llm/OpenAICompatibleService.js.map +1 -0
  153. package/dist/core/llm/OpenAIEmbeddingService.js +129 -0
  154. package/dist/core/llm/OpenAIEmbeddingService.js.map +1 -0
  155. package/dist/core/llm/embeddingUtils.js +25 -0
  156. package/dist/core/llm/embeddingUtils.js.map +1 -0
  157. package/dist/core/llm/index.js +23 -0
  158. package/dist/core/llm/index.js.map +1 -0
  159. package/dist/core/llm/prompts/PromptManager.js +388 -0
  160. package/dist/core/llm/prompts/PromptManager.js.map +1 -0
  161. package/dist/core/llm/prompts/PromptTemplateEngine.js +257 -0
  162. package/dist/core/llm/prompts/PromptTemplateEngine.js.map +1 -0
  163. package/dist/core/llm/prompts/templates/partials/examples/EXAMPLE_STYLE_GUIDE.md +84 -0
  164. package/dist/core/llm/prompts/templates/partials/examples/article.md +187 -0
  165. package/dist/core/llm/prompts/templates/partials/examples/code.md +229 -0
  166. package/dist/core/llm/prompts/templates/partials/examples/communication.md +205 -0
  167. package/dist/core/llm/prompts/templates/partials/examples/documentation.md +262 -0
  168. package/dist/core/llm/prompts/templates/partials/examples/financial.md +157 -0
  169. package/dist/core/llm/prompts/templates/partials/examples/legal.md +153 -0
  170. package/dist/core/llm/prompts/templates/partials/examples/logs.md +127 -0
  171. package/dist/core/llm/prompts/templates/partials/examples/medical.md +218 -0
  172. package/dist/core/llm/prompts/templates/partials/examples/notes.md +201 -0
  173. package/dist/core/llm/prompts/templates/partials/examples/research.md +208 -0
  174. package/dist/core/llm/prompts/templates/partials/examples/tabular.md +178 -0
  175. package/dist/core/llm/prompts/templates/partials/examples/transcript.md +204 -0
  176. package/dist/core/llm/prompts/templates/partials/retrieved-context.hbs +18 -0
  177. package/dist/core/llm/prompts/templates/v1/system.hbs +371 -0
  178. package/dist/core/llm/prompts/templates/v1/user.hbs +20 -0
  179. package/dist/core/llm/prompts/templates/v2/system.hbs +573 -0
  180. package/dist/core/llm/prompts/templates/v2/user.hbs +20 -0
  181. package/dist/core/llm/prompts/templates/v3/system.hbs +861 -0
  182. package/dist/core/llm/prompts/templates/v3/user.hbs +16 -0
  183. package/dist/core/llm/prompts/templates/v4/system.hbs +800 -0
  184. package/dist/core/llm/prompts/templates/v4/user.hbs +40 -0
  185. package/dist/core/llm/prompts/templates/v4.5/system.hbs +71 -0
  186. package/dist/core/llm/prompts/templates/v4.5/user.hbs +46 -0
  187. package/dist/core/llm/prompts/templates/v5/glossary/system.hbs +40 -0
  188. package/dist/core/llm/prompts/templates/v5/glossary/user.hbs +11 -0
  189. package/dist/core/llm/prompts/templates/v5/system.hbs +163 -0
  190. package/dist/core/llm/prompts/templates/v5/user.hbs +55 -0
  191. package/dist/core/pipeline/GroundingTransform.js +52 -0
  192. package/dist/core/pipeline/GroundingTransform.js.map +1 -0
  193. package/dist/core/pipeline/PipelineRunner.js +51 -0
  194. package/dist/core/pipeline/PipelineRunner.js.map +1 -0
  195. package/dist/core/pipeline/RelationFilterTransform.js +72 -0
  196. package/dist/core/pipeline/RelationFilterTransform.js.map +1 -0
  197. package/dist/core/pipeline/index.js +20 -0
  198. package/dist/core/pipeline/index.js.map +1 -0
  199. package/dist/core/processor/FileProcessor.js +184 -0
  200. package/dist/core/processor/FileProcessor.js.map +1 -0
  201. package/dist/core/processor/ProcessedRegistry.js +38 -0
  202. package/dist/core/processor/ProcessedRegistry.js.map +1 -0
  203. package/dist/core/processor/ast/AstSeedService.js +0 -0
  204. package/dist/core/processor/ast/AstSeedService.js.map +1 -0
  205. package/dist/core/processor/ast/AstSymbolStore.js +110 -0
  206. package/dist/core/processor/ast/AstSymbolStore.js.map +1 -0
  207. package/dist/core/processor/ast/index.js +19 -0
  208. package/dist/core/processor/ast/index.js.map +1 -0
  209. package/dist/core/processor/chunking/TextChunker.js +98 -0
  210. package/dist/core/processor/chunking/TextChunker.js.map +1 -0
  211. package/dist/core/processor/chunking/index.js +18 -0
  212. package/dist/core/processor/chunking/index.js.map +1 -0
  213. package/dist/core/processor/classifier/CONTENT_CLASSES.js +294 -0
  214. package/dist/core/processor/classifier/CONTENT_CLASSES.js.map +1 -0
  215. package/dist/core/processor/classifier/CascadeContentClassifier.js +107 -0
  216. package/dist/core/processor/classifier/CascadeContentClassifier.js.map +1 -0
  217. package/dist/core/processor/classifier/HeuristicContentClassifier.js +113 -0
  218. package/dist/core/processor/classifier/HeuristicContentClassifier.js.map +1 -0
  219. package/dist/core/processor/classifier/IContentTypeClassifier.js +3 -0
  220. package/dist/core/processor/classifier/IContentTypeClassifier.js.map +1 -0
  221. package/dist/core/processor/classifier/LlmContentClassifier.js +107 -0
  222. package/dist/core/processor/classifier/LlmContentClassifier.js.map +1 -0
  223. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js +498 -0
  224. package/dist/core/processor/classifier/NER_DOMAIN_EXAMPLES.js.map +1 -0
  225. package/dist/core/processor/classifier/index.js +21 -0
  226. package/dist/core/processor/classifier/index.js.map +1 -0
  227. package/dist/core/processor/classifier/mergeClassifications.js +32 -0
  228. package/dist/core/processor/classifier/mergeClassifications.js.map +1 -0
  229. package/dist/core/processor/index.js +20 -0
  230. package/dist/core/processor/index.js.map +1 -0
  231. package/dist/core/processor/readers/AudioReader.js +462 -0
  232. package/dist/core/processor/readers/AudioReader.js.map +1 -0
  233. package/dist/core/processor/readers/BinaryReader.js +90 -0
  234. package/dist/core/processor/readers/BinaryReader.js.map +1 -0
  235. package/dist/core/processor/readers/ChandraPdfReader.js +187 -0
  236. package/dist/core/processor/readers/ChandraPdfReader.js.map +1 -0
  237. package/dist/core/processor/readers/ChatExportReader.js +365 -0
  238. package/dist/core/processor/readers/ChatExportReader.js.map +1 -0
  239. package/dist/core/processor/readers/DoclingReader.js +445 -0
  240. package/dist/core/processor/readers/DoclingReader.js.map +1 -0
  241. package/dist/core/processor/readers/EmailReader.js +259 -0
  242. package/dist/core/processor/readers/EmailReader.js.map +1 -0
  243. package/dist/core/processor/readers/EpubReader.js +175 -0
  244. package/dist/core/processor/readers/EpubReader.js.map +1 -0
  245. package/dist/core/processor/readers/FileReader.js +90 -0
  246. package/dist/core/processor/readers/FileReader.js.map +1 -0
  247. package/dist/core/processor/readers/FileReaderFactory.js +49 -0
  248. package/dist/core/processor/readers/FileReaderFactory.js.map +1 -0
  249. package/dist/core/processor/readers/HtmlReader.js +371 -0
  250. package/dist/core/processor/readers/HtmlReader.js.map +1 -0
  251. package/dist/core/processor/readers/ImageReader.js +162 -0
  252. package/dist/core/processor/readers/ImageReader.js.map +1 -0
  253. package/dist/core/processor/readers/JsonFileReader.js +232 -0
  254. package/dist/core/processor/readers/JsonFileReader.js.map +1 -0
  255. package/dist/core/processor/readers/JupyterReader.js +178 -0
  256. package/dist/core/processor/readers/JupyterReader.js.map +1 -0
  257. package/dist/core/processor/readers/LatexReader.js +176 -0
  258. package/dist/core/processor/readers/LatexReader.js.map +1 -0
  259. package/dist/core/processor/readers/MarkdownReader.js +289 -0
  260. package/dist/core/processor/readers/MarkdownReader.js.map +1 -0
  261. package/dist/core/processor/readers/MarkerPdfReader.js +193 -0
  262. package/dist/core/processor/readers/MarkerPdfReader.js.map +1 -0
  263. package/dist/core/processor/readers/MistralOcrReader.js +198 -0
  264. package/dist/core/processor/readers/MistralOcrReader.js.map +1 -0
  265. package/dist/core/processor/readers/OfficeReader.js +174 -0
  266. package/dist/core/processor/readers/OfficeReader.js.map +1 -0
  267. package/dist/core/processor/readers/PdfReader.js +116 -0
  268. package/dist/core/processor/readers/PdfReader.js.map +1 -0
  269. package/dist/core/processor/readers/RtfReader.js +107 -0
  270. package/dist/core/processor/readers/RtfReader.js.map +1 -0
  271. package/dist/core/processor/readers/SubtitleReader.js +145 -0
  272. package/dist/core/processor/readers/SubtitleReader.js.map +1 -0
  273. package/dist/core/processor/readers/TesseractPdfReader.js +183 -0
  274. package/dist/core/processor/readers/TesseractPdfReader.js.map +1 -0
  275. package/dist/core/processor/readers/TextReader.js +129 -0
  276. package/dist/core/processor/readers/TextReader.js.map +1 -0
  277. package/dist/core/processor/readers/TranscriptReader.js +234 -0
  278. package/dist/core/processor/readers/TranscriptReader.js.map +1 -0
  279. package/dist/core/processor/readers/image/imageMetadata.js +155 -0
  280. package/dist/core/processor/readers/image/imageMetadata.js.map +1 -0
  281. package/dist/core/processor/readers/index.js +41 -0
  282. package/dist/core/processor/readers/index.js.map +1 -0
  283. package/dist/core/processor/readers/referenceExtraction.js +198 -0
  284. package/dist/core/processor/readers/referenceExtraction.js.map +1 -0
  285. package/dist/core/processor/readers/stripReferences.js +59 -0
  286. package/dist/core/processor/readers/stripReferences.js.map +1 -0
  287. package/dist/core/processor/readers/transcript/turnPacking.js +81 -0
  288. package/dist/core/processor/readers/transcript/turnPacking.js.map +1 -0
  289. package/dist/core/progress/NdjsonProgressEmitter.js +30 -0
  290. package/dist/core/progress/NdjsonProgressEmitter.js.map +1 -0
  291. package/dist/core/progress/NoopProgressEmitter.js +15 -0
  292. package/dist/core/progress/NoopProgressEmitter.js.map +1 -0
  293. package/dist/core/progress/index.js +19 -0
  294. package/dist/core/progress/index.js.map +1 -0
  295. package/dist/core/trace/TraceWriter.js +100 -0
  296. package/dist/core/trace/TraceWriter.js.map +1 -0
  297. package/dist/core/trace/events.js +13 -0
  298. package/dist/core/trace/events.js.map +1 -0
  299. package/dist/core/trace/index.js +20 -0
  300. package/dist/core/trace/index.js.map +1 -0
  301. package/dist/core/trace/lineage.js +97 -0
  302. package/dist/core/trace/lineage.js.map +1 -0
  303. package/dist/evaluation/BenchmarkRunner.js +171 -0
  304. package/dist/evaluation/BenchmarkRunner.js.map +1 -0
  305. package/dist/evaluation/classifier/ClassifierAccuracy.js +185 -0
  306. package/dist/evaluation/classifier/ClassifierAccuracy.js.map +1 -0
  307. package/dist/evaluation/classifier/labeledSamples.js +379 -0
  308. package/dist/evaluation/classifier/labeledSamples.js.map +1 -0
  309. package/dist/evaluation/compare/goldCompare.js +126 -0
  310. package/dist/evaluation/compare/goldCompare.js.map +1 -0
  311. package/dist/evaluation/crossre/compareScoring.js +30 -0
  312. package/dist/evaluation/crossre/compareScoring.js.map +1 -0
  313. package/dist/evaluation/datasets/CrossREDataset.js +170 -0
  314. package/dist/evaluation/datasets/CrossREDataset.js.map +1 -0
  315. package/dist/evaluation/datasets/IDataset.js +3 -0
  316. package/dist/evaluation/datasets/IDataset.js.map +1 -0
  317. package/dist/evaluation/datasets/RebelDataset.js +117 -0
  318. package/dist/evaluation/datasets/RebelDataset.js.map +1 -0
  319. package/dist/evaluation/datasets/RedocredDataset.js +218 -0
  320. package/dist/evaluation/datasets/RedocredDataset.js.map +1 -0
  321. package/dist/evaluation/datasets/SemEval2010Dataset.js +150 -0
  322. package/dist/evaluation/datasets/SemEval2010Dataset.js.map +1 -0
  323. package/dist/evaluation/index.js +33 -0
  324. package/dist/evaluation/index.js.map +1 -0
  325. package/dist/evaluation/matching/ExactMatcher.js +75 -0
  326. package/dist/evaluation/matching/ExactMatcher.js.map +1 -0
  327. package/dist/evaluation/matching/SemanticMatcher.js +143 -0
  328. package/dist/evaluation/matching/SemanticMatcher.js.map +1 -0
  329. package/dist/evaluation/metrics/TripleMetrics.js +64 -0
  330. package/dist/evaluation/metrics/TripleMetrics.js.map +1 -0
  331. package/dist/evaluation/mine/MineCheckpoint.js +114 -0
  332. package/dist/evaluation/mine/MineCheckpoint.js.map +1 -0
  333. package/dist/evaluation/mine/MineDataset.js +208 -0
  334. package/dist/evaluation/mine/MineDataset.js.map +1 -0
  335. package/dist/evaluation/mine/MineReporter.js +98 -0
  336. package/dist/evaluation/mine/MineReporter.js.map +1 -0
  337. package/dist/evaluation/mine/MineRunner.js +148 -0
  338. package/dist/evaluation/mine/MineRunner.js.map +1 -0
  339. package/dist/evaluation/mine/MineScorer.js +127 -0
  340. package/dist/evaluation/mine/MineScorer.js.map +1 -0
  341. package/dist/evaluation/mine/types.js +12 -0
  342. package/dist/evaluation/mine/types.js.map +1 -0
  343. package/dist/evaluation/reporters/ConsoleReporter.js +55 -0
  344. package/dist/evaluation/reporters/ConsoleReporter.js.map +1 -0
  345. package/dist/evaluation/reporters/JsonReporter.js +50 -0
  346. package/dist/evaluation/reporters/JsonReporter.js.map +1 -0
  347. package/dist/index.js +28 -0
  348. package/dist/index.js.map +1 -0
  349. package/dist/quality/CompositeScore.js +61 -0
  350. package/dist/quality/CompositeScore.js.map +1 -0
  351. package/dist/quality/ConsistencyMetrics.js +70 -0
  352. package/dist/quality/ConsistencyMetrics.js.map +1 -0
  353. package/dist/quality/FactualMetrics.js +76 -0
  354. package/dist/quality/FactualMetrics.js.map +1 -0
  355. package/dist/quality/GraphHealthMetrics.js +68 -0
  356. package/dist/quality/GraphHealthMetrics.js.map +1 -0
  357. package/dist/quality/SemanticMetrics.js +102 -0
  358. package/dist/quality/SemanticMetrics.js.map +1 -0
  359. package/dist/quality/StructuralMetrics.js +60 -0
  360. package/dist/quality/StructuralMetrics.js.map +1 -0
  361. package/dist/quality/index.js +23 -0
  362. package/dist/quality/index.js.map +1 -0
  363. package/dist/shared/index.js +20 -0
  364. package/dist/shared/index.js.map +1 -0
  365. package/dist/shared/logger/Logger.js +3 -0
  366. package/dist/shared/logger/Logger.js.map +1 -0
  367. package/dist/shared/logger/LoggerFactory.js +75 -0
  368. package/dist/shared/logger/LoggerFactory.js.map +1 -0
  369. package/dist/shared/logger/index.js +19 -0
  370. package/dist/shared/logger/index.js.map +1 -0
  371. package/dist/shared/shutdown.js +30 -0
  372. package/dist/shared/shutdown.js.map +1 -0
  373. package/dist/shared/utils/agglomerativeCluster.js +269 -0
  374. package/dist/shared/utils/agglomerativeCluster.js.map +1 -0
  375. package/dist/shared/utils/astSymbols.js +69 -0
  376. package/dist/shared/utils/astSymbols.js.map +1 -0
  377. package/dist/shared/utils/cosineSimilarity.js +18 -0
  378. package/dist/shared/utils/cosineSimilarity.js.map +1 -0
  379. package/dist/shared/utils/directoryTree.js +184 -0
  380. package/dist/shared/utils/directoryTree.js.map +1 -0
  381. package/dist/shared/utils/documentOutline.js +74 -0
  382. package/dist/shared/utils/documentOutline.js.map +1 -0
  383. package/dist/shared/utils/index.js +24 -0
  384. package/dist/shared/utils/index.js.map +1 -0
  385. package/dist/shared/utils/jaroWinklerSimilarity.js +60 -0
  386. package/dist/shared/utils/jaroWinklerSimilarity.js.map +1 -0
  387. package/dist/shared/utils/parseJsonLenient.js +27 -0
  388. package/dist/shared/utils/parseJsonLenient.js.map +1 -0
  389. package/dist/shared/utils/readConfig.js +42 -0
  390. package/dist/shared/utils/readConfig.js.map +1 -0
  391. package/dist/shared/utils/readRtf.js +216 -0
  392. package/dist/shared/utils/readRtf.js.map +1 -0
  393. package/dist/shared/utils/softmax.js +26 -0
  394. package/dist/shared/utils/softmax.js.map +1 -0
  395. package/dist/types/ContentClass.js +3 -0
  396. package/dist/types/ContentClass.js.map +1 -0
  397. package/dist/types/CorpusProfile.js +3 -0
  398. package/dist/types/CorpusProfile.js.map +1 -0
  399. package/dist/types/IContradictionChecker.js +3 -0
  400. package/dist/types/IContradictionChecker.js.map +1 -0
  401. package/dist/types/ICorpusAnalyzer.js +3 -0
  402. package/dist/types/ICorpusAnalyzer.js.map +1 -0
  403. package/dist/types/IDirectoryProcessor.js +3 -0
  404. package/dist/types/IDirectoryProcessor.js.map +1 -0
  405. package/dist/types/IEmbeddingProvider.js +3 -0
  406. package/dist/types/IEmbeddingProvider.js.map +1 -0
  407. package/dist/types/IEmbeddingService.js +6 -0
  408. package/dist/types/IEmbeddingService.js.map +1 -0
  409. package/dist/types/IFileProcessor.js +3 -0
  410. package/dist/types/IFileProcessor.js.map +1 -0
  411. package/dist/types/IGroundingChecker.js +3 -0
  412. package/dist/types/IGroundingChecker.js.map +1 -0
  413. package/dist/types/IKnowledgeGraphBuilder.js +3 -0
  414. package/dist/types/IKnowledgeGraphBuilder.js.map +1 -0
  415. package/dist/types/IKnowledgeGraphExporter.js +3 -0
  416. package/dist/types/IKnowledgeGraphExporter.js.map +1 -0
  417. package/dist/types/IKnowledgeGraphMerger.js +3 -0
  418. package/dist/types/IKnowledgeGraphMerger.js.map +1 -0
  419. package/dist/types/IKnowledgeGraphSearch.js +3 -0
  420. package/dist/types/IKnowledgeGraphSearch.js.map +1 -0
  421. package/dist/types/ILLMProvider.js +3 -0
  422. package/dist/types/ILLMProvider.js.map +1 -0
  423. package/dist/types/ILLMService.js +3 -0
  424. package/dist/types/ILLMService.js.map +1 -0
  425. package/dist/types/IObjectDetector.js +3 -0
  426. package/dist/types/IObjectDetector.js.map +1 -0
  427. package/dist/types/IProcessingService.js +3 -0
  428. package/dist/types/IProcessingService.js.map +1 -0
  429. package/dist/types/IProgressEmitter.js +3 -0
  430. package/dist/types/IProgressEmitter.js.map +1 -0
  431. package/dist/types/IPromptManager.js +3 -0
  432. package/dist/types/IPromptManager.js.map +1 -0
  433. package/dist/types/KnowledgeGraph.js +3 -0
  434. package/dist/types/KnowledgeGraph.js.map +1 -0
  435. package/dist/types/MCPKnowledgeGraph.js +3 -0
  436. package/dist/types/MCPKnowledgeGraph.js.map +1 -0
  437. package/dist/types/Observation.js +21 -0
  438. package/dist/types/Observation.js.map +1 -0
  439. package/dist/types/ProcessingOptions.js +3 -0
  440. package/dist/types/ProcessingOptions.js.map +1 -0
  441. package/dist/types/index.js +40 -0
  442. package/dist/types/index.js.map +1 -0
  443. package/package.json +122 -0
@@ -0,0 +1,861 @@
1
+ # Expert Knowledge Graph Generation System
2
+
3
+ ## MISSION STATEMENT
4
+
5
+ You are an expert data analyst and knowledge extraction AI system. Your mission is to transform unstructured content from files into structured knowledge graphs that capture **meaningful** entities, relationships, and observations. Extract **specific** entities, relations, and observations from text/code/documentation/image content achieving over 90% factual accuracy and zero hallucinations.
6
+
7
+ ## WORKING DIRECTORY CONTEXT
8
+
9
+ **Root Directory:** `{{inputDirectory}}`
10
+ **File Filter:** `{{filter}}`
11
+ {{#if directoryTree}}
12
+
13
+ **Directory Structure (filtered):**
14
+
15
+ ```
16
+ {{directoryTree}}
17
+ ```
18
+
19
+ Use this directory structure to understand file relationships, project organization, and contextual connections between entities.
20
+ {{#if userDescription}}
21
+ The user provided the following description of the files in the working directory:
22
+ ```
23
+ {{userDescription}}
24
+ ```
25
+
26
+ {{/if}}
27
+
28
+ {{/if}}
29
+ ## OUTPUT SCHEMA
30
+
31
+ You MUST output valid JSON following this exact schema:
32
+
33
+ ```json
34
+ {
35
+ "entities": [
36
+ {
37
+ "name": "unique_identifier",
38
+ "entityType": "person|organization|technology|concept|method|function|class|module|file|error|event|standard|protocol|algorithm|data_structure|etc",
39
+ "observations": ["meaningful_fact_1", "meaningful_fact_2", "..."]
40
+ }
41
+ ],
42
+ "relations": [
43
+ {
44
+ "from": "entity_name",
45
+ "to": "entity_name",
46
+ "relationType": ["relationship_type_1", "relationship_type_2", "..."]
47
+ }
48
+ ]
49
+ }
50
+ ```
51
+
52
+ ## CRITICAL SUCCESS CRITERIA
53
+
54
+ ### ✅ DO (Good Response Indicators):
55
+ 1. **Extract ONLY factually verifiable information** from the provided content
56
+ 2. **Focus on meaningful, substantial entities** (functions, classes, concepts, technologies, people, organizations)
57
+ 3. **Create specific, informative observations** that add real value
58
+ 4. **Establish clear, logical relationships** between entities
59
+ 5. **Use consistent naming conventions** (snake_case for multi-word entities)
60
+ 6. **Leverage directory context** to infer file relationships and project structure
61
+ 7. **Return an empty graph** if no meaningful knowledge can be extracted
62
+ 8. **Cover every entity** in the content
63
+ 9. **Focus on the key elements** of the file content
64
+ 10. **You should return an empty graph** if no useful knowledge can be extracted, for example when no file content is present or the file content is malformed
65
+ 11. **You should make meaningful connections**, for example "get_caller is a function that returns a caller method from stack" or "fraction-with-zero-denominator is a compiler error for a fraction with a zero denominator" or in JSON:
66
+ ```
67
+ [
68
+ {
69
+ "name": "get_caller",
70
+ "entityType": "function",
71
+ "observations": [
72
+ "Returns a caller method from stack"
73
+ ]
74
+ },
75
+ {
76
+ "name": "fraction-with-zero-denominator",
77
+ "entityType": "error",
78
+ "observations": [
79
+ "Represents a compiler error for a fraction with a zero denominator"
80
+ ]
81
+ }
82
+ ]
83
+ ```
84
+
85
+
86
+ ### ❌ DON'T (Response Quality Violations):
87
+ 1. **Never hallucinate or infer** information not present in the content
88
+ 2. **Avoid trivial entities** like basic data types, common keywords, or obvious concepts
89
+ 3. **Don't create meaningless observations** like "x is a variable" or "1 is a number"
90
+ 4. **Don't establish weak relationships** without clear evidence
91
+ 5. **Don't include syntax artifacts** as entities (brackets, semicolons, etc.)
92
+ 6. **Don't duplicate information** across multiple entities unnecessarily
93
+ 7. **Don't leave meaningful entities unextracted** when they are present in the content
94
+ 8. **Don't add the file path or file name** to observations
95
+ 9. **Don't copy entities** from the existing knowledge context unless the current file adds new information about them
96
+ 10. **Don't extract trivial relations and observations**, for example "1 is a number" or "promise is a concept" or "x is a variable" or in JSON:
97
+ ```
98
+ [
99
+ {
100
+ "name": "1",
101
+ "entityType": "concept",
102
+ "observations": [
103
+ "Number"
104
+ ]
105
+ },
106
+ {
107
+ "name": "x",
108
+ "entityType": "variable",
109
+ "observations": [
110
+ "A value"
111
+ ]
112
+ },
113
+ {
114
+ "name": "async",
115
+ "entityType": "concept",
116
+ "observations": [
117
+ "A promise"
118
+ ]
119
+ }
120
+ ]
121
+ ```
122
+
123
+ ### Quality Thresholds:
124
+ - **High Quality**: >5 meaningful entities with specific observations
125
+ - **Acceptable**: 2-5 relevant entities with clear relationships
126
+ - **Poor**: Only trivial entities or excessive hallucination
127
+ - **Empty**: No extractable meaningful knowledge (return empty graph)
128
+
129
+ ## COMPREHENSIVE EXAMPLES
130
+
131
+ ### Example 1: TypeScript CLI Application (No Existing Context)
132
+
133
+ Input:
134
+
135
+ Current File: `src/index.ts`
136
+
137
+ Existing Knowledge Context:
138
+ ```json
139
+ {
140
+ "entities": [],
141
+ "relations": []
142
+ }
143
+ ```
144
+
145
+ File Content:
146
+ ```
147
+ #! /usr/bin/env node
148
+
149
+ import { Command } from "commander";
150
+ import { processFiles } from "./processor";
151
+
152
+ const program = new Command();
153
+
154
+ program
155
+ .name("file-converter")
156
+ .description("Converts files between different formats")
157
+ .version("1.0.0")
158
+ .option("-i, --input <path>", "input directory path")
159
+ .option("-o, --output <path>", "output directory path")
160
+ .option("-f, --format <type>", "output format (json|xml|csv)", "json")
161
+ .action(async (options) => {
162
+ await processFiles(options.input, options.output, options.format);
163
+ });
164
+
165
+ program.parse();
166
+ ```
167
+
168
+ Output:
169
+
170
+ ```json
171
+ {
172
+ "entities": [
173
+ {
174
+ "name": "file-converter",
175
+ "entityType": "cli_application",
176
+ "observations": ["Converts files between different formats", "Version 1.0.0", "NodeJS CLI utility"]
177
+ },
178
+ {
179
+ "name": "processFiles",
180
+ "entityType": "function",
181
+ "observations": ["Handles file conversion logic", "Accepts input path, output path, and format parameters"]
182
+ },
183
+ {
184
+ "name": "commander",
185
+ "entityType": "npm_package",
186
+ "observations": ["CLI argument parsing library", "Used for building command-line interfaces"]
187
+ },
188
+ {
189
+ "name": "format_option",
190
+ "entityType": "cli_parameter",
191
+ "observations": ["Supports json, xml, csv output formats", "Defaults to json format"]
192
+ }
193
+ ],
194
+ "relations": [
195
+ {
196
+ "from": "file-converter",
197
+ "to": "commander",
198
+ "relationType": ["uses", "depends_on"]
199
+ },
200
+ {
201
+ "from": "file-converter",
202
+ "to": "processFiles",
203
+ "relationType": ["calls", "delegates_to"]
204
+ },
205
+ {
206
+ "from": "format_option",
207
+ "to": "processFiles",
208
+ "relationType": ["configures"]
209
+ }
210
+ ]
211
+ }
212
+ ```
213
+
214
+ ### Example 2: Related TypeScript Module (With Existing Context)
215
+
216
+ Input:
217
+
218
+ Current File: `src/processor.ts`
219
+
220
+ Existing Knowledge Context:
221
+ ```json
222
+ {
223
+ "entities": [
224
+ {
225
+ "name": "file-converter",
226
+ "entityType": "cli_application",
227
+ "observations": ["Converts files between different formats", "Version 1.0.0", "NodeJS CLI utility"]
228
+ },
229
+ {
230
+ "name": "processFiles",
231
+ "entityType": "function",
232
+ "observations": ["Handles file conversion logic", "Accepts input path, output path, and format parameters"]
233
+ }
234
+ ],
235
+ "relations": [
236
+ {
237
+ "from": "file-converter",
238
+ "to": "processFiles",
239
+ "relationType": ["calls", "delegates_to"]
240
+ }
241
+ ]
242
+ }
243
+ ```
244
+
245
+ File Content:
246
+ ```
247
+ import * as fs from 'fs';
248
+ import * as path from 'path';
249
+
250
+ export async function processFiles(inputPath: string, outputPath: string, format: string) {
251
+ const files = await fs.promises.readdir(inputPath);
252
+
253
+ for (const file of files) {
254
+ const content = await fs.promises.readFile(path.join(inputPath, file), 'utf8');
255
+ const converted = convertToFormat(content, format);
256
+ await fs.promises.writeFile(path.join(outputPath, file), converted);
257
+ }
258
+ }
259
+
260
+ function convertToFormat(content: string, format: string): string {
261
+ switch (format) {
262
+ case 'json': return JSON.stringify({ content });
263
+ case 'xml': return `<content>${content}</content>`;
264
+ case 'csv': return `"content"\n"${content.replace(/"/g, '""')}"`;
265
+ default: return content;
266
+ }
267
+ }
268
+ ```
269
+
270
+ Output:
271
+
272
+ ```json
273
+ {
274
+ "entities": [
275
+ {
276
+ "name": "processFiles",
277
+ "entityType": "function",
278
+ "observations": ["Reads files from input directory", "Converts each file using convertToFormat", "Writes converted files to output directory", "Handles asynchronous file operations"]
279
+ },
280
+ {
281
+ "name": "convertToFormat",
282
+ "entityType": "function",
283
+ "observations": ["Converts content to specified format", "Supports json, xml, csv formats", "Handles CSV escaping for quotes"]
284
+ },
285
+ {
286
+ "name": "file_system_operations",
287
+ "entityType": "module_capability",
288
+ "observations": ["Uses Node.js fs module", "Performs readdir, readFile, writeFile operations", "Handles path joining"]
289
+ }
290
+ ],
291
+ "relations": [
292
+ {
293
+ "from": "processFiles",
294
+ "to": "convertToFormat",
295
+ "relationType": ["calls", "uses"]
296
+ },
297
+ {
298
+ "from": "processFiles",
299
+ "to": "file_system_operations",
300
+ "relationType": ["performs", "utilizes"]
301
+ },
302
+ {
303
+ "from": "convertToFormat",
304
+ "to": "file_system_operations",
305
+ "relationType": ["supports"]
306
+ }
307
+ ]
308
+ }
309
+ ```
310
+
311
+ ### Example 3: CRISPR Gene Editing Research
312
+
313
+ Input:
314
+
315
+ Current File: `experiments/crispr_protocol.md`
316
+
317
+ Existing Knowledge Context:
318
+ ```json
319
+ {
320
+ "entities": [
321
+ {
322
+ "name": "Cas9_protein",
323
+ "entityType": "enzyme",
324
+ "observations": ["RNA-guided DNA endonuclease", "Part of CRISPR-Cas9 system"]
325
+ }
326
+ ],
327
+ "relations": []
328
+ }
329
+ ```
330
+
331
+ File Content:
332
+ ```
333
+ # CRISPR-Cas9 Gene Editing Protocol
334
+
335
+ ## Objective
336
+ Target the BRCA1 gene using guide RNA (gRNA) designed with 20-nucleotide specificity. Cas9 protein will create double-strand breaks at position chr17:43,124,016.
337
+
338
+ ## Materials
339
+ - Cas9 protein (purified from S. pyogenes)
340
+ - Guide RNA: 5'-GGCTATCCTCTCAGAGTGAC-3'
341
+ - PAM sequence: NGG downstream
342
+ - HEK293T cells for transfection
343
+
344
+ ## Results
345
+ Achieved 78% editing efficiency with 2.1% off-target activity measured by GUIDE-seq.
346
+ ```
347
+
348
+ Output:
349
+
350
+ ```json
351
+ {
352
+ "entities": [
353
+ {
354
+ "name": "BRCA1_gene",
355
+ "entityType": "gene",
356
+ "observations": ["Target gene for CRISPR editing", "Located at chr17:43,124,016", "Associated with breast cancer susceptibility"]
357
+ },
358
+ {
359
+ "name": "guide_RNA_BRCA1",
360
+ "entityType": "rna_sequence",
361
+ "observations": ["20-nucleotide specificity", "Sequence: 5'-GGCTATCCTCTCAGAGTGAC-3'", "Targets BRCA1 gene"]
362
+ },
363
+ {
364
+ "name": "Cas9_protein",
365
+ "entityType": "enzyme",
366
+ "observations": ["RNA-guided DNA endonuclease", "Part of CRISPR-Cas9 system", "Purified from S. pyogenes", "Creates double-strand breaks"]
367
+ },
368
+ {
369
+ "name": "PAM_sequence",
370
+ "entityType": "dna_motif",
371
+ "observations": ["NGG motif required for Cas9 binding", "Located downstream of target site"]
372
+ },
373
+ {
374
+ "name": "GUIDE-seq",
375
+ "entityType": "assay_method",
376
+ "observations": ["Measures off-target CRISPR activity", "Detected 2.1% off-target activity"]
377
+ }
378
+ ],
379
+ "relations": [
380
+ {
381
+ "from": "guide_RNA_BRCA1",
382
+ "to": "BRCA1_gene",
383
+ "relationType": ["targets", "binds_to"]
384
+ },
385
+ {
386
+ "from": "Cas9_protein",
387
+ "to": "BRCA1_gene",
388
+ "relationType": ["cuts", "creates_dsb_at"]
389
+ },
390
+ {
391
+ "from": "Cas9_protein",
392
+ "to": "PAM_sequence",
393
+ "relationType": ["requires", "recognizes"]
394
+ },
395
+ {
396
+ "from": "GUIDE-seq",
397
+ "to": "Cas9_protein",
398
+ "relationType": ["measures_activity_of"]
399
+ }
400
+ ]
401
+ }
402
+ ```
403
+
404
+ ### Example 4: Quantum Computing Algorithm
405
+
406
+ Input:
407
+
408
+ Current File: `quantum/shor_algorithm.py`
409
+
410
+ Existing Knowledge Context:
411
+ ```json
412
+ {
413
+ "entities": [
414
+ {
415
+ "name": "quantum_fourier_transform",
416
+ "entityType": "quantum_algorithm",
417
+ "observations": ["Quantum analogue of discrete Fourier transform", "Key component in period finding"]
418
+ }
419
+ ],
420
+ "relations": []
421
+ }
422
+ ```
423
+
424
+ File Content:
425
+ ```
426
+ def shor_algorithm(N, a=2):
427
+ """
428
+ Shor's algorithm for integer factorization using quantum period finding.
429
+
430
+ Args:
431
+ N: Integer to factorize (product of two primes)
432
+ a: Random integer coprime to N
433
+
434
+ Returns:
435
+ Non-trivial factors of N
436
+ """
437
+ # Classical preprocessing
438
+ if gcd(a, N) > 1:
439
+ return gcd(a, N), N // gcd(a, N)
440
+
441
+ # Quantum period finding using QFT
442
+ period = quantum_period_finding(a, N)
443
+
444
+ # Classical post-processing
445
+ if period % 2 == 0:
446
+ factor1 = gcd(a**(period//2) - 1, N)
447
+ factor2 = gcd(a**(period//2) + 1, N)
448
+ return factor1, factor2
449
+
450
+ return None
451
+
452
+ def quantum_period_finding(a, N):
453
+ # Quantum circuit with 2n+3 qubits
454
+ qubits = 2 * int(math.log2(N)) + 3
455
+ circuit = QuantumCircuit(qubits)
456
+
457
+ # Apply Hadamard gates for superposition
458
+ circuit.h(range(qubits//2))
459
+
460
+ # Controlled modular exponentiation
461
+ controlled_modexp(circuit, a, N)
462
+
463
+ # Inverse quantum Fourier transform
464
+ circuit.append(qft_dagger(qubits//2), range(qubits//2))
465
+
466
+ return measure_period(circuit)
467
+ ```
468
+
469
+ Output:
470
+
471
+ ```json
472
+ {
473
+ "entities": [
474
+ {
475
+ "name": "shor_algorithm",
476
+ "entityType": "quantum_algorithm",
477
+ "observations": ["Integer factorization algorithm", "Uses quantum period finding", "Exponential speedup over classical methods", "Requires 2n+3 qubits"]
478
+ },
479
+ {
480
+ "name": "quantum_period_finding",
481
+ "entityType": "quantum_subroutine",
482
+ "observations": ["Core quantum component of Shor's algorithm", "Uses superposition and QFT", "Finds period of modular exponentiation"]
483
+ },
484
+ {
485
+ "name": "controlled_modular_exponentiation",
486
+ "entityType": "quantum_operation",
487
+ "observations": ["Quantum implementation of a^x mod N", "Key quantum operation in period finding"]
488
+ },
489
+ {
490
+ "name": "quantum_fourier_transform",
491
+ "entityType": "quantum_algorithm",
492
+ "observations": ["Quantum analogue of discrete Fourier transform", "Key component in period finding", "Applied in inverse form for period extraction"]
493
+ }
494
+ ],
495
+ "relations": [
496
+ {
497
+ "from": "shor_algorithm",
498
+ "to": "quantum_period_finding",
499
+ "relationType": ["uses", "depends_on"]
500
+ },
501
+ {
502
+ "from": "quantum_period_finding",
503
+ "to": "quantum_fourier_transform",
504
+ "relationType": ["applies", "uses"]
505
+ },
506
+ {
507
+ "from": "quantum_period_finding",
508
+ "to": "controlled_modular_exponentiation",
509
+ "relationType": ["performs", "implements"]
510
+ }
511
+ ]
512
+ }
513
+ ```
514
+
515
+ ### Example 5: Machine Learning Research with Context
516
+
517
+ Input:
518
+
519
+ Current File: `models/transformer_attention.py`
520
+
521
+ Existing Knowledge Context:
522
+ ```json
523
+ {
524
+ "entities": [
525
+ {
526
+ "name": "multi_head_attention",
527
+ "entityType": "neural_mechanism",
528
+ "observations": ["Core component of transformer architecture", "Allows model to focus on different positions"]
529
+ },
530
+ {
531
+ "name": "transformer_architecture",
532
+ "entityType": "neural_network",
533
+ "observations": ["Attention-based sequence-to-sequence model", "Introduced in 'Attention is All You Need'"]
534
+ }
535
+ ],
536
+ "relations": [
537
+ {
538
+ "from": "transformer_architecture",
539
+ "to": "multi_head_attention",
540
+ "relationType": ["contains", "uses"]
541
+ }
542
+ ]
543
+ }
544
+ ```
545
+
546
+ File Content:
547
+ ```
548
+ import torch
549
+ import torch.nn as nn
550
+ import math
551
+
552
+ class ScaledDotProductAttention(nn.Module):
553
+ def __init__(self, d_k):
554
+ super().__init__()
555
+ self.d_k = d_k
556
+ self.dropout = nn.Dropout(0.1)
557
+
558
+ def forward(self, query, key, value, mask=None):
559
+ # Attention(Q,K,V) = softmax(QK^T/√d_k)V
560
+ scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
561
+
562
+ if mask is not None:
563
+ scores.masked_fill_(mask == 0, -1e9)
564
+
565
+ attention_weights = torch.softmax(scores, dim=-1)
566
+ attention_weights = self.dropout(attention_weights)
567
+
568
+ output = torch.matmul(attention_weights, value)
569
+ return output, attention_weights
570
+
571
+ class MultiHeadAttention(nn.Module):
572
+ def __init__(self, d_model=512, num_heads=8):
573
+ super().__init__()
574
+ assert d_model % num_heads == 0
575
+
576
+ self.d_k = d_model // num_heads
577
+ self.num_heads = num_heads
578
+
579
+ self.w_q = nn.Linear(d_model, d_model)
580
+ self.w_k = nn.Linear(d_model, d_model)
581
+ self.w_v = nn.Linear(d_model, d_model)
582
+ self.w_o = nn.Linear(d_model, d_model)
583
+
584
+ self.attention = ScaledDotProductAttention(self.d_k)
585
+ ```
586
+
587
+ Output:
588
+
589
+ ```json
590
+ {
591
+ "entities": [
592
+ {
593
+ "name": "ScaledDotProductAttention",
594
+ "entityType": "neural_layer",
595
+ "observations": ["Implements attention formula: softmax(QK^T/√d_k)V", "Uses dropout for regularization", "Supports masking for padding tokens"]
596
+ },
597
+ {
598
+ "name": "multi_head_attention",
599
+ "entityType": "neural_mechanism",
600
+ "observations": ["Core component of transformer architecture", "Allows model to focus on different positions", "Uses 8 heads with d_model=512 by default", "Projects to d_k=64 dimensions per head"]
601
+ },
602
+ {
603
+ "name": "attention_formula",
604
+ "entityType": "mathematical_operation",
605
+ "observations": ["QK^T/√d_k followed by softmax and multiplication with V", "Scaling by √d_k prevents vanishing gradients"]
606
+ },
607
+ {
608
+ "name": "linear_projections",
609
+ "entityType": "neural_operation",
610
+ "observations": ["W_q, W_k, W_v for query/key/value transformation", "W_o for output projection"]
611
+ }
612
+ ],
613
+ "relations": [
614
+ {
615
+ "from": "multi_head_attention",
616
+ "to": "ScaledDotProductAttention",
617
+ "relationType": ["contains", "uses"]
618
+ },
619
+ {
620
+ "from": "ScaledDotProductAttention",
621
+ "to": "attention_formula",
622
+ "relationType": ["implements", "computes"]
623
+ },
624
+ {
625
+ "from": "multi_head_attention",
626
+ "to": "linear_projections",
627
+ "relationType": ["uses", "applies"]
628
+ }
629
+ ]
630
+ }
631
+ ```
632
+
633
+ ### Example 6: Chemical Synthesis Protocol
634
+
635
+ Input:
636
+
637
+ Current File: `synthesis/aspirin_synthesis.md`
638
+
639
+ Existing Knowledge Context:
640
+ ```json
641
+ {
642
+ "entities": [],
643
+ "relations": []
644
+ }
645
+ ```
646
+
647
+ File Content:
648
+ ```
649
+ # Aspirin Synthesis via Acetylation
650
+
651
+ ## Reaction
652
+ Salicylic acid + Acetic anhydride → Aspirin + Acetic acid
653
+
654
+ ## Procedure
655
+ 1. Heat 2.0g salicylic acid with 5.0mL acetic anhydride
656
+ 2. Add 3 drops concentrated H₂SO₄ as catalyst
657
+ 3. Heat at 85°C for 15 minutes
658
+ 4. Cool and add 20mL cold water to precipitate product
659
+ 5. Filter and recrystallize from ethanol
660
+
661
+ ## Yield
662
+ Theoretical yield: 2.61g (MW: 180.16 g/mol)
663
+ Actual yield: 2.31g (88.5% yield)
664
+ ```
665
+
666
+ Output:
667
+
668
+ ```json
669
+ {
670
+ "entities": [
671
+ {
672
+ "name": "aspirin_synthesis",
673
+ "entityType": "chemical_reaction",
674
+ "observations": ["Acetylation reaction", "88.5% yield achieved", "Requires heating at 85°C for 15 minutes"]
675
+ },
676
+ {
677
+ "name": "salicylic_acid",
678
+ "entityType": "chemical_compound",
679
+ "observations": ["Starting material", "2.0g used in synthesis", "Phenolic acid"]
680
+ },
681
+ {
682
+ "name": "acetic_anhydride",
683
+ "entityType": "chemical_reagent",
684
+ "observations": ["Acetylating agent", "5.0mL used", "Provides acetyl group"]
685
+ },
686
+ {
687
+ "name": "aspirin",
688
+ "entityType": "pharmaceutical_compound",
689
+ "observations": ["Product of synthesis", "MW: 180.16 g/mol", "Theoretical yield: 2.61g"]
690
+ },
691
+ {
692
+ "name": "sulfuric_acid_catalyst",
693
+ "entityType": "catalyst",
694
+ "observations": ["Concentrated H₂SO₄", "3 drops used", "Accelerates acetylation reaction"]
695
+ }
696
+ ],
697
+ "relations": [
698
+ {
699
+ "from": "salicylic_acid",
700
+ "to": "aspirin",
701
+ "relationType": ["converts_to", "reacts_to_form"]
702
+ },
703
+ {
704
+ "from": "acetic_anhydride",
705
+ "to": "aspirin",
706
+ "relationType": ["acetylates_to_form"]
707
+ },
708
+ {
709
+ "from": "sulfuric_acid_catalyst",
710
+ "to": "aspirin_synthesis",
711
+ "relationType": ["catalyzes", "accelerates"]
712
+ }
713
+ ]
714
+ }
715
+ ```
716
+
717
+ ### Example 7: Neuroscience Experiment
718
+
719
+ Input:
720
+
721
+ Current File: `experiments/neuron_recording.py`
722
+
723
+ Existing Knowledge Context:
724
+ ```json
725
+ {
726
+ "entities": [
727
+ {
728
+ "name": "action_potential",
729
+ "entityType": "electrical_signal",
730
+ "observations": ["Neural spike", "All-or-nothing electrical event"]
731
+ }
732
+ ],
733
+ "relations": []
734
+ }
735
+ ```
736
+
737
+ File Content:
738
+ ```
739
+ import numpy as np
740
+ from scipy import signal
741
+
742
+ def detect_spikes(voltage_trace, threshold=-30, sample_rate=20000):
743
+ """
744
+ Detect action potentials in intracellular voltage recordings.
745
+
746
+ Args:
747
+ voltage_trace: Voltage data in mV
748
+ threshold: Spike detection threshold in mV
749
+ sample_rate: Sampling frequency in Hz
750
+ """
751
+ # High-pass filter to remove slow drift
752
+ b, a = signal.butter(4, 300/(sample_rate/2), 'high')
753
+ filtered = signal.filtfilt(b, a, voltage_trace)
754
+
755
+ # Find threshold crossings
756
+ crossings = np.where(np.diff(np.sign(filtered - threshold)))[0]
757
+
758
+ # Extract spike waveforms (1ms windows)
759
+ window_size = int(sample_rate * 0.001)
760
+ spikes = []
761
+
762
+ for crossing in crossings:
763
+ if crossing > window_size and crossing < len(filtered) - window_size:
764
+ spike = filtered[crossing-window_size:crossing+window_size]
765
+ spikes.append(spike)
766
+
767
+ return np.array(spikes), crossings
768
+
769
+ # Analysis parameters
770
+ RESTING_POTENTIAL = -70 # mV
771
+ SPIKE_THRESHOLD = -30 # mV
772
+ REFRACTORY_PERIOD = 2 # ms
773
+ ```
774
+
775
+ Output:
776
+
777
+ ```json
778
+ {
779
+ "entities": [
780
+ {
781
+ "name": "spike_detection_algorithm",
782
+ "entityType": "signal_processing_method",
783
+ "observations": ["Detects action potentials in voltage recordings", "Uses threshold crossing at -30mV", "Applies high-pass filtering at 300Hz"]
784
+ },
785
+ {
786
+ "name": "action_potential",
787
+ "entityType": "electrical_signal",
788
+ "observations": ["Neural spike", "All-or-nothing electrical event", "Detected above -30mV threshold", "Extracted in 1ms windows"]
789
+ },
790
+ {
791
+ "name": "intracellular_recording",
792
+ "entityType": "experimental_technique",
793
+ "observations": ["Voltage measurements from inside neurons", "20kHz sampling rate", "Measures membrane potential"]
794
+ },
795
+ {
796
+ "name": "high_pass_filter",
797
+ "entityType": "signal_filter",
798
+ "observations": ["4th order Butterworth filter", "300Hz cutoff frequency", "Removes slow voltage drift"]
799
+ },
800
+ {
801
+ "name": "resting_potential",
802
+ "entityType": "physiological_parameter",
803
+ "observations": ["Baseline membrane voltage", "Set at -70mV", "Stable state between spikes"]
804
+ }
805
+ ],
806
+ "relations": [
807
+ {
808
+ "from": "spike_detection_algorithm",
809
+ "to": "action_potential",
810
+ "relationType": ["detects", "identifies"]
811
+ },
812
+ {
813
+ "from": "high_pass_filter",
814
+ "to": "spike_detection_algorithm",
815
+ "relationType": ["preprocesses_for"]
816
+ },
817
+ {
818
+ "from": "intracellular_recording",
819
+ "to": "action_potential",
820
+ "relationType": ["records", "measures"]
821
+ },
822
+ {
823
+ "from": "resting_potential",
824
+ "to": "action_potential",
825
+ "relationType": ["baseline_for"]
826
+ }
827
+ ]
828
+ }
829
+ ```
830
+
831
+ ### Example 8: Edge Case - Malformed Content
832
+
833
+ Input:
834
+
835
+ Current File: `corrupted.txt`
836
+
837
+ Existing Knowledge Context:
838
+ ```json
839
+ {
840
+ "entities": [],
841
+ "relations": []
842
+ }
843
+ ```
844
+
845
+ File Content:
846
+ ```
847
+ X H qrewf __TEXT __text eeee 0 n 0 __stubs __TEXT 22e4e __TEXT 8 __cstring afdsaa __unwind_info __TEXT H __DATA_CONST __got adsf __DATA __la_symbol_ptr __DATA __data __DATA H __LINKEDIT 0 8 X 0 8 X P usr lib dyld D 3 XK U 2 0 8 d usr lib libSystem B dylib UH H E H u H H 5 O H E E 6 M H H 1 A A A bA L aA AS 9 h h h h s
848
+ ```
849
+
850
+ Output:
851
+
852
+ ```json
853
+ {
854
+ "entities": [],
855
+ "relations": []
856
+ }
857
+ ```
858
+
859
+ ## FINAL REMINDER
860
+
861
+ Your success is measured by the **meaningfulness and accuracy** of extracted knowledge. When in doubt, prefer returning an empty graph over including trivial or hallucinated information. Focus on entities and relationships that would be valuable to a knowledge worker trying to understand the codebase, project, or domain.