@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Source Code Processor
3
+ *
4
+ * Processes source code files for 50+ programming languages.
5
+ * Uses extension-based detection as primary method (more reliable than MIME types for code).
6
+ *
7
+ * Key features:
8
+ * - Supports 50+ programming languages via extension detection
9
+ * - Handles exact filename matches (Dockerfile, Makefile, etc.)
10
+ * - Line count truncation to prevent token overflow
11
+ * - Language detection for syntax highlighting metadata
12
+ *
13
+ * Priority: 120 (lower priority - text-based content, processed after binary/document formats)
14
+ *
15
+ * @module processors/code/SourceCodeProcessor
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { sourceCodeProcessor, processSourceCode, isSourceCodeFile } from "./code/index.js";
20
+ *
21
+ * // Check if a file is source code
22
+ * if (isSourceCodeFile("text/plain", "app.ts")) {
23
+ * const result = await processSourceCode({
24
+ * id: "file-123",
25
+ * name: "app.ts",
26
+ * mimetype: "text/plain",
27
+ * size: 1024,
28
+ * buffer: codeBuffer,
29
+ * });
30
+ *
31
+ * if (result.success) {
32
+ * console.log(`Language: ${result.data.language}`);
33
+ * console.log(`Lines: ${result.data.lineCount}`);
34
+ * }
35
+ * }
36
+ * ```
37
+ */
38
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
39
+ import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
40
+ import { detectLanguageFromFilename } from "../config/languageMap.js";
41
+ export type { ProcessedSourceCode } from "../base/types.js";
42
+ import type { ProcessedSourceCode } from "../base/types.js";
43
+ /**
44
+ * Source Code Processor - handles 50+ programming languages.
45
+ *
46
+ * Uses extension-based detection as the primary method since MIME types
47
+ * for source code are often unreliable (many are just "text/plain").
48
+ *
49
+ * Priority: 120 (lower priority than binary/document formats)
50
+ *
51
+ * @example
52
+ * ```typescript
53
+ * const processor = new SourceCodeProcessor();
54
+ *
55
+ * const result = await processor.processFile({
56
+ * id: "file-123",
57
+ * name: "main.py",
58
+ * mimetype: "text/plain",
59
+ * size: 2048,
60
+ * buffer: pythonCodeBuffer,
61
+ * });
62
+ *
63
+ * if (result.success) {
64
+ * console.log(`Language: ${result.data.language}`); // "Python"
65
+ * }
66
+ * ```
67
+ */
68
+ export declare class SourceCodeProcessor extends BaseFileProcessor<ProcessedSourceCode> {
69
+ /**
70
+ * Supported file extensions for source code (a copy of SOURCE_CODE_EXTENSIONS).
71
+ * Includes 50+ extensions covering all major programming languages.
72
+ */
73
+ private static readonly supportedExtensions;
74
+ /**
75
+ * Common MIME types for source code files, used only as the fallback check.
76
+ * Note: Extension-based detection is preferred as MIME types are often unreliable.
77
+ */
78
+ private static readonly supportedMimeTypes;
79
+ constructor();
80
+ /**
81
+ * Override that inspects the filename before falling back to the base-class check.
82
+ * Source code MIME types are often unreliable (e.g., "text/plain" for .ts files),
83
+ * so exact filenames and extensions are tested first.
84
+ *
85
+ * Exact filename matches (e.g., Dockerfile, Makefile) are tried on the name as given and on its basename.
86
+ *
87
+ * @param mimetype - MIME type of the file (only consulted when the filename checks do not match)
88
+ * @param filename - Filename or path; an empty filename is never supported
89
+ * @returns true if the file is a supported source code file
90
+ */
91
+ isFileSupported(mimetype: string, filename: string): boolean;
92
+ /**
93
+ * Build the processed source code result.
94
+ * Decodes the buffer as UTF-8, detects language from the file name, and truncates to the configured line limit.
95
+ *
96
+ * @param buffer - Raw file content
97
+ * @param fileInfo - Original file information
98
+ * @returns Processed source code with metadata (language, lineCount, truncated, encoding)
99
+ */
100
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedSourceCode;
101
+ /**
102
+ * Extract the lower-cased file extension from a filename.
103
+ *
104
+ * @param filename - Filename to extract extension from
105
+ * @returns Lower-cased extension with leading dot (e.g., ".ts") or null if no extension
106
+ */
107
+ private getExtension;
108
+ }
109
+ /**
110
+ * Shared module-level instance of the SourceCodeProcessor.
111
+ * The isSourceCodeFile and processSourceCode helpers of this module delegate to it.
112
+ */
113
+ export declare const sourceCodeProcessor: SourceCodeProcessor;
114
+ /**
115
+ * Check if a file is a source code file by delegating to sourceCodeProcessor.isFileSupported.
116
+ *
117
+ * @param mimetype - MIME type of the file (only consulted when the filename checks do not match)
118
+ * @param filename - Filename used for exact-name and extension-based detection
119
+ * @returns true if the file is a supported source code file
120
+ *
121
+ * @example
122
+ * ```typescript
123
+ * if (isSourceCodeFile("text/plain", "app.ts")) {
124
+ * console.log("This is a TypeScript file");
125
+ * }
126
+ * ```
127
+ */
128
+ export declare function isSourceCodeFile(mimetype: string, filename: string): boolean;
129
+ /**
130
+ * Validate source code file size against the SIZE_LIMITS.SOURCE_CODE_MAX_MB limit.
131
+ *
132
+ * @param sizeBytes - File size in bytes
133
+ * @returns true if sizeBytes is at most the limit converted to bytes (MB * 1024 * 1024), inclusive
134
+ */
135
+ export declare function validateSourceCodeSize(sizeBytes: number): boolean;
136
+ /**
137
+ * Process a source code file using the shared sourceCodeProcessor instance.
138
+ *
139
+ * @param fileInfo - File information (can include URL or buffer)
140
+ * @param options - Optional processing options, forwarded unchanged to processFile
141
+ * @returns Processing result with success flag and either data or error
142
+ *
143
+ * @example
144
+ * ```typescript
145
+ * const result = await processSourceCode({
146
+ * id: "file-123",
147
+ * name: "main.py",
148
+ * mimetype: "text/plain",
149
+ * size: 2048,
150
+ * buffer: pythonCodeBuffer,
151
+ * });
152
+ *
153
+ * if (result.success) {
154
+ * console.log(`Detected language: ${result.data.language}`);
155
+ * console.log(`Line count: ${result.data.lineCount}`);
156
+ * console.log(`Truncated: ${result.data.truncated}`);
157
+ * }
158
+ * ```
159
+ */
160
+ export declare function processSourceCode(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedSourceCode>>;
161
+ /**
162
+ * Alias of detectLanguageFromFilename, kept for backward compatibility with the Curator codebase.
163
+ * Detects programming language from a filename.
164
+ *
165
+ * @param filename - The filename to detect language from
166
+ * @returns The detected language name or 'Unknown'
167
+ *
168
+ * @example
169
+ * ```typescript
170
+ * detectLanguage("app.ts") // Returns "TypeScript"
171
+ * detectLanguage("Dockerfile") // Returns "Dockerfile"
172
+ * ```
173
+ */
174
+ export declare const detectLanguage: typeof detectLanguageFromFilename;
@@ -0,0 +1,304 @@
1
+ /**
2
+ * Source Code Processor
3
+ *
4
+ * Processes source code files for 50+ programming languages.
5
+ * Uses extension-based detection as primary method (more reliable than MIME types for code).
6
+ *
7
+ * Key features:
8
+ * - Supports 50+ programming languages via extension detection
9
+ * - Handles exact filename matches (Dockerfile, Makefile, etc.)
10
+ * - Line count truncation to prevent token overflow
11
+ * - Language detection for syntax highlighting metadata
12
+ *
13
+ * Priority: 120 (lower priority - text-based content, processed after binary/document formats)
14
+ *
15
+ * @module processors/code/SourceCodeProcessor
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { sourceCodeProcessor, processSourceCode, isSourceCodeFile } from "./code/index.js";
20
+ *
21
+ * // Check if a file is source code
22
+ * if (isSourceCodeFile("text/plain", "app.ts")) {
23
+ * const result = await processSourceCode({
24
+ * id: "file-123",
25
+ * name: "app.ts",
26
+ * mimetype: "text/plain",
27
+ * size: 1024,
28
+ * buffer: codeBuffer,
29
+ * });
30
+ *
31
+ * if (result.success) {
32
+ * console.log(`Language: ${result.data.language}`);
33
+ * console.log(`Lines: ${result.data.lineCount}`);
34
+ * }
35
+ * }
36
+ * ```
37
+ */
38
+ import { basename as pathBasename } from "node:path";
39
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
40
+ import { EXACT_FILENAME_MAP, SIZE_LIMITS, SOURCE_CODE_EXTENSIONS, } from "../config/index.js";
41
+ import { detectLanguageFromFilename } from "../config/languageMap.js";
42
+ // =============================================================================
43
+ // SOURCE CODE PROCESSOR
44
+ // =============================================================================
45
/**
 * Source Code Processor - handles source files for many programming languages.
 *
 * Detection is filename-driven: exact filenames (Dockerfile, Makefile, ...)
 * and file extensions are checked before the MIME type, because MIME types
 * for source code are often unreliable (many are just "text/plain").
 *
 * NOTE(review): the original notes give this processor priority 120 (after
 * binary/document formats); the priority itself is not set in this class.
 *
 * @example
 * ```typescript
 * const processor = new SourceCodeProcessor();
 *
 * const result = await processor.processFile({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Language: ${result.data.language}`); // "Python"
 * }
 * ```
 */
export class SourceCodeProcessor extends BaseFileProcessor {
    /**
     * Supported file extensions for source code.
     * A copy of SOURCE_CODE_EXTENSIONS, so later changes to this list do not
     * touch the shared configuration array.
     */
    static supportedExtensions = [
        ...SOURCE_CODE_EXTENSIONS,
    ];
    /**
     * Common MIME types for source code files.
     * Only used as the fallback check: extension-based detection is preferred
     * because MIME types are often unreliable.
     */
    static supportedMimeTypes = [
        "text/plain",
        "text/x-python",
        "text/javascript",
        "text/typescript",
        "application/javascript",
        "application/typescript",
        "application/x-javascript",
        "text/x-java",
        "text/x-java-source",
        "text/x-c",
        "text/x-csrc",
        "text/x-c++",
        "text/x-c++src",
        "text/x-csharp",
        "text/x-go",
        "text/x-rust",
        "text/x-ruby",
        "text/x-php",
        "text/x-sh",
        "text/x-shellscript",
        "application/x-sh",
        "text/x-perl",
        "text/x-lua",
        "text/x-sql",
        "text/x-swift",
        "text/x-kotlin",
        "text/x-scala",
        "text/x-haskell",
        "text/x-elixir",
        "text/x-erlang",
        "text/x-clojure",
        "text/x-fsharp",
        "text/x-ocaml",
        "text/x-lisp",
        "text/x-scheme",
        "text/x-groovy",
        "text/x-powershell",
        "text/x-r",
        "text/x-julia",
        "text/x-nim",
        "text/x-zig",
        "text/x-dart",
        "text/x-crystal",
        "text/x-d",
        "text/x-asm",
        "text/x-fortran",
        "text/x-cobol",
        "text/x-pascal",
        "text/x-ada",
        "text/css",
        "text/x-scss",
        "text/x-sass",
        "text/x-less",
        "application/x-httpd-php",
    ];
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.SOURCE_CODE_MAX_MB,
            timeoutMs: 30000,
            supportedMimeTypes: SourceCodeProcessor.supportedMimeTypes,
            supportedExtensions: SourceCodeProcessor.supportedExtensions,
            fileTypeName: "SourceCode",
            defaultFilename: "code.txt",
        });
    }
    /**
     * Decide whether a file is supported source code.
     *
     * Checks, in order: exact filename match on the name as given, exact
     * filename match on its basename (in case a full path is passed), the
     * file extension, and finally the base-class check via `super`.
     *
     * @param mimetype - MIME type of the file (only used by the fallback check)
     * @param filename - Filename or path; an empty/missing value is never supported
     * @returns true if the file is a supported source code file
     */
    isFileSupported(mimetype, filename) {
        if (!filename) {
            return false;
        }
        // Own-property lookup only: a bare `EXACT_FILENAME_MAP[name]` also
        // resolves inherited Object.prototype members, so names such as
        // "constructor" or "toString" would wrongly count as exact matches.
        // The truthiness test keeps the original handling of falsy map values.
        const isExactFilename = (name) => Object.hasOwn(EXACT_FILENAME_MAP, name) &&
            Boolean(EXACT_FILENAME_MAP[name]);
        if (isExactFilename(filename) ||
            isExactFilename(pathBasename(filename))) {
            return true;
        }
        // Check by extension (more reliable for source code than MIME type).
        // getExtension() already lower-cases its result.
        const ext = this.getExtension(filename);
        if (ext && SourceCodeProcessor.supportedExtensions.includes(ext)) {
            return true;
        }
        // Fall back to the base-class check
        return super.isFileSupported(mimetype, filename);
    }
    /**
     * Build the processed source code result.
     * Decodes the buffer as UTF-8, detects the language from the file name,
     * and keeps at most SIZE_LIMITS.MAX_SOURCE_CODE_LINES lines.
     *
     * @param buffer - Raw file content
     * @param fileInfo - Original file information
     * @returns Processed source code with metadata; `lineCount` is the number
     *   of lines kept (split on "\n"), and `truncated` tells whether lines were dropped
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        const lines = content.split("\n");
        const originalLineCount = lines.length;
        const language = detectLanguageFromFilename(fileInfo.name || "");
        const maxLines = SIZE_LIMITS.MAX_SOURCE_CODE_LINES;
        // Truncate if too many lines, and say so at the end of the content
        let finalContent = content;
        let truncated = false;
        if (lines.length > maxLines) {
            truncated = true;
            finalContent = lines.slice(0, maxLines).join("\n");
            finalContent += `\n\n// ... truncated at ${maxLines} lines, total ${originalLineCount} lines ...`;
        }
        return {
            content: finalContent,
            language,
            lineCount: Math.min(lines.length, maxLines),
            truncated,
            encoding: "utf-8",
            buffer,
            mimetype: fileInfo.mimetype || "text/plain",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
    /**
     * Extract the lower-cased file extension from a filename.
     *
     * @param filename - Filename to extract extension from
     * @returns Lower-cased extension with leading dot (e.g., ".ts"), or null
     *   when the name contains no dot followed by at least one character
     */
    getExtension(filename) {
        const match = filename.toLowerCase().match(/\.[^.]+$/);
        return match ? match[0] : null;
    }
}
226
+ // =============================================================================
227
+ // SINGLETON INSTANCE
228
+ // =============================================================================
229
+ /**
230
+ * Shared module-level instance of the SourceCodeProcessor.
231
+ * The isSourceCodeFile and processSourceCode helpers below delegate to it.
232
+ */
233
+ export const sourceCodeProcessor = new SourceCodeProcessor();
234
+ // =============================================================================
235
+ // HELPER FUNCTIONS
236
+ // =============================================================================
237
/**
 * Tell whether a file counts as source code.
 *
 * Thin wrapper around the shared processor's `isFileSupported`.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename for extension-based detection
 * @returns true if the file is a supported source code file
 *
 * @example
 * ```typescript
 * if (isSourceCodeFile("text/plain", "app.ts")) {
 *   console.log("This is a TypeScript file");
 * }
 * ```
 */
export function isSourceCodeFile(mimetype, filename) {
    const supported = sourceCodeProcessor.isFileSupported(mimetype, filename);
    return supported;
}
254
/**
 * Check a source code file size against the configured limit.
 *
 * The limit is SIZE_LIMITS.SOURCE_CODE_MAX_MB converted to bytes
 * (MB * 1024 * 1024); a size exactly equal to the limit is accepted.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if the file size is within limits
 */
export function validateSourceCodeSize(sizeBytes) {
    return sizeBytes <= SIZE_LIMITS.SOURCE_CODE_MAX_MB * 1024 * 1024;
}
264
/**
 * Run a source code file through the shared processor instance.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options, forwarded unchanged
 * @returns Processing result with success flag and either data or error
 *
 * @example
 * ```typescript
 * const result = await processSourceCode({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Detected language: ${result.data.language}`);
 *   console.log(`Line count: ${result.data.lineCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export async function processSourceCode(fileInfo, options) {
    const outcome = await sourceCodeProcessor.processFile(fileInfo, options);
    return outcome;
}
291
+ /**
292
+ * Alias of detectLanguageFromFilename, kept for backward compatibility with the Curator codebase.
293
+ * Both names refer to the same function object.
294
+ *
295
+ * @param filename - The filename to detect language from
296
+ * @returns The detected language name or 'Unknown'
297
+ *
298
+ * @example
299
+ * ```typescript
300
+ * detectLanguage("app.ts") // Returns "TypeScript"
301
+ * detectLanguage("Dockerfile") // Returns "Dockerfile"
302
+ * ```
303
+ */
304
+ export const detectLanguage = detectLanguageFromFilename;
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Code Processors Module
3
+ *
4
+ * Provides file processors for source code files across 50+ programming languages, and re-exports the ConfigProcessor API.
5
+ * Uses extension-based detection as primary method for reliable identification.
6
+ *
7
+ * @module processors/code
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import {
12
+ * // Processor class and singleton
13
+ * SourceCodeProcessor,
14
+ * sourceCodeProcessor,
15
+ *
16
+ * // Helper functions
17
+ * isSourceCodeFile,
18
+ * processSourceCode,
19
+ * validateSourceCodeSize,
20
+ * detectLanguage,
21
+ *
22
+ * // Types
23
+ * type ProcessedSourceCode,
24
+ * } from "./code/index.js";
25
+ *
26
+ * // Check if a file is source code
27
+ * if (isSourceCodeFile("text/plain", "main.py")) {
28
+ * const result = await processSourceCode({
29
+ * id: "file-123",
30
+ * name: "main.py",
31
+ * mimetype: "text/plain",
32
+ * size: 1024,
33
+ * buffer: codeBuffer,
34
+ * });
35
+ *
36
+ * if (result.success) {
37
+ * console.log(`Language: ${result.data.language}`); // "Python"
38
+ * console.log(`Lines: ${result.data.lineCount}`);
39
+ * }
40
+ * }
41
+ * ```
42
+ */
43
+ export { detectLanguage, isSourceCodeFile, type ProcessedSourceCode, processSourceCode, SourceCodeProcessor, sourceCodeProcessor, validateSourceCodeSize, } from "./SourceCodeProcessor.js";
44
+ export { ConfigProcessor, configProcessor, isConfigFile, type ProcessedConfig, processConfig, } from "./ConfigProcessor.js";
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Code Processors Module
3
+ *
4
+ * Provides file processors for source code files across 50+ programming languages, and re-exports the ConfigProcessor API.
5
+ * Uses extension-based detection as primary method for reliable identification.
6
+ *
7
+ * @module processors/code
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import {
12
+ * // Processor class and singleton
13
+ * SourceCodeProcessor,
14
+ * sourceCodeProcessor,
15
+ *
16
+ * // Helper functions
17
+ * isSourceCodeFile,
18
+ * processSourceCode,
19
+ * validateSourceCodeSize,
20
+ * detectLanguage,
21
+ *
22
+ * // Types
23
+ * type ProcessedSourceCode,
24
+ * } from "./code/index.js";
25
+ *
26
+ * // Check if a file is source code
27
+ * if (isSourceCodeFile("text/plain", "main.py")) {
28
+ * const result = await processSourceCode({
29
+ * id: "file-123",
30
+ * name: "main.py",
31
+ * mimetype: "text/plain",
32
+ * size: 1024,
33
+ * buffer: codeBuffer,
34
+ * });
35
+ *
36
+ * if (result.success) {
37
+ * console.log(`Language: ${result.data.language}`); // "Python"
38
+ * console.log(`Lines: ${result.data.lineCount}`);
39
+ * }
40
+ * }
41
+ * ```
42
+ */
43
+ // =============================================================================
44
+ // SOURCE CODE PROCESSOR
45
+ // =============================================================================
46
+ export {
47
+ // Helper functions
48
+ detectLanguage, isSourceCodeFile, processSourceCode,
49
+ // Processor class and singleton
50
+ SourceCodeProcessor, sourceCodeProcessor, validateSourceCodeSize, } from "./SourceCodeProcessor.js";
51
+ // =============================================================================
52
+ // CONFIG PROCESSOR
53
+ // =============================================================================
54
+ export {
55
+ // Processor class
56
+ ConfigProcessor,
57
+ // Singleton instance
58
+ configProcessor,
59
+ // Helper functions
60
+ isConfigFile, processConfig, } from "./ConfigProcessor.js";