@framers/agentos 0.1.120 → 0.1.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (405)
  1. package/README.md +21 -0
  2. package/dist/api/agency.d.ts.map +1 -1
  3. package/dist/api/agency.js +227 -84
  4. package/dist/api/agency.js.map +1 -1
  5. package/dist/api/analyzeVideo.d.ts +127 -0
  6. package/dist/api/analyzeVideo.d.ts.map +1 -0
  7. package/dist/api/analyzeVideo.js +136 -0
  8. package/dist/api/analyzeVideo.js.map +1 -0
  9. package/dist/api/detectScenes.d.ts +82 -0
  10. package/dist/api/detectScenes.d.ts.map +1 -0
  11. package/dist/api/detectScenes.js +67 -0
  12. package/dist/api/detectScenes.js.map +1 -0
  13. package/dist/api/generateImage.d.ts +7 -0
  14. package/dist/api/generateImage.d.ts.map +1 -1
  15. package/dist/api/generateImage.js +133 -9
  16. package/dist/api/generateImage.js.map +1 -1
  17. package/dist/api/generateMusic.d.ts +98 -0
  18. package/dist/api/generateMusic.d.ts.map +1 -0
  19. package/dist/api/generateMusic.js +319 -0
  20. package/dist/api/generateMusic.js.map +1 -0
  21. package/dist/api/generateSFX.d.ts +96 -0
  22. package/dist/api/generateSFX.d.ts.map +1 -0
  23. package/dist/api/generateSFX.js +317 -0
  24. package/dist/api/generateSFX.js.map +1 -0
  25. package/dist/api/generateVideo.d.ts +113 -0
  26. package/dist/api/generateVideo.d.ts.map +1 -0
  27. package/dist/api/generateVideo.js +342 -0
  28. package/dist/api/generateVideo.js.map +1 -0
  29. package/dist/api/model.d.ts.map +1 -1
  30. package/dist/api/model.js +8 -4
  31. package/dist/api/model.js.map +1 -1
  32. package/dist/api/performOCR.d.ts +169 -0
  33. package/dist/api/performOCR.d.ts.map +1 -0
  34. package/dist/api/performOCR.js +198 -0
  35. package/dist/api/performOCR.js.map +1 -0
  36. package/dist/api/provider-defaults.d.ts +7 -5
  37. package/dist/api/provider-defaults.d.ts.map +1 -1
  38. package/dist/api/provider-defaults.js +32 -10
  39. package/dist/api/provider-defaults.js.map +1 -1
  40. package/dist/api/strategies/debate.d.ts.map +1 -1
  41. package/dist/api/strategies/debate.js +1 -0
  42. package/dist/api/strategies/debate.js.map +1 -1
  43. package/dist/api/strategies/graph.d.ts.map +1 -1
  44. package/dist/api/strategies/graph.js +69 -13
  45. package/dist/api/strategies/graph.js.map +1 -1
  46. package/dist/api/strategies/hierarchical.d.ts.map +1 -1
  47. package/dist/api/strategies/hierarchical.js +1 -0
  48. package/dist/api/strategies/hierarchical.js.map +1 -1
  49. package/dist/api/strategies/parallel.d.ts.map +1 -1
  50. package/dist/api/strategies/parallel.js +1 -0
  51. package/dist/api/strategies/parallel.js.map +1 -1
  52. package/dist/api/strategies/review-loop.d.ts.map +1 -1
  53. package/dist/api/strategies/review-loop.js +1 -0
  54. package/dist/api/strategies/review-loop.js.map +1 -1
  55. package/dist/api/strategies/sequential.d.ts.map +1 -1
  56. package/dist/api/strategies/sequential.js +54 -48
  57. package/dist/api/strategies/sequential.js.map +1 -1
  58. package/dist/api/streamBuffer.d.ts +20 -0
  59. package/dist/api/streamBuffer.d.ts.map +1 -0
  60. package/dist/api/streamBuffer.js +81 -0
  61. package/dist/api/streamBuffer.js.map +1 -0
  62. package/dist/api/types.d.ts +145 -5
  63. package/dist/api/types.d.ts.map +1 -1
  64. package/dist/api/types.js.map +1 -1
  65. package/dist/channels/adapters/RedditChannelAdapter.js.map +1 -1
  66. package/dist/core/audio/AudioProcessor.d.ts.map +1 -1
  67. package/dist/core/audio/AudioProcessor.js +1 -0
  68. package/dist/core/audio/AudioProcessor.js.map +1 -1
  69. package/dist/core/audio/EnvironmentalCalibrator.d.ts.map +1 -1
  70. package/dist/core/audio/EnvironmentalCalibrator.js +1 -0
  71. package/dist/core/audio/EnvironmentalCalibrator.js.map +1 -1
  72. package/dist/core/audio/FallbackAudioProxy.d.ts +169 -0
  73. package/dist/core/audio/FallbackAudioProxy.d.ts.map +1 -0
  74. package/dist/core/audio/FallbackAudioProxy.js +236 -0
  75. package/dist/core/audio/FallbackAudioProxy.js.map +1 -0
  76. package/dist/core/audio/IAudioGenerator.d.ts +103 -0
  77. package/dist/core/audio/IAudioGenerator.d.ts.map +1 -0
  78. package/dist/core/audio/IAudioGenerator.js +24 -0
  79. package/dist/core/audio/IAudioGenerator.js.map +1 -0
  80. package/dist/core/audio/index.d.ts +54 -0
  81. package/dist/core/audio/index.d.ts.map +1 -1
  82. package/dist/core/audio/index.js +93 -0
  83. package/dist/core/audio/index.js.map +1 -1
  84. package/dist/core/audio/providers/AudioGenLocalProvider.d.ts +136 -0
  85. package/dist/core/audio/providers/AudioGenLocalProvider.d.ts.map +1 -0
  86. package/dist/core/audio/providers/AudioGenLocalProvider.js +235 -0
  87. package/dist/core/audio/providers/AudioGenLocalProvider.js.map +1 -0
  88. package/dist/core/audio/providers/ElevenLabsSFXProvider.d.ts +107 -0
  89. package/dist/core/audio/providers/ElevenLabsSFXProvider.d.ts.map +1 -0
  90. package/dist/core/audio/providers/ElevenLabsSFXProvider.js +154 -0
  91. package/dist/core/audio/providers/ElevenLabsSFXProvider.js.map +1 -0
  92. package/dist/core/audio/providers/FalAudioProvider.d.ts +207 -0
  93. package/dist/core/audio/providers/FalAudioProvider.d.ts.map +1 -0
  94. package/dist/core/audio/providers/FalAudioProvider.js +315 -0
  95. package/dist/core/audio/providers/FalAudioProvider.js.map +1 -0
  96. package/dist/core/audio/providers/MusicGenLocalProvider.d.ts +136 -0
  97. package/dist/core/audio/providers/MusicGenLocalProvider.d.ts.map +1 -0
  98. package/dist/core/audio/providers/MusicGenLocalProvider.js +235 -0
  99. package/dist/core/audio/providers/MusicGenLocalProvider.js.map +1 -0
  100. package/dist/core/audio/providers/ReplicateAudioProvider.d.ts +200 -0
  101. package/dist/core/audio/providers/ReplicateAudioProvider.d.ts.map +1 -0
  102. package/dist/core/audio/providers/ReplicateAudioProvider.js +346 -0
  103. package/dist/core/audio/providers/ReplicateAudioProvider.js.map +1 -0
  104. package/dist/core/audio/providers/StableAudioProvider.d.ts +138 -0
  105. package/dist/core/audio/providers/StableAudioProvider.d.ts.map +1 -0
  106. package/dist/core/audio/providers/StableAudioProvider.js +192 -0
  107. package/dist/core/audio/providers/StableAudioProvider.js.map +1 -0
  108. package/dist/core/audio/providers/SunoProvider.d.ts +182 -0
  109. package/dist/core/audio/providers/SunoProvider.d.ts.map +1 -0
  110. package/dist/core/audio/providers/SunoProvider.js +312 -0
  111. package/dist/core/audio/providers/SunoProvider.js.map +1 -0
  112. package/dist/core/audio/providers/UdioProvider.d.ts +177 -0
  113. package/dist/core/audio/providers/UdioProvider.d.ts.map +1 -0
  114. package/dist/core/audio/providers/UdioProvider.js +305 -0
  115. package/dist/core/audio/providers/UdioProvider.js.map +1 -0
  116. package/dist/core/audio/types.d.ts +257 -0
  117. package/dist/core/audio/types.d.ts.map +1 -0
  118. package/dist/core/audio/types.js +21 -0
  119. package/dist/core/audio/types.js.map +1 -0
  120. package/dist/core/images/FallbackImageProxy.d.ts +183 -0
  121. package/dist/core/images/FallbackImageProxy.d.ts.map +1 -0
  122. package/dist/core/images/FallbackImageProxy.js +283 -0
  123. package/dist/core/images/FallbackImageProxy.js.map +1 -0
  124. package/dist/core/images/IImageProvider.d.ts +1 -1
  125. package/dist/core/images/IImageProvider.d.ts.map +1 -1
  126. package/dist/core/images/index.d.ts +1 -0
  127. package/dist/core/images/index.d.ts.map +1 -1
  128. package/dist/core/images/index.js +1 -0
  129. package/dist/core/images/index.js.map +1 -1
  130. package/dist/core/llm/providers/AIModelProviderManager.d.ts +3 -1
  131. package/dist/core/llm/providers/AIModelProviderManager.d.ts.map +1 -1
  132. package/dist/core/llm/providers/AIModelProviderManager.js +8 -0
  133. package/dist/core/llm/providers/AIModelProviderManager.js.map +1 -1
  134. package/dist/core/llm/providers/errors/ClaudeCodeProviderError.d.ts +52 -0
  135. package/dist/core/llm/providers/errors/ClaudeCodeProviderError.d.ts.map +1 -0
  136. package/dist/core/llm/providers/errors/ClaudeCodeProviderError.js +36 -0
  137. package/dist/core/llm/providers/errors/ClaudeCodeProviderError.js.map +1 -0
  138. package/dist/core/llm/providers/errors/GeminiCLIProviderError.d.ts +32 -0
  139. package/dist/core/llm/providers/errors/GeminiCLIProviderError.d.ts.map +1 -0
  140. package/dist/core/llm/providers/errors/GeminiCLIProviderError.js +27 -0
  141. package/dist/core/llm/providers/errors/GeminiCLIProviderError.js.map +1 -0
  142. package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.d.ts +38 -0
  143. package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.d.ts.map +1 -0
  144. package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.js +128 -0
  145. package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.js.map +1 -0
  146. package/dist/core/llm/providers/implementations/ClaudeCodeProvider.d.ts +107 -0
  147. package/dist/core/llm/providers/implementations/ClaudeCodeProvider.d.ts.map +1 -0
  148. package/dist/core/llm/providers/implementations/ClaudeCodeProvider.js +504 -0
  149. package/dist/core/llm/providers/implementations/ClaudeCodeProvider.js.map +1 -0
  150. package/dist/core/llm/providers/implementations/GeminiCLIBridge.d.ts +60 -0
  151. package/dist/core/llm/providers/implementations/GeminiCLIBridge.d.ts.map +1 -0
  152. package/dist/core/llm/providers/implementations/GeminiCLIBridge.js +177 -0
  153. package/dist/core/llm/providers/implementations/GeminiCLIBridge.js.map +1 -0
  154. package/dist/core/llm/providers/implementations/GeminiCLIProvider.d.ts +55 -0
  155. package/dist/core/llm/providers/implementations/GeminiCLIProvider.d.ts.map +1 -0
  156. package/dist/core/llm/providers/implementations/GeminiCLIProvider.js +447 -0
  157. package/dist/core/llm/providers/implementations/GeminiCLIProvider.js.map +1 -0
  158. package/dist/core/media/ProviderPreferences.d.ts +158 -0
  159. package/dist/core/media/ProviderPreferences.d.ts.map +1 -0
  160. package/dist/core/media/ProviderPreferences.js +183 -0
  161. package/dist/core/media/ProviderPreferences.js.map +1 -0
  162. package/dist/core/subprocess/CLIRegistry.d.ts +71 -0
  163. package/dist/core/subprocess/CLIRegistry.d.ts.map +1 -0
  164. package/dist/core/subprocess/CLIRegistry.js +210 -0
  165. package/dist/core/subprocess/CLIRegistry.js.map +1 -0
  166. package/dist/core/subprocess/CLISubprocessBridge.d.ts +117 -0
  167. package/dist/core/subprocess/CLISubprocessBridge.d.ts.map +1 -0
  168. package/dist/core/subprocess/CLISubprocessBridge.js +199 -0
  169. package/dist/core/subprocess/CLISubprocessBridge.js.map +1 -0
  170. package/dist/core/subprocess/errors.d.ts +76 -0
  171. package/dist/core/subprocess/errors.d.ts.map +1 -0
  172. package/dist/core/subprocess/errors.js +75 -0
  173. package/dist/core/subprocess/errors.js.map +1 -0
  174. package/dist/core/subprocess/index.d.ts +11 -0
  175. package/dist/core/subprocess/index.d.ts.map +1 -0
  176. package/dist/core/subprocess/index.js +10 -0
  177. package/dist/core/subprocess/index.js.map +1 -0
  178. package/dist/core/subprocess/types.d.ts +100 -0
  179. package/dist/core/subprocess/types.d.ts.map +1 -0
  180. package/dist/core/subprocess/types.js +9 -0
  181. package/dist/core/subprocess/types.js.map +1 -0
  182. package/dist/core/video/FallbackVideoProxy.d.ts +166 -0
  183. package/dist/core/video/FallbackVideoProxy.d.ts.map +1 -0
  184. package/dist/core/video/FallbackVideoProxy.js +228 -0
  185. package/dist/core/video/FallbackVideoProxy.js.map +1 -0
  186. package/dist/core/video/IVideoAnalyzer.d.ts +29 -0
  187. package/dist/core/video/IVideoAnalyzer.d.ts.map +1 -0
  188. package/dist/core/video/IVideoAnalyzer.js +12 -0
  189. package/dist/core/video/IVideoAnalyzer.js.map +1 -0
  190. package/dist/core/video/IVideoGenerator.d.ts +76 -0
  191. package/dist/core/video/IVideoGenerator.d.ts.map +1 -0
  192. package/dist/core/video/IVideoGenerator.js +13 -0
  193. package/dist/core/video/IVideoGenerator.js.map +1 -0
  194. package/dist/core/video/VideoAnalyzer.d.ts +278 -0
  195. package/dist/core/video/VideoAnalyzer.d.ts.map +1 -0
  196. package/dist/core/video/VideoAnalyzer.js +648 -0
  197. package/dist/core/video/VideoAnalyzer.js.map +1 -0
  198. package/dist/core/video/index.d.ts +55 -0
  199. package/dist/core/video/index.d.ts.map +1 -0
  200. package/dist/core/video/index.js +78 -0
  201. package/dist/core/video/index.js.map +1 -0
  202. package/dist/core/video/providers/FalVideoProvider.d.ts +195 -0
  203. package/dist/core/video/providers/FalVideoProvider.d.ts.map +1 -0
  204. package/dist/core/video/providers/FalVideoProvider.js +322 -0
  205. package/dist/core/video/providers/FalVideoProvider.js.map +1 -0
  206. package/dist/core/video/providers/ReplicateVideoProvider.d.ts +194 -0
  207. package/dist/core/video/providers/ReplicateVideoProvider.d.ts.map +1 -0
  208. package/dist/core/video/providers/ReplicateVideoProvider.js +356 -0
  209. package/dist/core/video/providers/ReplicateVideoProvider.js.map +1 -0
  210. package/dist/core/video/providers/RunwayVideoProvider.d.ts +175 -0
  211. package/dist/core/video/providers/RunwayVideoProvider.d.ts.map +1 -0
  212. package/dist/core/video/providers/RunwayVideoProvider.js +293 -0
  213. package/dist/core/video/providers/RunwayVideoProvider.js.map +1 -0
  214. package/dist/core/video/types.d.ts +441 -0
  215. package/dist/core/video/types.d.ts.map +1 -0
  216. package/dist/core/video/types.js +10 -0
  217. package/dist/core/video/types.js.map +1 -0
  218. package/dist/core/vision/SceneDetector.d.ts +180 -0
  219. package/dist/core/vision/SceneDetector.d.ts.map +1 -0
  220. package/dist/core/vision/SceneDetector.js +366 -0
  221. package/dist/core/vision/SceneDetector.js.map +1 -0
  222. package/dist/core/vision/index.d.ts +2 -1
  223. package/dist/core/vision/index.d.ts.map +1 -1
  224. package/dist/core/vision/index.js +1 -0
  225. package/dist/core/vision/index.js.map +1 -1
  226. package/dist/core/vision/types.d.ts +125 -0
  227. package/dist/core/vision/types.d.ts.map +1 -1
  228. package/dist/discovery/CapabilityDiscoveryEngine.d.ts +32 -0
  229. package/dist/discovery/CapabilityDiscoveryEngine.d.ts.map +1 -1
  230. package/dist/discovery/CapabilityDiscoveryEngine.js +46 -0
  231. package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
  232. package/dist/extensions/MultiRegistryLoader.js.map +1 -1
  233. package/dist/index.d.ts +17 -2
  234. package/dist/index.d.ts.map +1 -1
  235. package/dist/index.js +12 -0
  236. package/dist/index.js.map +1 -1
  237. package/dist/memory/CognitiveMemoryManager.d.ts +40 -0
  238. package/dist/memory/CognitiveMemoryManager.d.ts.map +1 -1
  239. package/dist/memory/CognitiveMemoryManager.js +54 -1
  240. package/dist/memory/CognitiveMemoryManager.js.map +1 -1
  241. package/dist/memory/facade/Memory.d.ts +4 -0
  242. package/dist/memory/facade/Memory.d.ts.map +1 -1
  243. package/dist/memory/facade/Memory.js +140 -4
  244. package/dist/memory/facade/Memory.js.map +1 -1
  245. package/dist/memory/facade/types.d.ts +30 -2
  246. package/dist/memory/facade/types.d.ts.map +1 -1
  247. package/dist/memory/index.d.ts +1 -0
  248. package/dist/memory/index.d.ts.map +1 -1
  249. package/dist/memory/index.js +1 -0
  250. package/dist/memory/index.js.map +1 -1
  251. package/dist/memory/types.d.ts +15 -0
  252. package/dist/memory/types.d.ts.map +1 -1
  253. package/dist/query-router/QueryClassifier.d.ts +192 -21
  254. package/dist/query-router/QueryClassifier.d.ts.map +1 -1
  255. package/dist/query-router/QueryClassifier.js +604 -23
  256. package/dist/query-router/QueryClassifier.js.map +1 -1
  257. package/dist/query-router/QueryDispatcher.d.ts +106 -8
  258. package/dist/query-router/QueryDispatcher.d.ts.map +1 -1
  259. package/dist/query-router/QueryDispatcher.js +387 -8
  260. package/dist/query-router/QueryDispatcher.js.map +1 -1
  261. package/dist/query-router/QueryRouter.d.ts +198 -14
  262. package/dist/query-router/QueryRouter.d.ts.map +1 -1
  263. package/dist/query-router/QueryRouter.js +738 -50
  264. package/dist/query-router/QueryRouter.js.map +1 -1
  265. package/dist/query-router/index.d.ts +1 -1
  266. package/dist/query-router/index.d.ts.map +1 -1
  267. package/dist/query-router/index.js +1 -1
  268. package/dist/query-router/index.js.map +1 -1
  269. package/dist/query-router/types.d.ts +396 -3
  270. package/dist/query-router/types.d.ts.map +1 -1
  271. package/dist/query-router/types.js +35 -0
  272. package/dist/query-router/types.js.map +1 -1
  273. package/dist/rag/HydeRetriever.d.ts +108 -0
  274. package/dist/rag/HydeRetriever.d.ts.map +1 -1
  275. package/dist/rag/HydeRetriever.js +184 -0
  276. package/dist/rag/HydeRetriever.js.map +1 -1
  277. package/dist/rag/IRetrievalAugmentor.d.ts +15 -0
  278. package/dist/rag/IRetrievalAugmentor.d.ts.map +1 -1
  279. package/dist/rag/RetrievalAugmentor.d.ts +58 -0
  280. package/dist/rag/RetrievalAugmentor.d.ts.map +1 -1
  281. package/dist/rag/RetrievalAugmentor.js +200 -32
  282. package/dist/rag/RetrievalAugmentor.js.map +1 -1
  283. package/dist/rag/VectorStoreManager.js +1 -1
  284. package/dist/rag/audit/RAGAuditCollector.d.ts +7 -0
  285. package/dist/rag/audit/RAGAuditCollector.d.ts.map +1 -1
  286. package/dist/rag/audit/RAGAuditCollector.js +10 -0
  287. package/dist/rag/audit/RAGAuditCollector.js.map +1 -1
  288. package/dist/rag/audit/RAGAuditTypes.d.ts +10 -1
  289. package/dist/rag/audit/RAGAuditTypes.d.ts.map +1 -1
  290. package/dist/rag/chunking/SemanticChunker.d.ts +210 -0
  291. package/dist/rag/chunking/SemanticChunker.d.ts.map +1 -0
  292. package/dist/rag/chunking/SemanticChunker.js +460 -0
  293. package/dist/rag/chunking/SemanticChunker.js.map +1 -0
  294. package/dist/rag/chunking/index.d.ts +10 -0
  295. package/dist/rag/chunking/index.d.ts.map +1 -0
  296. package/dist/rag/chunking/index.js +10 -0
  297. package/dist/rag/chunking/index.js.map +1 -0
  298. package/dist/rag/implementations/vector_stores/PineconeVectorStore.d.ts +103 -0
  299. package/dist/rag/implementations/vector_stores/PineconeVectorStore.d.ts.map +1 -0
  300. package/dist/rag/implementations/vector_stores/PineconeVectorStore.js +315 -0
  301. package/dist/rag/implementations/vector_stores/PineconeVectorStore.js.map +1 -0
  302. package/dist/rag/implementations/vector_stores/PostgresVectorStore.d.ts +107 -0
  303. package/dist/rag/implementations/vector_stores/PostgresVectorStore.d.ts.map +1 -0
  304. package/dist/rag/implementations/vector_stores/PostgresVectorStore.js +438 -0
  305. package/dist/rag/implementations/vector_stores/PostgresVectorStore.js.map +1 -0
  306. package/dist/rag/index.d.ts +15 -1
  307. package/dist/rag/index.d.ts.map +1 -1
  308. package/dist/rag/index.js +32 -0
  309. package/dist/rag/index.js.map +1 -1
  310. package/dist/rag/migration/MigrationEngine.d.ts +47 -0
  311. package/dist/rag/migration/MigrationEngine.d.ts.map +1 -0
  312. package/dist/rag/migration/MigrationEngine.js +168 -0
  313. package/dist/rag/migration/MigrationEngine.js.map +1 -0
  314. package/dist/rag/migration/adapters/PineconeSourceAdapter.d.ts +23 -0
  315. package/dist/rag/migration/adapters/PineconeSourceAdapter.d.ts.map +1 -0
  316. package/dist/rag/migration/adapters/PineconeSourceAdapter.js +63 -0
  317. package/dist/rag/migration/adapters/PineconeSourceAdapter.js.map +1 -0
  318. package/dist/rag/migration/adapters/PostgresSourceAdapter.d.ts +30 -0
  319. package/dist/rag/migration/adapters/PostgresSourceAdapter.d.ts.map +1 -0
  320. package/dist/rag/migration/adapters/PostgresSourceAdapter.js +71 -0
  321. package/dist/rag/migration/adapters/PostgresSourceAdapter.js.map +1 -0
  322. package/dist/rag/migration/adapters/PostgresTargetAdapter.d.ts +38 -0
  323. package/dist/rag/migration/adapters/PostgresTargetAdapter.d.ts.map +1 -0
  324. package/dist/rag/migration/adapters/PostgresTargetAdapter.js +114 -0
  325. package/dist/rag/migration/adapters/PostgresTargetAdapter.js.map +1 -0
  326. package/dist/rag/migration/adapters/QdrantSourceAdapter.d.ts +36 -0
  327. package/dist/rag/migration/adapters/QdrantSourceAdapter.d.ts.map +1 -0
  328. package/dist/rag/migration/adapters/QdrantSourceAdapter.js +109 -0
  329. package/dist/rag/migration/adapters/QdrantSourceAdapter.js.map +1 -0
  330. package/dist/rag/migration/adapters/QdrantTargetAdapter.d.ts +35 -0
  331. package/dist/rag/migration/adapters/QdrantTargetAdapter.d.ts.map +1 -0
  332. package/dist/rag/migration/adapters/QdrantTargetAdapter.js +110 -0
  333. package/dist/rag/migration/adapters/QdrantTargetAdapter.js.map +1 -0
  334. package/dist/rag/migration/adapters/SqliteSourceAdapter.d.ts +37 -0
  335. package/dist/rag/migration/adapters/SqliteSourceAdapter.d.ts.map +1 -0
  336. package/dist/rag/migration/adapters/SqliteSourceAdapter.js +72 -0
  337. package/dist/rag/migration/adapters/SqliteSourceAdapter.js.map +1 -0
  338. package/dist/rag/migration/adapters/SqliteTargetAdapter.d.ts +47 -0
  339. package/dist/rag/migration/adapters/SqliteTargetAdapter.d.ts.map +1 -0
  340. package/dist/rag/migration/adapters/SqliteTargetAdapter.js +93 -0
  341. package/dist/rag/migration/adapters/SqliteTargetAdapter.js.map +1 -0
  342. package/dist/rag/migration/types.d.ts +108 -0
  343. package/dist/rag/migration/types.d.ts.map +1 -0
  344. package/dist/rag/migration/types.js +11 -0
  345. package/dist/rag/migration/types.js.map +1 -0
  346. package/dist/rag/multimodal/MultimodalIndexer.d.ts +35 -0
  347. package/dist/rag/multimodal/MultimodalIndexer.d.ts.map +1 -1
  348. package/dist/rag/multimodal/MultimodalIndexer.js +66 -1
  349. package/dist/rag/multimodal/MultimodalIndexer.js.map +1 -1
  350. package/dist/rag/multimodal/types.d.ts +24 -0
  351. package/dist/rag/multimodal/types.d.ts.map +1 -1
  352. package/dist/rag/raptor/RaptorTree.d.ts +268 -0
  353. package/dist/rag/raptor/RaptorTree.d.ts.map +1 -0
  354. package/dist/rag/raptor/RaptorTree.js +443 -0
  355. package/dist/rag/raptor/RaptorTree.js.map +1 -0
  356. package/dist/rag/raptor/index.d.ts +11 -0
  357. package/dist/rag/raptor/index.d.ts.map +1 -0
  358. package/dist/rag/raptor/index.js +11 -0
  359. package/dist/rag/raptor/index.js.map +1 -0
  360. package/dist/rag/reranking/providers/CohereReranker.js.map +1 -1
  361. package/dist/rag/search/BM25Index.d.ts +282 -0
  362. package/dist/rag/search/BM25Index.d.ts.map +1 -0
  363. package/dist/rag/search/BM25Index.js +344 -0
  364. package/dist/rag/search/BM25Index.js.map +1 -0
  365. package/dist/rag/search/HybridSearcher.d.ts +198 -0
  366. package/dist/rag/search/HybridSearcher.d.ts.map +1 -0
  367. package/dist/rag/search/HybridSearcher.js +316 -0
  368. package/dist/rag/search/HybridSearcher.js.map +1 -0
  369. package/dist/rag/search/index.d.ts +12 -0
  370. package/dist/rag/search/index.d.ts.map +1 -0
  371. package/dist/rag/search/index.js +12 -0
  372. package/dist/rag/search/index.js.map +1 -0
  373. package/dist/rag/setup/DockerDetector.d.ts +67 -0
  374. package/dist/rag/setup/DockerDetector.d.ts.map +1 -0
  375. package/dist/rag/setup/DockerDetector.js +125 -0
  376. package/dist/rag/setup/DockerDetector.js.map +1 -0
  377. package/dist/rag/setup/PostgresSetup.d.ts +20 -0
  378. package/dist/rag/setup/PostgresSetup.d.ts.map +1 -0
  379. package/dist/rag/setup/PostgresSetup.js +133 -0
  380. package/dist/rag/setup/PostgresSetup.js.map +1 -0
  381. package/dist/rag/setup/QdrantSetup.d.ts +26 -0
  382. package/dist/rag/setup/QdrantSetup.d.ts.map +1 -0
  383. package/dist/rag/setup/QdrantSetup.js +96 -0
  384. package/dist/rag/setup/QdrantSetup.js.map +1 -0
  385. package/dist/rag/setup/types.d.ts +55 -0
  386. package/dist/rag/setup/types.d.ts.map +1 -0
  387. package/dist/rag/setup/types.js +6 -0
  388. package/dist/rag/setup/types.js.map +1 -0
  389. package/dist/rag/unified/UnifiedRetriever.d.ts +472 -0
  390. package/dist/rag/unified/UnifiedRetriever.d.ts.map +1 -0
  391. package/dist/rag/unified/UnifiedRetriever.js +887 -0
  392. package/dist/rag/unified/UnifiedRetriever.js.map +1 -0
  393. package/dist/rag/unified/index.d.ts +24 -0
  394. package/dist/rag/unified/index.d.ts.map +1 -0
  395. package/dist/rag/unified/index.js +23 -0
  396. package/dist/rag/unified/index.js.map +1 -0
  397. package/dist/rag/unified/types.d.ts +546 -0
  398. package/dist/rag/unified/types.d.ts.map +1 -0
  399. package/dist/rag/unified/types.js +177 -0
  400. package/dist/rag/unified/types.js.map +1 -0
  401. package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -1
  402. package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -1
  403. package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts +1 -1
  404. package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -1
  405. package/package.json +5 -2
@@ -0,0 +1,210 @@
1
+ /**
2
+ * @fileoverview Semantic text chunker that splits on natural boundaries instead
3
+ * of fixed character counts.
4
+ *
5
+ * Four-tier splitting strategy:
6
+ * 1. **Heading boundaries** — Markdown headings (`# ## ###` etc.) start new chunks
7
+ * 2. **Paragraph boundaries** — Double newlines are the preferred split point
8
+ * 3. **Sentence boundaries** — Period/exclamation/question followed by whitespace
9
+ * 4. **Fixed-size fallback** — Only when paragraphs/sentences exceed `maxSize`
10
+ *
11
+ * Each chunk preserves complete sentences/paragraphs and includes configurable
12
+ * overlap context from the previous chunk for retrieval continuity.
13
+ *
14
+ * Special handling:
15
+ * - **Code blocks** (fenced with triple backticks) are kept intact when possible
16
+ * - **Markdown headings** always start new chunks for better section-level retrieval
17
+ * - **Small fragments** below `minSize` are merged with the previous chunk
18
+ *
19
+ * @module agentos/rag/chunking/SemanticChunker
20
+ * @see RetrievalAugmentor for integration with the RAG pipeline
21
+ */
22
+ /**
23
+ * Configuration for the semantic chunker.
24
+ *
25
+ * @interface SemanticChunkerConfig
26
+ */
27
+ export interface SemanticChunkerConfig {
28
+ /** Target chunk size in characters. Default: 1000. */
29
+ targetSize?: number;
30
+ /** Maximum chunk size — hard limit before forced splitting. Default: 2000. */
31
+ maxSize?: number;
32
+ /** Minimum chunk size — fragments below this merge with previous. Default: 200. */
33
+ minSize?: number;
34
+ /** Overlap characters from previous chunk prepended for context. Default: 100. */
35
+ overlap?: number;
36
+ /** Whether to detect and preserve fenced code blocks intact. Default: true. */
37
+ preserveCodeBlocks?: boolean;
38
+ /** Whether to detect markdown headings as chunk-start boundaries. Default: true. */
39
+ respectHeadings?: boolean;
40
+ }
41
+ /**
42
+ * A semantically coherent text chunk produced by the chunker.
43
+ *
44
+ * @interface SemanticChunk
45
+ * @property {string} text - The chunk text content (may include overlap prefix).
46
+ * @property {number} index - 0-based sequence index within the chunked document.
47
+ * @property {number} startOffset - Character offset in the original text where this chunk begins.
48
+ * @property {number} endOffset - Character offset in the original text where this chunk ends.
49
+ * @property {BoundaryType} boundaryType - Type of boundary that determined this chunk's split.
50
+ * @property {Record<string, unknown>} [metadata] - Pass-through metadata from the caller.
51
+ */
52
+ export interface SemanticChunk {
53
+ /** The chunk text content (may include overlap prefix from previous chunk). */
54
+ text: string;
55
+ /** 0-based sequence index within the chunked document. */
56
+ index: number;
57
+ /** Character offset in the original text where this chunk begins (before overlap). */
58
+ startOffset: number;
59
+ /** Character offset in the original text where this chunk ends. */
60
+ endOffset: number;
61
+ /** Type of boundary that determined this chunk's split point. */
62
+ boundaryType: BoundaryType;
63
+ /** Pass-through metadata from the caller. */
64
+ metadata?: Record<string, unknown>;
65
+ }
66
+ /**
67
+ * The type of boundary used to split a chunk.
68
+ */
69
+ export type BoundaryType = 'paragraph' | 'sentence' | 'heading' | 'code-block' | 'fixed';
70
+ /**
71
+ * Semantic text chunker that splits on natural boundaries instead of
72
+ * fixed character counts.
73
+ *
74
+ * Produces chunks that are more semantically coherent than fixed-size
75
+ * splitting, improving retrieval quality by keeping related ideas together.
76
+ *
77
+ * @example Basic usage
78
+ * ```typescript
79
+ * const chunker = new SemanticChunker({ targetSize: 800, overlap: 50 });
80
+ * const chunks = chunker.chunk(markdownDocument);
81
+ * for (const c of chunks) {
82
+ * console.log(`Chunk ${c.index} (${c.boundaryType}): ${c.text.length} chars`);
83
+ * }
84
+ * ```
85
+ *
86
+ * @example Preserving code blocks
87
+ * ```typescript
88
+ * const chunker = new SemanticChunker({
89
+ * targetSize: 1000,
90
+ * maxSize: 3000, // Allow larger chunks for code blocks
91
+ * preserveCodeBlocks: true,
92
+ * });
93
+ * const chunks = chunker.chunk(technicalDoc);
94
+ * ```
95
+ */
96
+ export declare class SemanticChunker {
97
+ /** Resolved configuration with defaults applied. */
98
+ private config;
99
+ /**
100
+ * Creates a new SemanticChunker.
101
+ *
102
+ * @param {SemanticChunkerConfig} [config] - Chunking configuration.
103
+ * @param {number} [config.targetSize=1000] - Target chunk size in characters.
104
+ * @param {number} [config.maxSize=2000] - Maximum chunk size (hard limit).
105
+ * @param {number} [config.minSize=200] - Minimum chunk size before merging.
106
+ * @param {number} [config.overlap=100] - Overlap characters from previous chunk.
107
+ * @param {boolean} [config.preserveCodeBlocks=true] - Keep code blocks intact.
108
+ * @param {boolean} [config.respectHeadings=true] - Start new chunks at headings.
109
+ *
110
+ * @example
111
+ * ```typescript
112
+ * const chunker = new SemanticChunker({
113
+ * targetSize: 800,
114
+ * maxSize: 1500,
115
+ * overlap: 80,
116
+ * });
117
+ * ```
118
+ */
119
+ constructor(config?: SemanticChunkerConfig);
120
+ /**
121
+ * Splits text into semantically coherent chunks.
122
+ *
123
+ * Pipeline:
124
+ * 1. Pre-process: extract code blocks (if `preserveCodeBlocks`)
125
+ * 2. Split by headings (if `respectHeadings`) — each heading starts a new section
126
+ * 3. Within sections, split by paragraphs (double newline)
127
+ * 4. If a paragraph exceeds `maxSize`, split by sentences
128
+ * 5. If a sentence exceeds `maxSize`, split at word boundaries (fixed fallback)
129
+ * 6. Merge small fragments (< `minSize`) with the previous chunk
130
+ * 7. Add overlap from the end of the previous chunk to each chunk
131
+ *
132
+ * @param {string} text - The full text to chunk.
133
+ * @param {Record<string, unknown>} [metadata] - Optional metadata attached to all chunks.
134
+ * @returns {SemanticChunk[]} Array of chunks in order.
135
+ * @throws {Error} If text is empty.
136
+ *
137
+ * @example
138
+ * ```typescript
139
+ * const chunks = chunker.chunk(
140
+ * '# Introduction\n\nFirst paragraph.\n\n## Details\n\nSecond paragraph.',
141
+ * { source: 'docs/readme.md' },
142
+ * );
143
+ * // chunks[0].boundaryType === 'heading'
144
+ * // chunks[0].text includes "# Introduction\n\nFirst paragraph."
145
+ * ```
146
+ */
147
+ chunk(text: string, metadata?: Record<string, unknown>): SemanticChunk[];
148
+ /**
149
+ * Splits text into structural segments based on headings and code blocks.
150
+ *
151
+ * This is the first pass that identifies major structural boundaries:
152
+ * - Markdown headings always start new segments
153
+ * - Fenced code blocks are kept as single segments when possible
154
+ * - Remaining text is split by paragraphs (double newline)
155
+ *
156
+ * @param {string} text - Full document text.
157
+ * @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments.
158
+ */
159
+ private splitByStructure;
160
+ /**
161
+ * Further splits an oversized segment by paragraph and sentence boundaries.
162
+ *
163
+ * Called when a structural segment exceeds `maxSize`. Tries progressively
164
+ * smaller split granularity:
165
+ * 1. Paragraph splits (double newline)
166
+ * 2. Sentence splits (period/exclamation/question + space + uppercase)
167
+ * 3. Word boundary splits (fixed-size fallback)
168
+ *
169
+ * @param {string} text - Oversized segment text.
170
+ * @param {number} baseOffset - Character offset of this segment in the original text.
171
+ * @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sub-segments.
172
+ */
173
+ private splitOversizedSegment;
174
+ /**
175
+ * Splits text by sentence boundaries.
176
+ *
177
+ * Detects sentence endings (`.` `!` `?` followed by whitespace) and accumulates
178
+ * sentences until reaching `targetSize`. Falls back to word-boundary splitting
179
+ * for sentences exceeding `maxSize`.
180
+ *
181
+ * @param {string} text - Text to split by sentences.
182
+ * @param {number} baseOffset - Character offset in the original text.
183
+ * @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sentence-split chunks.
184
+ */
185
+ private splitBySentences;
186
+ /**
187
+ * Last-resort fixed-size splitting at word boundaries.
188
+ *
189
+ * Splits text at the last space before `targetSize` to avoid breaking words.
190
+ * This is only used when no paragraph or sentence boundaries are available
191
+ * within a segment that exceeds `maxSize`.
192
+ *
193
+ * @param {string} text - Text to split at word boundaries.
194
+ * @param {number} baseOffset - Character offset in the original text.
195
+ * @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Fixed-size chunks.
196
+ */
197
+ private splitFixed;
198
+ /**
199
+ * Merges fragments smaller than `minSize` with the previous chunk.
200
+ *
201
+ * Small trailing fragments (e.g., a short concluding sentence) are merged
202
+ * backwards to prevent creating chunks that are too small for meaningful
203
+ * embedding.
204
+ *
205
+ * @param {Array<{ text: string; offset: number; boundary: BoundaryType }>} segments - Input segments.
206
+ * @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments with small ones merged.
207
+ */
208
+ private mergeSmallFragments;
209
+ }
210
+ //# sourceMappingURL=SemanticChunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SemanticChunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/SemanticChunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACpC,sDAAsD;IACtD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,8EAA8E;IAC9E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mFAAmF;IACnF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kFAAkF;IAClF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+EAA+E;IAC/E,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,oFAAoF;IACpF,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,aAAa;IAC5B,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;IACd,sFAAsF;IACtF,WAAW,EAAE,MAAM,CAAC;IACpB,mEAAmE;IACnE,SAAS,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,YAAY,EAAE,YAAY,CAAC;IAC3B,6CAA6C;IAC7C,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,WAAW,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,GAAG,OAAO,CAAC;AAuBzF;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,qBAAa,eAAe;IAC1B,oDAAoD;IACpD,OAAO,CAAC,MAAM,CAAkC;IAEhD;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,MAAM,CAAC,EAAE,qBAAqB;IAW1C;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,aAAa,EAAE;IAmDxE;;;;;;;;;;OAUG;IACH,OAAO,CAAC,gBAAgB;IAgHxB;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,qBAAqB;IAkD7B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,gBAAgB;IA+CxB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,UAAU;IAqClB;;;;;;;;;OASG;IACH,OAAO,CAAC,mBAAmB;CAwB5B"}
@@ -0,0 +1,460 @@
1
+ /**
2
+ * @fileoverview Semantic text chunker that splits on natural boundaries instead
3
+ * of fixed character counts.
4
+ *
5
+ * Four-tier splitting strategy:
6
+ * 1. **Heading boundaries** — Markdown headings (`# ## ###` etc.) start new chunks
7
+ * 2. **Paragraph boundaries** — Double newlines are the preferred split point
8
+ * 3. **Sentence boundaries** — Period/exclamation/question followed by whitespace
9
+ * 4. **Fixed-size fallback** — Only when paragraphs/sentences exceed `maxSize`
10
+ *
11
+ * Each chunk preserves complete sentences/paragraphs and includes configurable
12
+ * overlap context from the previous chunk for retrieval continuity.
13
+ *
14
+ * Special handling:
15
+ * - **Code blocks** (fenced with triple backticks) are kept intact when possible
16
+ * - **Markdown headings** always start new chunks for better section-level retrieval
17
+ * - **Small fragments** below `minSize` are merged with the previous chunk
18
+ *
19
+ * @module agentos/rag/chunking/SemanticChunker
20
+ * @see RetrievalAugmentor for integration with the RAG pipeline
21
+ */
22
+ // ── Internal helpers ──────────────────────────────────────────────────────
23
+ /**
24
+ * Regex matching a Markdown heading (one to six `#`, whitespace, then text).
25
+ * Anchored to the start of the string or to a preceding newline (no `m` flag); group 1 captures the heading line. Not referenced elsewhere in this module.
26
+ */
27
+ const HEADING_RE = /(?:^|\n)(#{1,6}\s+.+)/;
28
+ /**
29
+ * Regex for fenced code block start/end markers: matches a string that begins with three backticks, so leading indentation must be stripped before testing.
30
+ */
31
+ const CODE_FENCE_RE = /^```/;
32
+ /**
33
+ * Regex for sentence boundaries: `.`, `!` or `?` followed by whitespace and then an
34
+ * uppercase ASCII letter or a newline, or followed by optional whitespace at the end of the text. Not referenced elsewhere in this module.
35
+ */
36
+ const SENTENCE_BOUNDARY_RE = /[.!?]\s+(?=[A-Z\n])|[.!?]\s*$/;
37
+ // ── Semantic Chunker ──────────────────────────────────────────────────────
38
/**
 * Splits `text` on every match of `separator` (same pieces as
 * `String.prototype.split`) while recording where each piece sits in `text`.
 *
 * Knowing the real position of every piece is what lets the chunker report
 * offsets that stay correct when separators vary in length (for example three
 * newlines between paragraphs, or two spaces after a sentence).
 *
 * @param {string} text - Text to split.
 * @param {RegExp} separator - Pattern with the `g` flag that never matches the empty string.
 * @returns {Array<{ text: string; start: number; end: number }>} Pieces in order; `start`/`end` index into `text`.
 */
function splitWithOffsets(text, separator) {
    const pieces = [];
    let cursor = 0;
    for (const match of text.matchAll(separator)) {
        pieces.push({ text: text.slice(cursor, match.index), start: cursor, end: match.index });
        cursor = match.index + match[0].length;
    }
    pieces.push({ text: text.slice(cursor), start: cursor, end: text.length });
    return pieces;
}
/**
 * Greedily packs consecutive pieces into runs of at most `targetSize` characters.
 *
 * Pieces inside a run are re-joined with `joiner`. A run that is still longer
 * than `maxSize` (in practice a single oversized piece) is handed to
 * `splitFurther` as the verbatim slice of `source`, so the offsets produced by
 * the finer-grained splitter remain exact.
 *
 * @param {string} source - Text the pieces were cut from.
 * @param {number} baseOffset - Offset of `source` in the original document.
 * @param {Array<{ text: string; start: number; end: number }>} pieces - Output of `splitWithOffsets`.
 * @param {object} options - Packing options.
 * @param {string} options.joiner - String placed between pieces of the same run.
 * @param {string} options.boundary - Boundary label for the emitted runs.
 * @param {number} options.targetSize - Preferred maximum run length.
 * @param {number} options.maxSize - Length above which a run is split further.
 * @param {(text: string, offset: number) => Array<object>} options.splitFurther - Finer-grained splitter.
 * @returns {Array<{ text: string; offset: number; end: number; boundary: string }>} Packed segments.
 */
function packPieces(source, baseOffset, pieces, options) {
    const { joiner, boundary, targetSize, maxSize, splitFurther } = options;
    const results = [];
    let accumulated = '';
    let accStart = 0;
    let accEnd = 0;
    const flush = () => {
        if (accumulated.length > maxSize) {
            results.push(...splitFurther(source.slice(accStart, accEnd), baseOffset + accStart));
        }
        else {
            results.push({
                text: accumulated,
                offset: baseOffset + accStart,
                end: baseOffset + accEnd,
                boundary,
            });
        }
    };
    for (const piece of pieces) {
        const startsNewRun = accumulated.length === 0;
        const overflows = !startsNewRun &&
            accumulated.length + piece.text.length + joiner.length > targetSize;
        if (overflows) {
            flush();
        }
        if (startsNewRun || overflows) {
            accumulated = piece.text;
            accStart = piece.start;
        }
        else {
            accumulated += joiner + piece.text;
        }
        accEnd = piece.end;
    }
    if (accumulated.trim().length > 0) {
        flush();
    }
    return results;
}
/**
 * Semantic text chunker that splits on natural boundaries instead of
 * fixed character counts.
 *
 * Produces chunks that are more semantically coherent than fixed-size
 * splitting, improving retrieval quality by keeping related ideas together.
 *
 * Internally every segment carries `{ text, offset, end, boundary }`, where
 * `offset`/`end` are positions in the original document. `end` is tracked
 * explicitly because segment text may contain synthetic joiners that do not
 * exist in the source.
 *
 * @example Basic usage
 * ```typescript
 * const chunker = new SemanticChunker({ targetSize: 800, overlap: 50 });
 * const chunks = chunker.chunk(markdownDocument);
 * for (const c of chunks) {
 *   console.log(`Chunk ${c.index} (${c.boundaryType}): ${c.text.length} chars`);
 * }
 * ```
 *
 * @example Preserving code blocks
 * ```typescript
 * const chunker = new SemanticChunker({
 *   targetSize: 1000,
 *   maxSize: 3000, // Allow larger chunks for code blocks
 *   preserveCodeBlocks: true,
 * });
 * const chunks = chunker.chunk(technicalDoc);
 * ```
 */
export class SemanticChunker {
    /**
     * Creates a new SemanticChunker.
     *
     * @param {SemanticChunkerConfig} [config] - Chunking configuration.
     * @param {number} [config.targetSize=1000] - Target chunk size in characters.
     * @param {number} [config.maxSize=2000] - Size above which a segment is split further.
     * @param {number} [config.minSize=200] - Minimum chunk size before merging.
     * @param {number} [config.overlap=100] - Overlap characters from previous chunk.
     * @param {boolean} [config.preserveCodeBlocks=true] - Keep code blocks intact.
     * @param {boolean} [config.respectHeadings=true] - Start new chunks at headings.
     *
     * @example
     * ```typescript
     * const chunker = new SemanticChunker({
     *   targetSize: 800,
     *   maxSize: 1500,
     *   overlap: 80,
     * });
     * ```
     */
    constructor(config) {
        this.config = {
            targetSize: config?.targetSize ?? 1000,
            maxSize: config?.maxSize ?? 2000,
            minSize: config?.minSize ?? 200,
            overlap: config?.overlap ?? 100,
            preserveCodeBlocks: config?.preserveCodeBlocks ?? true,
            respectHeadings: config?.respectHeadings ?? true,
        };
    }
    /**
     * Splits text into semantically coherent chunks.
     *
     * Pipeline:
     * 1. Split by structure: headings (if `respectHeadings`), fenced code
     *    blocks (if `preserveCodeBlocks`) and blank lines
     * 2. Split segments longer than `maxSize` by paragraphs, then sentences,
     *    then at word boundaries
     * 3. Merge small fragments (< `minSize`) with the previous chunk
     * 4. Prepend overlap from the end of the previous chunk to each chunk
     *
     * `startOffset`/`endOffset` describe the chunk's own content in `text`
     * (the overlap prefix is not included) and never exceed `text.length`.
     *
     * @param {string} text - The full text to chunk.
     * @param {Record<string, unknown>} [metadata] - Optional metadata attached to all chunks.
     * @returns {SemanticChunk[]} Array of chunks in order; empty when `text` is empty or whitespace-only.
     *
     * @example
     * ```typescript
     * const chunks = chunker.chunk(
     *   '# Introduction\n\nFirst paragraph.\n\n## Details\n\nSecond paragraph.',
     *   { source: 'docs/readme.md' },
     * );
     * // chunks[0].boundaryType === 'heading'
     * // chunks[0].text includes "# Introduction\n\nFirst paragraph."
     * ```
     */
    chunk(text, metadata) {
        if (!text || text.trim().length === 0) {
            return [];
        }
        const { maxSize, overlap } = this.config;
        // Steps 1-2: structural split, then refine anything above maxSize.
        const refinedSegments = [];
        for (const segment of this.splitByStructure(text)) {
            if (segment.text.length <= maxSize) {
                refinedSegments.push(segment);
                continue;
            }
            const subSegments = this.splitOversizedSegment(segment.text, segment.offset);
            // The first piece of a heading section still begins with the heading;
            // keep the label so it is not merged into the previous section.
            if (segment.boundary === 'heading' && subSegments.length > 0) {
                subSegments[0].boundary = 'heading';
            }
            refinedSegments.push(...subSegments);
        }
        // Step 3: fold fragments smaller than minSize into their predecessor.
        const merged = this.mergeSmallFragments(refinedSegments);
        // Step 4: build the final chunks, prefixing overlap from the previous one.
        return merged.map((segment, index) => {
            const carried = index > 0 && overlap > 0 ? merged[index - 1].text.slice(-overlap) : '';
            const rawEnd = segment.end ?? segment.offset + segment.text.length;
            return {
                text: carried + segment.text,
                index,
                startOffset: segment.offset,
                // Segment text can end with a '\n' the source does not have.
                endOffset: Math.min(rawEnd, text.length),
                boundaryType: segment.boundary,
                metadata,
            };
        });
    }
    /**
     * Splits text into structural segments based on headings and code blocks.
     *
     * This is the first pass that identifies major structural boundaries:
     * - Markdown headings always start new segments
     * - Fenced code blocks become their own segments (an unclosed fence runs
     *   to the end of the text)
     * - A blank line ends the current segment once it has reached `targetSize`
     *
     * @param {string} text - Full document text.
     * @returns {Array<{ text: string; offset: number; end: number; boundary: BoundaryType }>} Segments.
     */
    splitByStructure(text) {
        const { preserveCodeBlocks, respectHeadings, targetSize } = this.config;
        const headingLine = /^#{1,6}\s+/;
        const segments = [];
        // Lines are re-joined with '\n', so the final segment may be one
        // character longer than the source; clamp `end` to the real length.
        const pushSegment = (segmentText, offset, boundary) => {
            segments.push({
                text: segmentText,
                offset,
                end: Math.min(offset + segmentText.length, text.length),
                boundary,
            });
        };
        let currentSegment = '';
        let segmentStart = 0;
        let currentBoundary = 'paragraph';
        let currentOffset = 0;
        let inCodeBlock = false;
        let codeBlockStart = 0;
        let codeBlockContent = '';
        const flushCurrent = () => {
            if (currentSegment.trim().length > 0) {
                pushSegment(currentSegment, segmentStart, currentBoundary);
            }
        };
        for (const line of text.split('\n')) {
            const lineOffset = currentOffset;
            currentOffset += line.length + 1; // +1 for the newline
            if (preserveCodeBlocks && line.trim().startsWith('```')) {
                if (!inCodeBlock) {
                    // Opening fence: emit the text gathered so far and clear it,
                    // otherwise an unclosed fence would emit that text twice.
                    flushCurrent();
                    currentSegment = '';
                    inCodeBlock = true;
                    codeBlockStart = lineOffset;
                    codeBlockContent = line + '\n';
                }
                else {
                    // Closing fence: the code block is complete.
                    codeBlockContent += line;
                    pushSegment(codeBlockContent, codeBlockStart, 'code-block');
                    inCodeBlock = false;
                    codeBlockContent = '';
                    currentSegment = '';
                    segmentStart = currentOffset;
                    currentBoundary = 'paragraph';
                }
                continue;
            }
            if (inCodeBlock) {
                codeBlockContent += line + '\n';
                continue;
            }
            if (respectHeadings && headingLine.test(line)) {
                flushCurrent();
                currentSegment = line + '\n';
                segmentStart = lineOffset;
                currentBoundary = 'heading';
                continue;
            }
            // A blank line only ends the segment once it is big enough.
            const atParagraphBreak = line.trim() === '' && currentSegment.trim().length > 0;
            if (atParagraphBreak && currentSegment.length >= targetSize) {
                pushSegment(currentSegment, segmentStart, currentBoundary);
                currentSegment = '';
                segmentStart = currentOffset;
                currentBoundary = 'paragraph';
                continue;
            }
            currentSegment += line + '\n';
        }
        // Unclosed code block: keep what was collected.
        if (inCodeBlock && codeBlockContent.trim().length > 0) {
            pushSegment(codeBlockContent, codeBlockStart, 'code-block');
        }
        flushCurrent();
        return segments;
    }
    /**
     * Further splits an oversized segment by paragraph and sentence boundaries.
     *
     * Called when a structural segment exceeds `maxSize`. Tries progressively
     * smaller split granularity:
     * 1. Paragraph splits (blank line, possibly containing whitespace)
     * 2. Sentence splits (`.`/`!`/`?` followed by whitespace)
     * 3. Word boundary splits (fixed-size fallback)
     *
     * @param {string} text - Oversized segment text.
     * @param {number} baseOffset - Character offset of this segment in the original text.
     * @returns {Array<{ text: string; offset: number; end: number; boundary: BoundaryType }>} Sub-segments.
     */
    splitOversizedSegment(text, baseOffset) {
        const paragraphs = splitWithOffsets(text, /\n\s*\n/g);
        if (paragraphs.length <= 1) {
            // No paragraph boundaries — try sentences
            return this.splitBySentences(text, baseOffset);
        }
        return packPieces(text, baseOffset, paragraphs, {
            joiner: '\n\n',
            boundary: 'paragraph',
            targetSize: this.config.targetSize,
            maxSize: this.config.maxSize,
            splitFurther: (piece, offset) => this.splitBySentences(piece, offset),
        });
    }
    /**
     * Splits text by sentence boundaries.
     *
     * Detects sentence endings (`.` `!` `?` followed by whitespace) and accumulates
     * sentences until reaching `targetSize`. Falls back to word-boundary splitting
     * when no sentence ending is found or when a run still exceeds `maxSize`.
     *
     * @param {string} text - Text to split by sentences.
     * @param {number} baseOffset - Character offset of `text` in the original text.
     * @returns {Array<{ text: string; offset: number; end: number; boundary: BoundaryType }>} Sentence-split chunks.
     */
    splitBySentences(text, baseOffset) {
        const sentences = splitWithOffsets(text, /(?<=[.!?])\s+/g);
        if (sentences.length <= 1) {
            // No sentence boundaries — fall back to fixed splitting
            return this.splitFixed(text, baseOffset);
        }
        return packPieces(text, baseOffset, sentences, {
            joiner: ' ',
            boundary: 'sentence',
            targetSize: this.config.targetSize,
            maxSize: this.config.maxSize,
            splitFurther: (piece, offset) => this.splitFixed(piece, offset),
        });
    }
    /**
     * Last-resort fixed-size splitting at word boundaries.
     *
     * Cuts about every `targetSize` characters, moving the cut back to the last
     * space when the resulting piece would still be longer than `minSize`.
     * A non-positive or non-numeric `targetSize` is treated as 1 so the loop
     * always advances.
     *
     * @param {string} text - Text to split at word boundaries.
     * @param {number} baseOffset - Character offset of `text` in the original text.
     * @returns {Array<{ text: string; offset: number; end: number; boundary: BoundaryType }>} Fixed-size chunks.
     */
    splitFixed(text, baseOffset) {
        const { minSize } = this.config;
        const step = Math.max(1, Math.floor(this.config.targetSize) || 1);
        const results = [];
        let position = 0;
        while (position < text.length) {
            let end = Math.min(position + step, text.length);
            // If not at the end, try to break at a word boundary
            if (end < text.length) {
                const lastSpace = text.lastIndexOf(' ', end);
                // `lastSpace > position` keeps the cut moving forward even
                // when minSize is negative.
                if (lastSpace > position + minSize && lastSpace > position) {
                    end = lastSpace;
                }
            }
            const raw = text.slice(position, end);
            const piece = raw.trim();
            if (piece.length > 0) {
                // Account for leading whitespace removed by trim().
                const start = baseOffset + position + (raw.length - raw.trimStart().length);
                results.push({
                    text: piece,
                    offset: start,
                    end: start + piece.length,
                    boundary: 'fixed',
                });
            }
            position = end;
            // Skip spaces after the split point
            while (position < text.length && text[position] === ' ') {
                position++;
            }
        }
        return results;
    }
    /**
     * Merges fragments smaller than `minSize` with the previous chunk.
     *
     * Small trailing fragments (e.g., a short concluding sentence) are merged
     * backwards to prevent creating chunks that are too small for meaningful
     * embedding. The first segment and any `heading` or `code-block` segment
     * are never merged backwards. The merged segment's `end` is moved to the
     * end of the absorbed fragment.
     *
     * @param {Array<{ text: string; offset: number; end?: number; boundary: BoundaryType }>} segments - Input segments, in document order.
     * @returns {Array<{ text: string; offset: number; end?: number; boundary: BoundaryType }>} Segments with small ones merged.
     */
    mergeSmallFragments(segments) {
        if (segments.length <= 1) {
            return segments;
        }
        const merged = [];
        for (const segment of segments) {
            const previous = merged[merged.length - 1];
            const isMergeable = previous !== undefined &&
                segment.text.trim().length < this.config.minSize &&
                segment.boundary !== 'heading' &&
                segment.boundary !== 'code-block';
            if (isMergeable) {
                previous.text += '\n\n' + segment.text;
                previous.end = segment.end ?? segment.offset + segment.text.length;
            }
            else {
                merged.push({ ...segment });
            }
        }
        return merged;
    }
}
460
+ //# sourceMappingURL=SemanticChunker.js.map