convex-cms 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. package/dist/cli/commands/admin.d.ts +16 -0
  2. package/dist/cli/commands/admin.d.ts.map +1 -0
  3. package/dist/cli/commands/admin.js +88 -0
  4. package/dist/cli/commands/admin.js.map +1 -0
  5. package/dist/cli/index.d.ts +3 -0
  6. package/dist/cli/index.d.ts.map +1 -0
  7. package/dist/cli/index.js +18 -0
  8. package/dist/cli/index.js.map +1 -0
  9. package/dist/cli/utils/detectConvexUrl.d.ts +13 -0
  10. package/dist/cli/utils/detectConvexUrl.d.ts.map +1 -0
  11. package/dist/cli/utils/detectConvexUrl.js +48 -0
  12. package/dist/cli/utils/detectConvexUrl.js.map +1 -0
  13. package/dist/cli/utils/openBrowser.d.ts +7 -0
  14. package/dist/cli/utils/openBrowser.d.ts.map +1 -0
  15. package/dist/cli/utils/openBrowser.js +17 -0
  16. package/dist/cli/utils/openBrowser.js.map +1 -0
  17. package/dist/client/admin-config.d.ts +126 -0
  18. package/dist/client/admin-config.d.ts.map +1 -0
  19. package/dist/client/admin-config.js +117 -0
  20. package/dist/client/admin-config.js.map +1 -0
  21. package/dist/client/adminApi.d.ts +2273 -0
  22. package/dist/client/adminApi.d.ts.map +1 -0
  23. package/dist/client/adminApi.js +716 -0
  24. package/dist/client/adminApi.js.map +1 -0
  25. package/dist/client/agentTools.d.ts +933 -0
  26. package/dist/client/agentTools.d.ts.map +1 -0
  27. package/dist/client/agentTools.js +1004 -0
  28. package/dist/client/agentTools.js.map +1 -0
  29. package/dist/client/argTypes.d.ts +212 -0
  30. package/dist/client/argTypes.d.ts.map +1 -0
  31. package/dist/client/argTypes.js +5 -0
  32. package/dist/client/argTypes.js.map +1 -0
  33. package/dist/client/field-types.d.ts +55 -0
  34. package/dist/client/field-types.d.ts.map +1 -0
  35. package/dist/client/field-types.js +152 -0
  36. package/dist/client/field-types.js.map +1 -0
  37. package/dist/client/index.d.ts +189 -0
  38. package/dist/client/index.d.ts.map +1 -0
  39. package/dist/client/index.js +668 -0
  40. package/dist/client/index.js.map +1 -0
  41. package/dist/client/queryBuilder.d.ts +765 -0
  42. package/dist/client/queryBuilder.d.ts.map +1 -0
  43. package/dist/client/queryBuilder.js +970 -0
  44. package/dist/client/queryBuilder.js.map +1 -0
  45. package/dist/client/schema/codegen.d.ts +128 -0
  46. package/dist/client/schema/codegen.d.ts.map +1 -0
  47. package/dist/client/schema/codegen.js +318 -0
  48. package/dist/client/schema/codegen.js.map +1 -0
  49. package/dist/client/schema/defineContentType.d.ts +221 -0
  50. package/dist/client/schema/defineContentType.d.ts.map +1 -0
  51. package/dist/client/schema/defineContentType.js +380 -0
  52. package/dist/client/schema/defineContentType.js.map +1 -0
  53. package/dist/client/schema/index.d.ts +85 -0
  54. package/dist/client/schema/index.d.ts.map +1 -0
  55. package/dist/client/schema/index.js +92 -0
  56. package/dist/client/schema/index.js.map +1 -0
  57. package/dist/client/schema/schemaDrift.d.ts +199 -0
  58. package/dist/client/schema/schemaDrift.d.ts.map +1 -0
  59. package/dist/client/schema/schemaDrift.js +340 -0
  60. package/dist/client/schema/schemaDrift.js.map +1 -0
  61. package/dist/client/schema/typedClient.d.ts +401 -0
  62. package/dist/client/schema/typedClient.d.ts.map +1 -0
  63. package/dist/client/schema/typedClient.js +269 -0
  64. package/dist/client/schema/typedClient.js.map +1 -0
  65. package/dist/client/schema/types.d.ts +477 -0
  66. package/dist/client/schema/types.d.ts.map +1 -0
  67. package/dist/client/schema/types.js +39 -0
  68. package/dist/client/schema/types.js.map +1 -0
  69. package/dist/client/types.d.ts +449 -0
  70. package/dist/client/types.d.ts.map +1 -0
  71. package/dist/client/types.js +149 -0
  72. package/dist/client/types.js.map +1 -0
  73. package/dist/client/workflows.d.ts +51 -0
  74. package/dist/client/workflows.d.ts.map +1 -0
  75. package/dist/client/workflows.js +103 -0
  76. package/dist/client/workflows.js.map +1 -0
  77. package/dist/client/wrapper.d.ts +2198 -0
  78. package/dist/client/wrapper.d.ts.map +1 -0
  79. package/dist/client/wrapper.js +2651 -0
  80. package/dist/client/wrapper.js.map +1 -0
  81. package/dist/component/_generated/api.d.ts +124 -0
  82. package/dist/component/_generated/api.d.ts.map +1 -0
  83. package/dist/component/_generated/api.js +31 -0
  84. package/dist/component/_generated/api.js.map +1 -0
  85. package/dist/component/_generated/component.d.ts +4321 -0
  86. package/dist/component/_generated/component.d.ts.map +1 -0
  87. package/dist/component/_generated/component.js +11 -0
  88. package/dist/component/_generated/component.js.map +1 -0
  89. package/dist/component/_generated/dataModel.d.ts +46 -0
  90. package/dist/component/_generated/dataModel.d.ts.map +1 -0
  91. package/dist/component/_generated/dataModel.js +11 -0
  92. package/dist/component/_generated/dataModel.js.map +1 -0
  93. package/dist/component/_generated/server.d.ts +121 -0
  94. package/dist/component/_generated/server.d.ts.map +1 -0
  95. package/dist/component/_generated/server.js +78 -0
  96. package/dist/component/_generated/server.js.map +1 -0
  97. package/dist/component/auditLog.d.ts +410 -0
  98. package/dist/component/auditLog.d.ts.map +1 -0
  99. package/dist/component/auditLog.js +607 -0
  100. package/dist/component/auditLog.js.map +1 -0
  101. package/dist/component/authorization.d.ts +323 -0
  102. package/dist/component/authorization.d.ts.map +1 -0
  103. package/dist/component/authorization.js +464 -0
  104. package/dist/component/authorization.js.map +1 -0
  105. package/dist/component/authorizationHooks.d.ts +184 -0
  106. package/dist/component/authorizationHooks.d.ts.map +1 -0
  107. package/dist/component/authorizationHooks.js +521 -0
  108. package/dist/component/authorizationHooks.js.map +1 -0
  109. package/dist/component/bulkOperations.d.ts +200 -0
  110. package/dist/component/bulkOperations.d.ts.map +1 -0
  111. package/dist/component/bulkOperations.js +568 -0
  112. package/dist/component/bulkOperations.js.map +1 -0
  113. package/dist/component/contentEntries.d.ts +719 -0
  114. package/dist/component/contentEntries.d.ts.map +1 -0
  115. package/dist/component/contentEntries.js +1617 -0
  116. package/dist/component/contentEntries.js.map +1 -0
  117. package/dist/component/contentEntryMutations.d.ts +505 -0
  118. package/dist/component/contentEntryMutations.d.ts.map +1 -0
  119. package/dist/component/contentEntryMutations.js +1009 -0
  120. package/dist/component/contentEntryMutations.js.map +1 -0
  121. package/dist/component/contentEntryValidation.d.ts +115 -0
  122. package/dist/component/contentEntryValidation.d.ts.map +1 -0
  123. package/dist/component/contentEntryValidation.js +546 -0
  124. package/dist/component/contentEntryValidation.js.map +1 -0
  125. package/dist/component/contentLock.d.ts +328 -0
  126. package/dist/component/contentLock.d.ts.map +1 -0
  127. package/dist/component/contentLock.js +471 -0
  128. package/dist/component/contentLock.js.map +1 -0
  129. package/dist/component/contentTypeMigration.d.ts +411 -0
  130. package/dist/component/contentTypeMigration.d.ts.map +1 -0
  131. package/dist/component/contentTypeMigration.js +805 -0
  132. package/dist/component/contentTypeMigration.js.map +1 -0
  133. package/dist/component/contentTypeMutations.d.ts +975 -0
  134. package/dist/component/contentTypeMutations.d.ts.map +1 -0
  135. package/dist/component/contentTypeMutations.js +768 -0
  136. package/dist/component/contentTypeMutations.js.map +1 -0
  137. package/dist/component/contentTypes.d.ts +538 -0
  138. package/dist/component/contentTypes.d.ts.map +1 -0
  139. package/dist/component/contentTypes.js +304 -0
  140. package/dist/component/contentTypes.js.map +1 -0
  141. package/dist/component/convex.config.d.ts +42 -0
  142. package/dist/component/convex.config.d.ts.map +1 -0
  143. package/dist/component/convex.config.js +43 -0
  144. package/dist/component/convex.config.js.map +1 -0
  145. package/dist/component/documentTypes.d.ts +186 -0
  146. package/dist/component/documentTypes.d.ts.map +1 -0
  147. package/dist/component/documentTypes.js +23 -0
  148. package/dist/component/documentTypes.js.map +1 -0
  149. package/dist/component/eventEmitter.d.ts +281 -0
  150. package/dist/component/eventEmitter.d.ts.map +1 -0
  151. package/dist/component/eventEmitter.js +300 -0
  152. package/dist/component/eventEmitter.js.map +1 -0
  153. package/dist/component/exportImport.d.ts +1120 -0
  154. package/dist/component/exportImport.d.ts.map +1 -0
  155. package/dist/component/exportImport.js +931 -0
  156. package/dist/component/exportImport.js.map +1 -0
  157. package/dist/component/index.d.ts +28 -0
  158. package/dist/component/index.d.ts.map +1 -0
  159. package/dist/component/index.js +142 -0
  160. package/dist/component/index.js.map +1 -0
  161. package/dist/component/lib/deepReferenceResolver.d.ts +252 -0
  162. package/dist/component/lib/deepReferenceResolver.d.ts.map +1 -0
  163. package/dist/component/lib/deepReferenceResolver.js +601 -0
  164. package/dist/component/lib/deepReferenceResolver.js.map +1 -0
  165. package/dist/component/lib/errors.d.ts +306 -0
  166. package/dist/component/lib/errors.d.ts.map +1 -0
  167. package/dist/component/lib/errors.js +407 -0
  168. package/dist/component/lib/errors.js.map +1 -0
  169. package/dist/component/lib/index.d.ts +10 -0
  170. package/dist/component/lib/index.d.ts.map +1 -0
  171. package/dist/component/lib/index.js +33 -0
  172. package/dist/component/lib/index.js.map +1 -0
  173. package/dist/component/lib/mediaReferenceResolver.d.ts +217 -0
  174. package/dist/component/lib/mediaReferenceResolver.d.ts.map +1 -0
  175. package/dist/component/lib/mediaReferenceResolver.js +326 -0
  176. package/dist/component/lib/mediaReferenceResolver.js.map +1 -0
  177. package/dist/component/lib/metadataExtractor.d.ts +245 -0
  178. package/dist/component/lib/metadataExtractor.d.ts.map +1 -0
  179. package/dist/component/lib/metadataExtractor.js +548 -0
  180. package/dist/component/lib/metadataExtractor.js.map +1 -0
  181. package/dist/component/lib/mutationAuth.d.ts +95 -0
  182. package/dist/component/lib/mutationAuth.d.ts.map +1 -0
  183. package/dist/component/lib/mutationAuth.js +146 -0
  184. package/dist/component/lib/mutationAuth.js.map +1 -0
  185. package/dist/component/lib/queries.d.ts +17 -0
  186. package/dist/component/lib/queries.d.ts.map +1 -0
  187. package/dist/component/lib/queries.js +49 -0
  188. package/dist/component/lib/queries.js.map +1 -0
  189. package/dist/component/lib/ragContentChunker.d.ts +423 -0
  190. package/dist/component/lib/ragContentChunker.d.ts.map +1 -0
  191. package/dist/component/lib/ragContentChunker.js +897 -0
  192. package/dist/component/lib/ragContentChunker.js.map +1 -0
  193. package/dist/component/lib/referenceResolver.d.ts +175 -0
  194. package/dist/component/lib/referenceResolver.d.ts.map +1 -0
  195. package/dist/component/lib/referenceResolver.js +293 -0
  196. package/dist/component/lib/referenceResolver.js.map +1 -0
  197. package/dist/component/lib/slugGenerator.d.ts +71 -0
  198. package/dist/component/lib/slugGenerator.d.ts.map +1 -0
  199. package/dist/component/lib/slugGenerator.js +207 -0
  200. package/dist/component/lib/slugGenerator.js.map +1 -0
  201. package/dist/component/lib/slugUniqueness.d.ts +131 -0
  202. package/dist/component/lib/slugUniqueness.d.ts.map +1 -0
  203. package/dist/component/lib/slugUniqueness.js +229 -0
  204. package/dist/component/lib/slugUniqueness.js.map +1 -0
  205. package/dist/component/lib/softDelete.d.ts +18 -0
  206. package/dist/component/lib/softDelete.d.ts.map +1 -0
  207. package/dist/component/lib/softDelete.js +29 -0
  208. package/dist/component/lib/softDelete.js.map +1 -0
  209. package/dist/component/localeFallbackChain.d.ts +410 -0
  210. package/dist/component/localeFallbackChain.d.ts.map +1 -0
  211. package/dist/component/localeFallbackChain.js +467 -0
  212. package/dist/component/localeFallbackChain.js.map +1 -0
  213. package/dist/component/localeFields.d.ts +508 -0
  214. package/dist/component/localeFields.d.ts.map +1 -0
  215. package/dist/component/localeFields.js +592 -0
  216. package/dist/component/localeFields.js.map +1 -0
  217. package/dist/component/mediaAssetMutations.d.ts +235 -0
  218. package/dist/component/mediaAssetMutations.d.ts.map +1 -0
  219. package/dist/component/mediaAssetMutations.js +558 -0
  220. package/dist/component/mediaAssetMutations.js.map +1 -0
  221. package/dist/component/mediaAssets.d.ts +168 -0
  222. package/dist/component/mediaAssets.d.ts.map +1 -0
  223. package/dist/component/mediaAssets.js +618 -0
  224. package/dist/component/mediaAssets.js.map +1 -0
  225. package/dist/component/mediaFolderMutations.d.ts +642 -0
  226. package/dist/component/mediaFolderMutations.d.ts.map +1 -0
  227. package/dist/component/mediaFolderMutations.js +849 -0
  228. package/dist/component/mediaFolderMutations.js.map +1 -0
  229. package/dist/component/mediaUploadMutations.d.ts +136 -0
  230. package/dist/component/mediaUploadMutations.d.ts.map +1 -0
  231. package/dist/component/mediaUploadMutations.js +205 -0
  232. package/dist/component/mediaUploadMutations.js.map +1 -0
  233. package/dist/component/mediaVariantMutations.d.ts +468 -0
  234. package/dist/component/mediaVariantMutations.d.ts.map +1 -0
  235. package/dist/component/mediaVariantMutations.js +737 -0
  236. package/dist/component/mediaVariantMutations.js.map +1 -0
  237. package/dist/component/mediaVariants.d.ts +525 -0
  238. package/dist/component/mediaVariants.d.ts.map +1 -0
  239. package/dist/component/mediaVariants.js +661 -0
  240. package/dist/component/mediaVariants.js.map +1 -0
  241. package/dist/component/ragContentIndexer.d.ts +595 -0
  242. package/dist/component/ragContentIndexer.d.ts.map +1 -0
  243. package/dist/component/ragContentIndexer.js +794 -0
  244. package/dist/component/ragContentIndexer.js.map +1 -0
  245. package/dist/component/rateLimitHooks.d.ts +266 -0
  246. package/dist/component/rateLimitHooks.d.ts.map +1 -0
  247. package/dist/component/rateLimitHooks.js +412 -0
  248. package/dist/component/rateLimitHooks.js.map +1 -0
  249. package/dist/component/roles.d.ts +649 -0
  250. package/dist/component/roles.d.ts.map +1 -0
  251. package/dist/component/roles.js +884 -0
  252. package/dist/component/roles.js.map +1 -0
  253. package/dist/component/scheduledPublish.d.ts +182 -0
  254. package/dist/component/scheduledPublish.d.ts.map +1 -0
  255. package/dist/component/scheduledPublish.js +304 -0
  256. package/dist/component/scheduledPublish.js.map +1 -0
  257. package/dist/component/schema.d.ts +4114 -0
  258. package/dist/component/schema.d.ts.map +1 -0
  259. package/dist/component/schema.js +469 -0
  260. package/dist/component/schema.js.map +1 -0
  261. package/dist/component/taxonomies.d.ts +476 -0
  262. package/dist/component/taxonomies.d.ts.map +1 -0
  263. package/dist/component/taxonomies.js +785 -0
  264. package/dist/component/taxonomies.js.map +1 -0
  265. package/dist/component/taxonomyMutations.d.ts +206 -0
  266. package/dist/component/taxonomyMutations.d.ts.map +1 -0
  267. package/dist/component/taxonomyMutations.js +1001 -0
  268. package/dist/component/taxonomyMutations.js.map +1 -0
  269. package/dist/component/trash.d.ts +265 -0
  270. package/dist/component/trash.d.ts.map +1 -0
  271. package/dist/component/trash.js +621 -0
  272. package/dist/component/trash.js.map +1 -0
  273. package/dist/component/types.d.ts +4 -0
  274. package/dist/component/types.d.ts.map +1 -0
  275. package/dist/component/types.js +2 -0
  276. package/dist/component/types.js.map +1 -0
  277. package/dist/component/userContext.d.ts +508 -0
  278. package/dist/component/userContext.d.ts.map +1 -0
  279. package/dist/component/userContext.js +615 -0
  280. package/dist/component/userContext.js.map +1 -0
  281. package/dist/component/validation.d.ts +387 -0
  282. package/dist/component/validation.d.ts.map +1 -0
  283. package/dist/component/validation.js +1052 -0
  284. package/dist/component/validation.js.map +1 -0
  285. package/dist/component/validators.d.ts +4645 -0
  286. package/dist/component/validators.d.ts.map +1 -0
  287. package/dist/component/validators.js +641 -0
  288. package/dist/component/validators.js.map +1 -0
  289. package/dist/component/versionMutations.d.ts +216 -0
  290. package/dist/component/versionMutations.d.ts.map +1 -0
  291. package/dist/component/versionMutations.js +321 -0
  292. package/dist/component/versionMutations.js.map +1 -0
  293. package/dist/component/webhookTrigger.d.ts +770 -0
  294. package/dist/component/webhookTrigger.d.ts.map +1 -0
  295. package/dist/component/webhookTrigger.js +1413 -0
  296. package/dist/component/webhookTrigger.js.map +1 -0
  297. package/dist/react/index.d.ts +316 -0
  298. package/dist/react/index.d.ts.map +1 -0
  299. package/dist/react/index.js +558 -0
  300. package/dist/react/index.js.map +1 -0
  301. package/dist/test.d.ts +2230 -0
  302. package/dist/test.d.ts.map +1 -0
  303. package/dist/test.js +1107 -0
  304. package/dist/test.js.map +1 -0
  305. package/package.json +95 -0
  306. package/src/cli/commands/admin.ts +104 -0
  307. package/src/cli/index.ts +21 -0
  308. package/src/cli/utils/detectConvexUrl.ts +54 -0
  309. package/src/cli/utils/openBrowser.ts +16 -0
  310. package/src/client/admin-config.ts +138 -0
  311. package/src/client/adminApi.ts +942 -0
  312. package/src/client/agentTools.ts +1311 -0
  313. package/src/client/argTypes.ts +316 -0
  314. package/src/client/field-types.ts +187 -0
  315. package/src/client/index.ts +1301 -0
  316. package/src/client/queryBuilder.ts +1100 -0
  317. package/src/client/schema/codegen.ts +500 -0
  318. package/src/client/schema/defineContentType.ts +501 -0
  319. package/src/client/schema/index.ts +169 -0
  320. package/src/client/schema/schemaDrift.ts +574 -0
  321. package/src/client/schema/typedClient.ts +688 -0
  322. package/src/client/schema/types.ts +666 -0
  323. package/src/client/types.ts +723 -0
  324. package/src/client/workflows.ts +141 -0
  325. package/src/client/wrapper.ts +4304 -0
  326. package/src/component/_generated/api.ts +140 -0
  327. package/src/component/_generated/component.ts +5029 -0
  328. package/src/component/_generated/dataModel.ts +60 -0
  329. package/src/component/_generated/server.ts +156 -0
  330. package/src/component/authorization.ts +647 -0
  331. package/src/component/authorizationHooks.ts +668 -0
  332. package/src/component/bulkOperations.ts +687 -0
  333. package/src/component/contentEntries.ts +1976 -0
  334. package/src/component/contentEntryMutations.ts +1223 -0
  335. package/src/component/contentEntryValidation.ts +707 -0
  336. package/src/component/contentLock.ts +550 -0
  337. package/src/component/contentTypeMigration.ts +1064 -0
  338. package/src/component/contentTypeMutations.ts +969 -0
  339. package/src/component/contentTypes.ts +346 -0
  340. package/src/component/convex.config.ts +44 -0
  341. package/src/component/documentTypes.ts +240 -0
  342. package/src/component/eventEmitter.ts +485 -0
  343. package/src/component/exportImport.ts +1169 -0
  344. package/src/component/index.ts +491 -0
  345. package/src/component/lib/deepReferenceResolver.ts +999 -0
  346. package/src/component/lib/errors.ts +816 -0
  347. package/src/component/lib/index.ts +145 -0
  348. package/src/component/lib/mediaReferenceResolver.ts +495 -0
  349. package/src/component/lib/metadataExtractor.ts +792 -0
  350. package/src/component/lib/mutationAuth.ts +199 -0
  351. package/src/component/lib/queries.ts +79 -0
  352. package/src/component/lib/ragContentChunker.ts +1371 -0
  353. package/src/component/lib/referenceResolver.ts +430 -0
  354. package/src/component/lib/slugGenerator.ts +262 -0
  355. package/src/component/lib/slugUniqueness.ts +333 -0
  356. package/src/component/lib/softDelete.ts +44 -0
  357. package/src/component/localeFallbackChain.ts +673 -0
  358. package/src/component/localeFields.ts +896 -0
  359. package/src/component/mediaAssetMutations.ts +725 -0
  360. package/src/component/mediaAssets.ts +932 -0
  361. package/src/component/mediaFolderMutations.ts +1046 -0
  362. package/src/component/mediaUploadMutations.ts +224 -0
  363. package/src/component/mediaVariantMutations.ts +900 -0
  364. package/src/component/mediaVariants.ts +793 -0
  365. package/src/component/ragContentIndexer.ts +1067 -0
  366. package/src/component/rateLimitHooks.ts +572 -0
  367. package/src/component/roles.ts +1360 -0
  368. package/src/component/scheduledPublish.ts +358 -0
  369. package/src/component/schema.ts +617 -0
  370. package/src/component/taxonomies.ts +949 -0
  371. package/src/component/taxonomyMutations.ts +1210 -0
  372. package/src/component/trash.ts +724 -0
  373. package/src/component/userContext.ts +898 -0
  374. package/src/component/validation.ts +1388 -0
  375. package/src/component/validators.ts +949 -0
  376. package/src/component/versionMutations.ts +392 -0
  377. package/src/component/webhookTrigger.ts +1922 -0
  378. package/src/react/index.ts +898 -0
  379. package/src/test.ts +1580 -0
@@ -0,0 +1,897 @@
1
+ /**
2
+ * RAG Content Chunker
3
+ *
4
+ * Utility to extract and structure content from CMS entries for @convex-dev/rag indexing.
5
+ * This module provides:
6
+ *
7
+ * 1. **Content Extraction**: Extracts text from various CMS field types (text, richText, json, etc.)
8
+ * 2. **Semantic Chunking**: Splits content into meaningful chunks optimized for embedding
9
+ * 3. **Metadata Tagging**: Attaches relevant metadata (content type, field source, locale, etc.)
10
+ * 4. **Reference Handling**: Processes embedded references and includes contextual information
11
+ *
12
+ * The output is designed to be directly compatible with @convex-dev/rag's `add()` function.
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * import { extractContentForRag, chunkContentEntry } from "@convex-cms/core/lib";
17
+ * import { rag } from "@convex-dev/rag";
18
+ *
19
+ * // In a Convex action
20
+ * const chunks = await chunkContentEntry(ctx, entry, contentType, {
21
+ * includeMetadata: true,
22
+ * chunkOptions: { maxCharsSoftLimit: 1000 },
23
+ * });
24
+ *
25
+ * await rag.add(ctx, {
26
+ * namespace: "cms-content",
27
+ * key: entry._id,
28
+ * chunks: chunks.map(c => c.text),
29
+ * title: chunks[0]?.metadata?.title,
30
+ * });
31
+ * ```
32
+ *
33
+ * @module
34
+ */
35
+ // =============================================================================
36
+ // Default Configuration
37
+ // =============================================================================
38
// Defaults for splitting extracted text into chunks.
// NOTE(review): the chunking code that consumes these values is not visible in
// this part of the file; confirm the exact meaning of each limit there.
const DEFAULT_CHUNK_OPTIONS = {
    minLines: 1,
    minCharsSoftLimit: 100,
    maxCharsSoftLimit: 1000,
    maxCharsHardLimit: 4000,
    delimiter: "\n\n",
    fallbackDelimiters: ["\n", ". ", ", "],
    preserveHeadingContext: true,
    overlapChars: 50,
};

// Defaults merged underneath caller-supplied extraction options.
const DEFAULT_EXTRACTION_OPTIONS = {
    includeMetadata: true,

    // Empty lists mean "no filtering": extractContent only applies the
    // include/exclude checks when the corresponding list is non-empty.
    includeFields: [],
    excludeFields: [],

    // Per-field-type switches consulted by extractContent.
    extractRichText: true,
    extractJson: true,
    includeReferenceContext: true,

    // Shares the DEFAULT_CHUNK_OPTIONS object by reference (not a copy).
    chunkOptions: DEFAULT_CHUNK_OPTIONS,

    chunkPrefix: "",
    chunkSuffix: "",
    createSummaryChunk: false,
    summaryFields: [],
};
61
+ // =============================================================================
62
+ // Text Extraction Functions
63
+ // =============================================================================
64
/**
 * Extracts plain text from a rich text field value.
 *
 * Supported inputs:
 * - Strings (HTML, Markdown, or plain text): passed through `stripHtmlTags`.
 *   Markdown syntax is not otherwise interpreted here.
 * - ProseMirror/Tiptap documents (`{ type: "doc", content: [...] }`).
 * - Arrays of blocks: each block is extracted recursively and the non-empty
 *   results are joined with a blank line.
 * - Generic objects with a string `text` or string `content` property.
 * - Bare ProseMirror-style nodes (any `type`, array `content`), e.g. the
 *   elements of an array of blocks.
 *
 * @param value - The rich text field value
 * @returns Plain text content, or "" when nothing can be extracted
 */
export function extractTextFromRichText(value) {
    if (value === null || value === undefined) {
        return "";
    }
    // Handle string values (HTML, Markdown, or plain text)
    if (typeof value === "string") {
        return stripHtmlTags(value);
    }
    if (typeof value !== "object") {
        return "";
    }
    // Array of blocks: drop blocks that yield no text so they do not leave
    // stray blank separators in the joined output.
    if (Array.isArray(value)) {
        return value
            .map((block) => extractTextFromRichText(block))
            .filter(Boolean)
            .join("\n\n");
    }
    // Full ProseMirror document
    if (value.type === "doc" && Array.isArray(value.content)) {
        return extractTextFromProseMirrorDoc(value);
    }
    // Generic object - try to find text content
    if (typeof value.text === "string") {
        return value.text;
    }
    if (typeof value.content === "string") {
        return value.content;
    }
    // Bare node with child nodes (not wrapped in a "doc"): wrap it in a
    // synthetic document so the per-node-type handling of the document
    // extractor applies instead of silently returning "".
    if (Array.isArray(value.content)) {
        return extractTextFromProseMirrorDoc({ type: "doc", content: [value] });
    }
    return "";
}
105
/**
 * Extracts text from a ProseMirror document structure.
 *
 * Walks the block-level nodes in `doc.content` and joins the non-empty text of
 * each block with a blank line. Also used recursively for container nodes
 * (blockquotes and unknown node types).
 *
 * @param doc - Node-like object whose `content` is an array of child nodes
 * @returns Extracted text, or "" when `content` is not an array
 */
function extractTextFromProseMirrorDoc(doc) {
    const content = doc.content;
    if (!Array.isArray(content)) {
        return "";
    }
    const textParts = [];
    for (const node of content) {
        if (typeof node !== "object" || node === null)
            continue;
        switch (node.type) {
            case "paragraph":
            case "heading":
                textParts.push(extractTextFromProseMirrorNode(node));
                break;
            case "bulletList":
            case "orderedList":
                textParts.push(extractTextFromProseMirrorList(node));
                break;
            case "blockquote": {
                const quoteText = extractTextFromProseMirrorDoc(node);
                // Skip empty quotes; otherwise a bare pair of quote marks
                // would be emitted as text.
                if (quoteText) {
                    textParts.push(`"${quoteText}"`);
                }
                break;
            }
            case "codeBlock":
                if (Array.isArray(node.content)) {
                    // Tolerate null / non-object children instead of throwing.
                    const codeText = node.content
                        .map((c) => (c !== null && typeof c === "object" && c.text) || "")
                        .join("");
                    textParts.push(codeText);
                }
                break;
            case "horizontalRule":
                // Skip horizontal rules
                break;
            default:
                // Unknown node type: try generic extraction.
                if (Array.isArray(node.content)) {
                    // If the children are inline text nodes, recursing as a
                    // document would drop them (text nodes have no content),
                    // so use the inline extractor for that case.
                    const hasInlineText = node.content.some((child) => child !== null &&
                        typeof child === "object" &&
                        child.type === "text");
                    textParts.push(hasInlineText
                        ? extractTextFromProseMirrorNode(node)
                        : extractTextFromProseMirrorDoc(node));
                }
        }
    }
    return textParts.filter(Boolean).join("\n\n");
}
153
/**
 * Extracts text from a ProseMirror node with inline content.
 *
 * Concatenates the text of the node's inline children without separators.
 * Text nodes contribute their `text`; hard breaks contribute a newline so the
 * words on either side are not glued together; any other child that has
 * `content` is processed recursively. Everything else is ignored.
 *
 * @param node - Node-like object whose `content` is an array of inline nodes
 * @returns Concatenated text, or "" when `content` is not an array
 */
function extractTextFromProseMirrorNode(node) {
    const content = node.content;
    if (!Array.isArray(content)) {
        return "";
    }
    let text = "";
    for (const child of content) {
        if (typeof child !== "object" || child === null)
            continue;
        if (child.type === "text") {
            text += child.text || "";
        }
        else if (child.type === "hardBreak" || child.type === "hard_break") {
            // "hardBreak" is the Tiptap name, "hard_break" the name used by
            // the ProseMirror basic schema.
            text += "\n";
        }
        else if (child.content) {
            // Handle inline nodes with content
            text += extractTextFromProseMirrorNode(child);
        }
    }
    return text;
}
177
/**
 * Extracts text from a ProseMirror list node.
 *
 * Each list item is rendered on its own line as `- <item text>`, where the
 * item text comes from `extractTextFromProseMirrorDoc`. Bullet and ordered
 * lists are rendered the same way. Entries that are not objects are skipped
 * rather than producing blank lines.
 *
 * @param list - List node whose `content` is an array of list items
 * @returns One line per item, or "" when `content` is not an array
 */
function extractTextFromProseMirrorList(list) {
    const items = list.content;
    if (!Array.isArray(items)) {
        return "";
    }
    return items
        .filter((item) => typeof item === "object" && item !== null)
        .map((item) => `- ${extractTextFromProseMirrorDoc(item)}`)
        .join("\n");
}
195
/**
 * Strips HTML tags from a string, preserving structure where possible.
 *
 * Block-level tags are turned into line breaks, all remaining tags are
 * removed, a small set of common entities is decoded, and whitespace is
 * normalized (each line trimmed, runs of 3+ newlines collapsed to a blank
 * line, result trimmed). Only the tags themselves are removed; text between
 * tags is always kept.
 *
 * @param html - HTML (or plain text) string
 * @returns Plain text; "" for empty or non-string input
 */
export function stripHtmlTags(html) {
    if (typeof html !== "string" || html === "") {
        return "";
    }
    const entityReplacements = {
        "&nbsp;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&#39;": "'",
        "&mdash;": "—",
        "&ndash;": "–",
    };
    const withoutTags = html
        // First, add newlines for block elements
        .replace(/<\/?(p|div|br|h[1-6]|li|tr)[^>]*>/gi, "\n")
        .replace(/<\/?(ul|ol|table|blockquote)[^>]*>/gi, "\n\n")
        // Remove remaining HTML tags
        .replace(/<[^>]*>/g, "");
    // Decode common HTML entities in a single pass. Decoding them one after
    // another (with "&amp;" first) would double-decode escaped entities such
    // as "&amp;lt;" into "<" instead of the literal text "&lt;".
    const decoded = withoutTags.replace(/&(?:nbsp|amp|lt|gt|quot|#39|mdash|ndash);/g, (entity) => entityReplacements[entity]);
    // Clean up whitespace
    return decoded
        .split("\n")
        .map((line) => line.trim())
        .join("\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
226
/**
 * Extracts text from a JSON field value.
 *
 * Recursively collects text from strings, numbers, and booleans found in
 * objects and arrays:
 * - Array items are joined with ", " (empty results dropped).
 * - Object values are joined with " ". A fixed list of common text keys is
 *   emitted first, in that list's order; remaining own keys follow in
 *   `Object.entries` order, skipping keys that start with "_" or "$".
 *
 * Each level of nesting consumes one unit of `maxDepth`; once it reaches 0
 * the value (even a string) contributes no text.
 *
 * @param value - The JSON field value
 * @param maxDepth - Maximum recursion depth
 * @returns Extracted text content
 */
export function extractTextFromJson(value, maxDepth = 5) {
    if (maxDepth <= 0 || value === null || value === undefined) {
        return "";
    }
    if (typeof value === "string") {
        return value;
    }
    if (typeof value === "number" || typeof value === "boolean") {
        return String(value);
    }
    if (Array.isArray(value)) {
        return value
            .map((item) => extractTextFromJson(item, maxDepth - 1))
            .filter(Boolean)
            .join(", ");
    }
    if (typeof value !== "object") {
        return "";
    }
    // Prioritize common text field names
    const priorityKeys = [
        "text",
        "content",
        "value",
        "label",
        "title",
        "name",
        "description",
    ];
    const textParts = [];
    const visitedKeys = new Set();
    for (const key of priorityKeys) {
        // Own properties only, matching the Object.entries pass below; `in`
        // would also pick up properties inherited from the prototype chain.
        if (!Object.prototype.hasOwnProperty.call(value, key))
            continue;
        // Mark the key as handled even when it yields no text so the second
        // pass does not recurse into the same value again.
        visitedKeys.add(key);
        const extracted = extractTextFromJson(value[key], maxDepth - 1);
        if (extracted) {
            textParts.push(extracted);
        }
    }
    // Then process remaining keys
    for (const [key, val] of Object.entries(value)) {
        if (visitedKeys.has(key))
            continue;
        // Skip internal/system keys
        if (key.startsWith("_") || key.startsWith("$"))
            continue;
        const extracted = extractTextFromJson(val, maxDepth - 1);
        if (extracted) {
            textParts.push(extracted);
        }
    }
    return textParts.join(" ");
}
293
/**
 * Extracts text from a select or multiSelect field value.
 *
 * A string is returned unchanged. For an array, the string entries are joined
 * with ", " and non-string entries are ignored. Any other value (including
 * null and undefined) yields "".
 *
 * @param value - The select / multiSelect field value
 * @returns The selected option(s) as text
 */
export function extractTextFromSelect(value) {
    if (typeof value === "string") {
        return value;
    }
    if (!Array.isArray(value)) {
        return "";
    }
    const selected = [];
    for (const option of value) {
        if (typeof option === "string") {
            selected.push(option);
        }
    }
    return selected.join(", ");
}
308
+ // =============================================================================
309
+ // Content Extraction
310
+ // =============================================================================
311
/**
 * Extracts text from a content entry, driven by the fields of its content type.
 *
 * Steps:
 * 1. Select the fields to process (include list first, then exclude list).
 * 2. Convert each non-null field value to text according to `field.type`.
 * 3. Collect the IDs found in "reference" and "media" fields.
 * 4. Join the per-field texts with blank lines, title first.
 *
 * @param entry - The content entry; field values are read from `entry.data`
 * @param contentType - The content type definition (`fields`, `titleField`)
 * @param options - Extraction options, merged over DEFAULT_EXTRACTION_OPTIONS
 * @param resolvedReferences - Optional map from reference ID to resolved info;
 *   only each item's `title` is read here
 * @returns `{ fullText, fieldTexts, title, referencedEntryIds, referencedMediaIds, sourceInfo }`
 *
 * @example
 * ```typescript
 * const extracted = extractContent(entry, contentType, {
 *   includeFields: ["title", "content", "excerpt"],
 *   extractRichText: true,
 * });
 *
 * console.log(extracted.fullText);
 * // "My Blog Post\n\nThis is the main content...\n\nA brief excerpt."
 * ```
 */
export function extractContent(entry, contentType, options = {}, resolvedReferences) {
    const settings = { ...DEFAULT_EXTRACTION_OPTIONS, ...options };
    const entryData = entry.data || {};
    const fieldTexts = {};
    const sourceInfo = [];
    const referencedEntryIds = [];
    const referencedMediaIds = [];
    let title;
    // A field is processed when it passes the include list (if one is given)
    // and is not named in the exclude list (if one is given).
    const isSelected = (field) => {
        const { includeFields, excludeFields } = settings;
        if (includeFields && includeFields.length > 0 && !includeFields.includes(field.name)) {
            return false;
        }
        if (excludeFields && excludeFields.length > 0 && excludeFields.includes(field.name)) {
            return false;
        }
        return true;
    };
    const selectedFields = contentType.fields.filter(isSelected);
    // Converts one field value to text; reference and media IDs are recorded
    // as a side effect. Returns "" when the field contributes no text.
    const readFieldText = (field, rawValue) => {
        switch (field.type) {
            case "text":
                return typeof rawValue === "string" ? rawValue : String(rawValue);
            case "richText":
                return settings.extractRichText ? extractTextFromRichText(rawValue) : "";
            case "json":
                return settings.extractJson ? extractTextFromJson(rawValue) : "";
            case "select":
            case "multiSelect":
                return extractTextFromSelect(rawValue);
            case "reference": {
                const ids = extractReferenceIds(rawValue, field);
                referencedEntryIds.push(...ids);
                if (!settings.includeReferenceContext || !resolvedReferences) {
                    return "";
                }
                // Only references that resolved to something with a title add context.
                const labels = [];
                for (const id of ids) {
                    const resolved = resolvedReferences.get(id);
                    if (resolved && resolved.title) {
                        labels.push(`[${resolved.title}]`);
                    }
                }
                return labels.length > 0 ? `Referenced: ${labels.join(", ")}` : "";
            }
            case "media":
                // Media IDs are tracked, but media contributes no text.
                referencedMediaIds.push(...extractMediaIds(rawValue, field));
                return "";
            case "number":
            case "boolean":
            case "date":
            case "datetime":
                return formatFieldValue(rawValue, field.type);
            default:
                // Unknown field type: only plain strings are taken over.
                return typeof rawValue === "string" ? rawValue : "";
        }
    };
    for (const field of selectedFields) {
        const rawValue = entryData[field.name];
        if (rawValue === null || rawValue === undefined) {
            continue;
        }
        const fieldText = readFieldText(field, rawValue);
        if (!fieldText) {
            continue;
        }
        fieldTexts[field.name] = fieldText;
        sourceInfo.push({
            fieldName: field.name,
            fieldLabel: field.label,
            fieldType: field.type,
            charCount: fieldText.length,
        });
        if (field.name === contentType.titleField) {
            title = fieldText;
        }
    }
    // Title goes first; the remaining fields follow in content-type order.
    const bodyTexts = selectedFields
        .filter((field) => field.name !== contentType.titleField)
        .map((field) => fieldTexts[field.name])
        .filter(Boolean);
    const fullTextParts = title ? [title, ...bodyTexts] : bodyTexts;
    return {
        fullText: fullTextParts.join("\n\n"),
        fieldTexts,
        title,
        referencedEntryIds,
        referencedMediaIds,
        sourceInfo,
    };
}
462
/**
 * Collects the reference IDs held by a reference field value.
 *
 * A string value is a single ID. An array value is only honoured when the
 * field declares `options.multiple === true`; its non-string members are
 * dropped. Any other value yields an empty list.
 *
 * @param value - Raw field value
 * @param field - Field definition (only `options.multiple` is read)
 * @returns Array of ID strings, possibly empty
 */
function extractReferenceIds(value, field) {
    if (value == null) {
        return [];
    }
    const acceptsMany = field.options?.multiple === true;
    if (typeof value === "string") {
        return [value];
    }
    if (!acceptsMany || !Array.isArray(value)) {
        return [];
    }
    const ids = [];
    for (const candidate of value) {
        if (typeof candidate === "string") {
            ids.push(candidate);
        }
    }
    return ids;
}
478
/**
 * Collects the media IDs held by a media field value.
 *
 * Arrays are only read when the field is flagged `options.multiple === true`
 * (keeping just their string members); a lone string counts as one ID;
 * everything else produces an empty list.
 *
 * @param value - Raw field value
 * @param field - Field definition (only `options.multiple` is read)
 * @returns Array of ID strings, possibly empty
 */
function extractMediaIds(value, field) {
    const isMissing = value === undefined || value === null;
    if (isMissing) {
        return [];
    }
    const allowsList = field.options?.multiple === true;
    if (allowsList && Array.isArray(value)) {
        return value.filter((item) => typeof item === "string");
    }
    return typeof value === "string" ? [value] : [];
}
494
/**
 * Formats a scalar field value as text.
 *
 * - "number": the value's string form
 * - "boolean": "Yes" for truthy values, "No" otherwise
 * - "date" / "datetime": strings pass through; numbers are treated as epoch
 *   milliseconds and rendered as an ISO-8601 string
 * - anything else: `String(value)`
 *
 * @param value - Raw field value
 * @param fieldType - The field's declared type
 * @returns Text representation, or "" for null/undefined
 */
function formatFieldValue(value, fieldType) {
    if (value === null || value === undefined) {
        return "";
    }
    switch (fieldType) {
        case "boolean":
            return value ? "Yes" : "No";
        case "date":
        case "datetime": {
            if (typeof value !== "number") {
                // Strings are returned as-is; other types fall back to String().
                return String(value);
            }
            const parsed = new Date(value);
            // toISOString() throws a RangeError on an invalid Date (NaN, Infinity,
            // or a timestamp outside the representable range). Fall back to the
            // raw number so one bad value cannot abort the whole extraction.
            return Number.isNaN(parsed.getTime()) ? String(value) : parsed.toISOString();
        }
        case "number":
        default:
            return String(value);
    }
}
519
+ // =============================================================================
520
+ // Text Chunking
521
+ // =============================================================================
522
/**
 * Splits text into chunks sized for embedding.
 *
 * How it works:
 * 1. Empty or whitespace-only text gives no chunks; text no longer than
 *    `maxCharsSoftLimit` is returned as one trimmed chunk.
 * 2. The text is split on `delimiter`. If that produces at most two segments,
 *    each entry of `fallbackDelimiters` is tried in order and the first one
 *    producing more segments is used.
 * 3. Segments are accumulated (joined by a blank line) until adding the next
 *    one would exceed the soft limit; the accumulated chunk is then emitted.
 * 4. With `preserveHeadingContext`, the most recent heading-like segment is
 *    prepended to the chunk started after a flush.
 * 5. Anything longer than `maxCharsHardLimit` is force-split.
 *
 * @param text - The text to chunk
 * @param options - Chunking options, merged over DEFAULT_CHUNK_OPTIONS
 * @returns Array of text chunks
 *
 * @example
 * ```typescript
 * const chunks = chunkText(longArticle, {
 *   maxCharsSoftLimit: 1000,
 *   preserveHeadingContext: true,
 * });
 * ```
 */
export function chunkText(text, options = {}) {
    const settings = { ...DEFAULT_CHUNK_OPTIONS, ...options };
    if (!text || text.trim().length === 0) {
        return [];
    }
    if (text.length <= settings.maxCharsSoftLimit) {
        return [text.trim()];
    }
    // Primary split; fall back to finer delimiters when it barely splits at all.
    let pieces = text.split(settings.delimiter);
    if (pieces.length <= 2 && text.length > settings.maxCharsSoftLimit) {
        for (const alternative of settings.fallbackDelimiters) {
            const finerPieces = text.split(alternative);
            if (finerPieces.length > pieces.length) {
                pieces = finerPieces;
                break;
            }
        }
    }
    const output = [];
    let lastHeading = null;
    let pending = "";
    const nonEmptyPieces = pieces.map((piece) => piece.trim()).filter(Boolean);
    for (const piece of nonEmptyPieces) {
        const pieceIsHeading = detectHeading(piece);
        if (pieceIsHeading && settings.preserveHeadingContext) {
            lastHeading = piece;
        }
        const joined = pending ? `${pending}\n\n${piece}` : piece;
        const overflows = joined.length > settings.maxCharsSoftLimit && pending;
        if (!overflows) {
            pending = joined;
        }
        else {
            // Flush what has accumulated, then start over with this piece,
            // carrying the current heading along when requested.
            output.push(finalizeChunk(pending, lastHeading, settings));
            const carryHeading = settings.preserveHeadingContext && lastHeading && !pieceIsHeading;
            pending = carryHeading ? `${lastHeading}\n\n${piece}` : piece;
        }
        // A pending chunk beyond the hard limit is cut up now; its last part
        // stays pending so later pieces can still be appended to it.
        if (pending.length > settings.maxCharsHardLimit) {
            const parts = forceSplitText(pending, settings);
            const lastPart = parts.pop();
            output.push(...parts);
            pending = lastPart || "";
        }
    }
    if (pending.trim()) {
        output.push(finalizeChunk(pending, null, settings));
    }
    return output;
}
608
/**
 * Heuristically decides whether a text segment looks like a heading.
 *
 * The trimmed segment must be shorter than 100 characters, must not end in
 * ".", "!" or "?", and must match one of:
 * - a Markdown heading ("# " through "###### ")
 * - a capitalised run of word characters and whitespace, optionally ending in ":"
 * - a numbered section ("1. Something")
 *
 * @param text - Segment to inspect
 * @returns true when the segment looks like a heading
 */
function detectHeading(text) {
    const candidate = text.trim();
    if (candidate.length >= 100 || /[.!?]$/.test(candidate)) {
        return false;
    }
    const headingPatterns = [
        /^#{1,6}\s/, // Markdown headings
        /^[A-Z][\w\s]+:?$/, // Title Case lines
        /^\d+\.\s+[A-Z]/, // Numbered sections
    ];
    return headingPatterns.some((pattern) => pattern.test(candidate));
}
625
/**
 * Finalizes a chunk before it is emitted.
 *
 * Currently this only strips leading and trailing whitespace; the heading
 * and options arguments are accepted but not used.
 *
 * @param chunk - Accumulated chunk text
 * @param _heading - Unused
 * @param _opts - Unused
 * @returns The trimmed chunk
 */
function finalizeChunk(chunk, _heading, _opts) {
    const trimmedChunk = chunk.trim();
    return trimmedChunk;
}
631
/**
 * Force-splits text that exceeds the hard limit.
 *
 * While the remaining text is longer than `opts.maxCharsHardLimit`, a piece is
 * cut off the front. The cut is placed, in order of preference:
 * 1. just after the last ". " found at or before `opts.maxCharsSoftLimit`,
 * 2. at the last space found at or before `opts.maxCharsSoftLimit`,
 * 3. at `opts.maxCharsSoftLimit` itself.
 * Options 1 and 2 are only used when the boundary lies beyond
 * `opts.minCharsSoftLimit`, so pieces do not become too small.
 *
 * @param text - Text to split
 * @param opts - Options providing maxCharsHardLimit, maxCharsSoftLimit and
 *   minCharsSoftLimit
 * @returns Trimmed, non-empty pieces in original order
 */
function forceSplitText(text, opts) {
    const chunks = [];
    let remaining = text;
    // The explicit emptiness check keeps a non-positive hard limit from
    // spinning forever on an empty remainder.
    while (remaining.length > 0 && remaining.length > opts.maxCharsHardLimit) {
        let splitPoint = opts.maxCharsSoftLimit;
        const sentenceEnd = remaining.lastIndexOf(". ", splitPoint);
        if (sentenceEnd > opts.minCharsSoftLimit) {
            // Keep the period with the piece that ends the sentence.
            splitPoint = sentenceEnd + 1;
        }
        else {
            const spacePoint = remaining.lastIndexOf(" ", splitPoint);
            if (spacePoint > opts.minCharsSoftLimit) {
                splitPoint = spacePoint;
            }
        }
        // A split point that is not positive would consume nothing and loop
        // forever (e.g. soft limit 0, or a negative minimum accepting a -1
        // search result). Cut at the hard limit instead, and always take at
        // least one character so every iteration makes progress.
        if (!(splitPoint > 0)) {
            splitPoint = Math.max(1, opts.maxCharsHardLimit);
        }
        const head = remaining.slice(0, splitPoint).trim();
        if (head) {
            chunks.push(head);
        }
        remaining = remaining.slice(splitPoint).trim();
    }
    if (remaining) {
        chunks.push(remaining);
    }
    return chunks;
}
660
+ // =============================================================================
661
+ // Main API Functions
662
+ // =============================================================================
663
/**
 * Processes a content entry into chunks ready for RAG indexing.
 *
 * This is the main function to use for preparing CMS content for @convex-dev/rag.
 * It extracts the entry's text, optionally wraps it in a prefix and suffix,
 * chunks it, and attaches metadata to every chunk.
 *
 * `chunkPrefix` may contain the placeholders `{contentType}` (the content
 * type's display name), `{title}` (the extracted title, falling back to the
 * slug) and `{slug}`. Every occurrence is substituted and the values are
 * inserted literally.
 *
 * @param entry - The content entry to process
 * @param contentType - The content type definition
 * @param options - Extraction and chunking options, merged over
 *   DEFAULT_EXTRACTION_OPTIONS
 * @param resolvedReferences - Optional map of resolved references for context
 * @returns Array of `{ text, metadata }` chunks; empty when the entry yields no text
 *
 * @example
 * ```typescript
 * // In a Convex action
 * export const indexEntry = action({
 *   args: { entryId: v.id("contentEntries") },
 *   handler: async (ctx, { entryId }) => {
 *     const entry = await ctx.runQuery(api.contentEntries.get, { id: entryId });
 *     const contentType = await ctx.runQuery(api.contentTypes.get, {
 *       id: entry.contentTypeId
 *     });
 *
 *     const chunks = chunkContentEntry(entry, contentType, {
 *       chunkOptions: { maxCharsSoftLimit: 800 },
 *       includeMetadata: true,
 *     });
 *
 *     // Add to RAG index
 *     await rag.add(ctx, {
 *       namespace: `cms:${contentType.name}`,
 *       key: entryId,
 *       chunks: chunks.map(c => c.text),
 *       title: entry.data.title,
 *     });
 *
 *     return { indexed: chunks.length };
 *   },
 * });
 * ```
 */
export function chunkContentEntry(entry, contentType, options = {}, resolvedReferences) {
    const opts = { ...DEFAULT_EXTRACTION_OPTIONS, ...options };
    // Extract content from the entry
    const extracted = extractContent(entry, contentType, opts, resolvedReferences);
    if (!extracted.fullText) {
        return [];
    }
    // Apply prefix/suffix to full text before chunking
    let textToChunk = extracted.fullText;
    if (opts.chunkPrefix) {
        const placeholderValues = {
            contentType: contentType.displayName,
            title: extracted.title || entry.slug,
            slug: entry.slug,
        };
        // A replacer function is used on purpose: with a string replacement,
        // sequences such as "$&" or "$1" inside a title would be interpreted
        // as replacement patterns. The single pass also avoids re-scanning
        // text that was just substituted.
        const prefix = opts.chunkPrefix.replace(/\{(contentType|title|slug)\}/g, (_match, key) => placeholderValues[key]);
        textToChunk = `${prefix}\n\n${textToChunk}`;
    }
    if (opts.chunkSuffix) {
        textToChunk = `${textToChunk}\n\n${opts.chunkSuffix}`;
    }
    // Chunk the text
    const textChunks = chunkText(textToChunk, opts.chunkOptions);
    const sourceFields = extracted.sourceInfo.map((s) => s.fieldName);
    // Build content chunks with metadata. Metadata is attached regardless of
    // opts.includeMetadata; the summary re-indexing below depends on it.
    const chunks = textChunks.map((text, index) => {
        const metadata = {
            entryId: entry._id,
            contentType: contentType.name,
            contentTypeDisplayName: contentType.displayName,
            slug: entry.slug,
            status: entry.status,
            locale: entry.locale,
            sourceFields: [...sourceFields],
            chunkIndex: index,
            totalChunks: textChunks.length,
            title: extracted.title,
            createdAt: new Date(entry._creationTime).toISOString(),
            firstPublishedAt: entry.firstPublishedAt
                ? new Date(entry.firstPublishedAt).toISOString()
                : undefined,
            lastPublishedAt: entry.lastPublishedAt
                ? new Date(entry.lastPublishedAt).toISOString()
                : undefined,
            version: entry.version,
            referencedEntryIds: extracted.referencedEntryIds.length > 0
                ? extracted.referencedEntryIds
                : undefined,
            referencedMediaIds: extracted.referencedMediaIds.length > 0
                ? extracted.referencedMediaIds
                : undefined,
            semanticType: detectSemanticType(text),
        };
        return { text, metadata };
    });
    // Optionally create a summary chunk
    if (opts.createSummaryChunk && chunks.length > 0) {
        const summaryChunk = createSummaryChunk(entry, contentType, extracted, chunks.length);
        chunks.unshift(summaryChunk);
        // The summary now sits at index 0, so renumber every chunk.
        chunks.forEach((chunk, index) => {
            chunk.metadata.chunkIndex = index;
            chunk.metadata.totalChunks = chunks.length;
        });
    }
    return chunks;
}
768
+ /**
769
+ * Detects the semantic type of a chunk based on its content.
770
+ */
771
+ function detectSemanticType(text) {
772
+ const trimmed = text.trim();
773
+ // Check for headings
774
+ if (trimmed.match(/^#{1,6}\s/) ||
775
+ (trimmed.length < 100 && !trimmed.includes("\n"))) {
776
+ const lines = trimmed.split("\n");
777
+ if (lines.length === 1 && !trimmed.match(/[.!?]$/)) {
778
+ return lines[0].length < 20 ? "title" : "heading";
779
+ }
780
+ }
781
+ // Check for lists
782
+ if (trimmed.match(/^[-*]\s/m) || trimmed.match(/^\d+\.\s/m)) {
783
+ return "list";
784
+ }
785
+ // Check for quotes
786
+ if (trimmed.startsWith('"') || trimmed.startsWith(">")) {
787
+ return "quote";
788
+ }
789
+ // Check for code
790
+ if (trimmed.startsWith("```") || trimmed.match(/^\s{4}/m)) {
791
+ return "code";
792
+ }
793
+ // Default to paragraph or mixed
794
+ return trimmed.includes("\n\n") ? "mixed" : "paragraph";
795
+ }
796
/**
 * Creates a summary chunk from an entry's key facts.
 *
 * The text has one line each for the title (when present), content type
 * display name, status, last publish date (when present) and an excerpt of
 * at most 200 characters from the first extracted field. The excerpt ends in
 * "..." only when the field text was actually cut short.
 *
 * @param entry - The content entry
 * @param contentType - The content type definition
 * @param extracted - Result of the extraction step (`title`, `fieldTexts`)
 * @param totalChunks - Number of regular chunks; the summary adds one on top
 * @returns A `{ text, metadata }` chunk with `chunkIndex` 0
 */
function createSummaryChunk(entry, contentType, extracted, totalChunks) {
    const EXCERPT_MAX_CHARS = 200;
    const summaryParts = [];
    // Add title
    if (extracted.title) {
        summaryParts.push(`Title: ${extracted.title}`);
    }
    // Add content type
    summaryParts.push(`Type: ${contentType.displayName}`);
    // Add status and dates
    summaryParts.push(`Status: ${entry.status}`);
    if (entry.lastPublishedAt) {
        // NOTE: toLocaleDateString() output depends on the runtime's locale and time zone.
        summaryParts.push(`Published: ${new Date(entry.lastPublishedAt).toLocaleDateString()}`);
    }
    // Add brief excerpt from the first field that produced text
    const firstField = Object.keys(extracted.fieldTexts)[0];
    const firstText = firstField && extracted.fieldTexts[firstField];
    if (firstText) {
        // Compare the full length, not the excerpt's: a field of exactly
        // 200 characters is complete and must not get an ellipsis.
        const isTruncated = firstText.length > EXCERPT_MAX_CHARS;
        const excerpt = firstText.slice(0, EXCERPT_MAX_CHARS);
        summaryParts.push(`Summary: ${excerpt}${isTruncated ? "..." : ""}`);
    }
    return {
        text: summaryParts.join("\n"),
        metadata: {
            entryId: entry._id,
            contentType: contentType.name,
            contentTypeDisplayName: contentType.displayName,
            slug: entry.slug,
            status: entry.status,
            locale: entry.locale,
            sourceFields: ["_summary"],
            chunkIndex: 0,
            totalChunks: totalChunks + 1,
            title: extracted.title,
            createdAt: new Date(entry._creationTime).toISOString(),
            firstPublishedAt: entry.firstPublishedAt
                ? new Date(entry.firstPublishedAt).toISOString()
                : undefined,
            lastPublishedAt: entry.lastPublishedAt
                ? new Date(entry.lastPublishedAt).toISOString()
                : undefined,
            version: entry.version,
            semanticType: "field_value",
        },
    };
}
843
+ // =============================================================================
844
+ // Batch Processing Utilities
845
+ // =============================================================================
846
/**
 * Chunks a batch of content entries.
 *
 * Each entry's content type is looked up by `entry.contentTypeId`. Entries
 * whose content type is missing are reported with `console.warn` and left
 * out of the result.
 *
 * @param entries - Content entries to process
 * @param contentTypes - Map of content type ID to content type
 * @param options - Extraction options passed through to chunkContentEntry
 * @returns Map of entry ID to that entry's chunks
 */
export function chunkMultipleEntries(entries, contentTypes, options = {}) {
    const chunksByEntryId = new Map();
    for (const entry of entries) {
        const contentType = contentTypes.get(entry.contentTypeId);
        if (contentType) {
            const entryChunks = chunkContentEntry(entry, contentType, options);
            chunksByEntryId.set(entry._id, entryChunks);
        }
        else {
            console.warn(`Content type not found for entry ${entry._id}`);
        }
    }
    return chunksByEntryId;
}
869
/**
 * Computes chunk and character totals for a set of entries.
 *
 * Entries whose content type is not in `contentTypes` contribute no chunks,
 * but still count towards `totalEntries` and the per-entry average.
 *
 * @param entries - Content entries to measure
 * @param contentTypes - Map of content type ID to content type
 * @param options - Extraction options passed through to chunkContentEntry
 * @returns `{ totalEntries, totalChunks, totalCharacters, averageChunksPerEntry, averageCharsPerChunk }`
 */
export function estimateChunkingStats(entries, contentTypes, options = {}) {
    let chunkCount = 0;
    let charCount = 0;
    for (const entry of entries) {
        const contentType = contentTypes.get(entry.contentTypeId);
        if (!contentType) {
            continue;
        }
        const entryChunks = chunkContentEntry(entry, contentType, options);
        chunkCount += entryChunks.length;
        for (const chunk of entryChunks) {
            charCount += chunk.text.length;
        }
    }
    const entryCount = entries.length;
    // Averages default to 0 so empty input never divides by zero.
    const averageChunksPerEntry = entryCount > 0 ? chunkCount / entryCount : 0;
    const averageCharsPerChunk = chunkCount > 0 ? charCount / chunkCount : 0;
    return {
        totalEntries: entryCount,
        totalChunks: chunkCount,
        totalCharacters: charCount,
        averageChunksPerEntry,
        averageCharsPerChunk,
    };
}
893
+ // =============================================================================
894
+ // Exports
895
+ // =============================================================================
896
+ export { DEFAULT_CHUNK_OPTIONS, DEFAULT_EXTRACTION_OPTIONS };
897
+ //# sourceMappingURL=ragContentChunker.js.map