wikimem 0.3.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. package/CHANGELOG.md +138 -29
  2. package/README.md +171 -309
  3. package/dist/cli/commands/ask.d.ts +3 -0
  4. package/dist/cli/commands/ask.d.ts.map +1 -0
  5. package/dist/cli/commands/ask.js +63 -0
  6. package/dist/cli/commands/ask.js.map +1 -0
  7. package/dist/cli/commands/export.d.ts +3 -0
  8. package/dist/cli/commands/export.d.ts.map +1 -0
  9. package/dist/cli/commands/export.js +108 -0
  10. package/dist/cli/commands/export.js.map +1 -0
  11. package/dist/cli/commands/history.d.ts +3 -0
  12. package/dist/cli/commands/history.d.ts.map +1 -0
  13. package/dist/cli/commands/history.js +61 -0
  14. package/dist/cli/commands/history.js.map +1 -0
  15. package/dist/cli/commands/improve.d.ts.map +1 -1
  16. package/dist/cli/commands/improve.js +4 -3
  17. package/dist/cli/commands/improve.js.map +1 -1
  18. package/dist/cli/commands/ingest.d.ts.map +1 -1
  19. package/dist/cli/commands/ingest.js +5 -4
  20. package/dist/cli/commands/ingest.js.map +1 -1
  21. package/dist/cli/commands/init.d.ts.map +1 -1
  22. package/dist/cli/commands/init.js +337 -81
  23. package/dist/cli/commands/init.js.map +1 -1
  24. package/dist/cli/commands/lint.d.ts.map +1 -1
  25. package/dist/cli/commands/lint.js +4 -3
  26. package/dist/cli/commands/lint.js.map +1 -1
  27. package/dist/cli/commands/mcp.d.ts +3 -0
  28. package/dist/cli/commands/mcp.d.ts.map +1 -0
  29. package/dist/cli/commands/mcp.js +11 -0
  30. package/dist/cli/commands/mcp.js.map +1 -0
  31. package/dist/cli/commands/open.d.ts +3 -0
  32. package/dist/cli/commands/open.d.ts.map +1 -0
  33. package/dist/cli/commands/open.js +36 -0
  34. package/dist/cli/commands/open.js.map +1 -0
  35. package/dist/cli/commands/query.d.ts.map +1 -1
  36. package/dist/cli/commands/query.js +5 -4
  37. package/dist/cli/commands/query.js.map +1 -1
  38. package/dist/cli/commands/search.d.ts +3 -0
  39. package/dist/cli/commands/search.d.ts.map +1 -0
  40. package/dist/cli/commands/search.js +61 -0
  41. package/dist/cli/commands/search.js.map +1 -0
  42. package/dist/cli/commands/serve.d.ts.map +1 -1
  43. package/dist/cli/commands/serve.js +41 -2
  44. package/dist/cli/commands/serve.js.map +1 -1
  45. package/dist/cli/commands/watch.d.ts.map +1 -1
  46. package/dist/cli/commands/watch.js +4 -3
  47. package/dist/cli/commands/watch.js.map +1 -1
  48. package/dist/cli/index.d.ts.map +1 -1
  49. package/dist/cli/index.js +27 -1
  50. package/dist/cli/index.js.map +1 -1
  51. package/dist/core/audit-trail.d.ts +15 -0
  52. package/dist/core/audit-trail.d.ts.map +1 -0
  53. package/dist/core/audit-trail.js +43 -0
  54. package/dist/core/audit-trail.js.map +1 -0
  55. package/dist/core/claude-code.d.ts +10 -0
  56. package/dist/core/claude-code.d.ts.map +1 -0
  57. package/dist/core/claude-code.js +81 -0
  58. package/dist/core/claude-code.js.map +1 -0
  59. package/dist/core/config.d.ts +23 -0
  60. package/dist/core/config.d.ts.map +1 -1
  61. package/dist/core/config.js.map +1 -1
  62. package/dist/core/connectors.d.ts +58 -0
  63. package/dist/core/connectors.d.ts.map +1 -0
  64. package/dist/core/connectors.js +189 -0
  65. package/dist/core/connectors.js.map +1 -0
  66. package/dist/core/folder-scanner.d.ts +10 -0
  67. package/dist/core/folder-scanner.d.ts.map +1 -0
  68. package/dist/core/folder-scanner.js +84 -0
  69. package/dist/core/folder-scanner.js.map +1 -0
  70. package/dist/core/git.d.ts +137 -0
  71. package/dist/core/git.d.ts.map +1 -0
  72. package/dist/core/git.js +520 -0
  73. package/dist/core/git.js.map +1 -0
  74. package/dist/core/history.d.ts +21 -0
  75. package/dist/core/history.d.ts.map +1 -0
  76. package/dist/core/history.js +107 -0
  77. package/dist/core/history.js.map +1 -0
  78. package/dist/core/improve.d.ts.map +1 -1
  79. package/dist/core/improve.js +9 -0
  80. package/dist/core/improve.js.map +1 -1
  81. package/dist/core/ingest.d.ts +1 -0
  82. package/dist/core/ingest.d.ts.map +1 -1
  83. package/dist/core/ingest.js +151 -7
  84. package/dist/core/ingest.js.map +1 -1
  85. package/dist/core/lint.d.ts.map +1 -1
  86. package/dist/core/lint.js +23 -4
  87. package/dist/core/lint.js.map +1 -1
  88. package/dist/core/oauth-defaults.d.ts +31 -0
  89. package/dist/core/oauth-defaults.d.ts.map +1 -0
  90. package/dist/core/oauth-defaults.js +77 -0
  91. package/dist/core/oauth-defaults.js.map +1 -0
  92. package/dist/core/observer.d.ts +94 -0
  93. package/dist/core/observer.d.ts.map +1 -0
  94. package/dist/core/observer.js +492 -0
  95. package/dist/core/observer.js.map +1 -0
  96. package/dist/core/pipeline-events.d.ts +63 -0
  97. package/dist/core/pipeline-events.d.ts.map +1 -0
  98. package/dist/core/pipeline-events.js +109 -0
  99. package/dist/core/pipeline-events.js.map +1 -0
  100. package/dist/core/query.d.ts.map +1 -1
  101. package/dist/core/query.js +16 -8
  102. package/dist/core/query.js.map +1 -1
  103. package/dist/core/scraper.d.ts +41 -0
  104. package/dist/core/scraper.d.ts.map +1 -0
  105. package/dist/core/scraper.js +277 -0
  106. package/dist/core/scraper.js.map +1 -0
  107. package/dist/core/sync/gdrive.d.ts +14 -0
  108. package/dist/core/sync/gdrive.d.ts.map +1 -0
  109. package/dist/core/sync/gdrive.js +205 -0
  110. package/dist/core/sync/gdrive.js.map +1 -0
  111. package/dist/core/sync/github.d.ts +20 -0
  112. package/dist/core/sync/github.d.ts.map +1 -0
  113. package/dist/core/sync/github.js +206 -0
  114. package/dist/core/sync/github.js.map +1 -0
  115. package/dist/core/sync/gmail.d.ts +15 -0
  116. package/dist/core/sync/gmail.d.ts.map +1 -0
  117. package/dist/core/sync/gmail.js +159 -0
  118. package/dist/core/sync/gmail.js.map +1 -0
  119. package/dist/core/sync/index.d.ts +47 -0
  120. package/dist/core/sync/index.d.ts.map +1 -0
  121. package/dist/core/sync/index.js +100 -0
  122. package/dist/core/sync/index.js.map +1 -0
  123. package/dist/core/sync/jira.d.ts +15 -0
  124. package/dist/core/sync/jira.d.ts.map +1 -0
  125. package/dist/core/sync/jira.js +176 -0
  126. package/dist/core/sync/jira.js.map +1 -0
  127. package/dist/core/sync/linear.d.ts +15 -0
  128. package/dist/core/sync/linear.d.ts.map +1 -0
  129. package/dist/core/sync/linear.js +111 -0
  130. package/dist/core/sync/linear.js.map +1 -0
  131. package/dist/core/sync/notion.d.ts +14 -0
  132. package/dist/core/sync/notion.d.ts.map +1 -0
  133. package/dist/core/sync/notion.js +168 -0
  134. package/dist/core/sync/notion.js.map +1 -0
  135. package/dist/core/sync/rss.d.ts +20 -0
  136. package/dist/core/sync/rss.d.ts.map +1 -0
  137. package/dist/core/sync/rss.js +165 -0
  138. package/dist/core/sync/rss.js.map +1 -0
  139. package/dist/core/sync/scheduler.d.ts +31 -0
  140. package/dist/core/sync/scheduler.d.ts.map +1 -0
  141. package/dist/core/sync/scheduler.js +129 -0
  142. package/dist/core/sync/scheduler.js.map +1 -0
  143. package/dist/core/sync/slack.d.ts +16 -0
  144. package/dist/core/sync/slack.d.ts.map +1 -0
  145. package/dist/core/sync/slack.js +173 -0
  146. package/dist/core/sync/slack.js.map +1 -0
  147. package/dist/core/vault.d.ts +22 -0
  148. package/dist/core/vault.d.ts.map +1 -1
  149. package/dist/core/vault.js +65 -0
  150. package/dist/core/vault.js.map +1 -1
  151. package/dist/core/webhooks.d.ts +13 -0
  152. package/dist/core/webhooks.d.ts.map +1 -0
  153. package/dist/core/webhooks.js +206 -0
  154. package/dist/core/webhooks.js.map +1 -0
  155. package/dist/index.js +3 -2
  156. package/dist/index.js.map +1 -1
  157. package/dist/mcp-entry.d.ts +10 -0
  158. package/dist/mcp-entry.d.ts.map +1 -0
  159. package/dist/mcp-entry.js +21 -0
  160. package/dist/mcp-entry.js.map +1 -0
  161. package/dist/mcp-server.d.ts +20 -0
  162. package/dist/mcp-server.d.ts.map +1 -0
  163. package/dist/mcp-server.js +483 -0
  164. package/dist/mcp-server.js.map +1 -0
  165. package/dist/mcp-tools-extended.d.ts +15 -0
  166. package/dist/mcp-tools-extended.d.ts.map +1 -0
  167. package/dist/mcp-tools-extended.js +277 -0
  168. package/dist/mcp-tools-extended.js.map +1 -0
  169. package/dist/processors/audio.d.ts.map +1 -1
  170. package/dist/processors/audio.js +42 -4
  171. package/dist/processors/audio.js.map +1 -1
  172. package/dist/processors/csv.d.ts +18 -0
  173. package/dist/processors/csv.d.ts.map +1 -0
  174. package/dist/processors/csv.js +230 -0
  175. package/dist/processors/csv.js.map +1 -0
  176. package/dist/processors/image.d.ts.map +1 -1
  177. package/dist/processors/image.js +55 -27
  178. package/dist/processors/image.js.map +1 -1
  179. package/dist/processors/pdf.d.ts.map +1 -1
  180. package/dist/processors/pdf.js +6 -3
  181. package/dist/processors/pdf.js.map +1 -1
  182. package/dist/processors/pptx.d.ts +3 -1
  183. package/dist/processors/pptx.d.ts.map +1 -1
  184. package/dist/processors/pptx.js +236 -95
  185. package/dist/processors/pptx.js.map +1 -1
  186. package/dist/processors/url.js +4 -1
  187. package/dist/processors/url.js.map +1 -1
  188. package/dist/processors/xlsx.d.ts +2 -0
  189. package/dist/processors/xlsx.d.ts.map +1 -1
  190. package/dist/processors/xlsx.js +182 -46
  191. package/dist/processors/xlsx.js.map +1 -1
  192. package/dist/providers/claude.d.ts +1 -0
  193. package/dist/providers/claude.d.ts.map +1 -1
  194. package/dist/providers/claude.js +5 -3
  195. package/dist/providers/claude.js.map +1 -1
  196. package/dist/providers/index.d.ts +17 -1
  197. package/dist/providers/index.d.ts.map +1 -1
  198. package/dist/providers/index.js +144 -0
  199. package/dist/providers/index.js.map +1 -1
  200. package/dist/providers/openai.d.ts +1 -0
  201. package/dist/providers/openai.d.ts.map +1 -1
  202. package/dist/providers/openai.js +5 -3
  203. package/dist/providers/openai.js.map +1 -1
  204. package/dist/providers/types.d.ts +18 -0
  205. package/dist/providers/types.d.ts.map +1 -1
  206. package/dist/templates/config-yaml.d.ts.map +1 -1
  207. package/dist/templates/config-yaml.js +12 -1
  208. package/dist/templates/config-yaml.js.map +1 -1
  209. package/dist/templates/source-types.d.ts +33 -0
  210. package/dist/templates/source-types.d.ts.map +1 -0
  211. package/dist/templates/source-types.js +178 -0
  212. package/dist/templates/source-types.js.map +1 -0
  213. package/dist/web/public/index.html +9836 -742
  214. package/dist/web/server.d.ts.map +1 -1
  215. package/dist/web/server.js +2823 -43
  216. package/dist/web/server.js.map +1 -1
  217. package/package.json +10 -4
  218. package/scripts/install.sh +54 -0
  219. package/src/web/public/index.html +9836 -742
  220. package/templates/mcp-config.json +9 -0
  221. package/templates/source-types/article.md +21 -0
  222. package/templates/source-types/book.md +21 -0
  223. package/templates/source-types/paper.md +23 -0
  224. package/templates/source-types/podcast.md +21 -0
  225. package/templates/source-types/raw-notes.md +17 -0
  226. package/templates/source-types/tweet-thread.md +19 -0
  227. package/templates/source-types/video.md +21 -0
  228. package/dist/web/public/public/index.html +0 -946
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/processors/csv.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAW9C,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,gBAAgB,GAAG,EAAE,CAAC;AAE5B,oDAAoD;AACpD,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,CAAC;AAC1C,CAAC;AAED,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;IAE9B,MAAM,SAAS,GAAG,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAE3C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,KAAK;YACL,OAAO,EAAE,iCAAiC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YAC/D,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,QAAQ,EAAE,CAAC;YACX,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,qBAAqB;IACvD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnD,OAAO;QACL,KAAK;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,CAAC;QAC1F,QAAQ;QACR,WAAW;QACX,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0EAA0E;AAC1E,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC9D,CAAC;AAED,gEAAgE;AAChE,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACjD,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAChD,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACrD,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAEjD,MAAM,MAAM,GAA4B;QACtC,CAAC,GAAG,EAAE,MAAM,CAAC;QACb,CAAC,IAAI,EAAE,IAAI,CAAC;QACZ,CAAC,GAAG,EAAE,UAAU,CAAC;QACjB,CAAC,GAAG,EAAE,KAAK,CAAC;KACb,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACvB,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC7C,CAAC;AAED,4EAA4E;AAC5E,SAAS,SAAS,CAAC,IAAY,EAAE,SAAiB;IAChD,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,UAAU,GAAa,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;gBACf,qBAAqB;gBACrB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC/C,KAAK,IAAI,GAAG,CAAC;oBACb,CAAC,IAAI,CAAC,CAAC;oBACP,SAAS;gBACX,CAAC;gBACD,sBAAsB;gBACtB,QAAQ,GAAG,KAAK,CAAC;gBACjB,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;YACD,KAAK,IAAI,EAAE,CAAC;YACZ,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,EAAE,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,QAAQ,GAAG,IAAI,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,aAAa;YACb,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI;gBAAE,CAAC,EAAE,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,KAAK,IAAI,EAAE,CAAC;QACZ,CAAC,EAAE,CAAC;IACN,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAWD,SAAS,iBAAiB,CAAC,IAAgB;IACzC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B;IAEvF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAc,EAAE;QAChD,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;aAC/B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,eAAe;QACf,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC9F,IAAI,WAAW,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QACjE,CAAC;QAED,gBAAgB;QAChB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACnE,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAClE,CAAC;QAED,aAAa;QACb,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,8BAA8B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,gCAAgC,CAAC,IAAI,CAAC,CAAC,CAAC,CACnF,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC/D,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAgB,EAAE,SAAiB;IACrD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAG,SAAS,GAAG,gBAAgB,CAAC;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACrE,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1B,sDAAsD;YACtD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7D,CAAC;QACD,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,6BAA6B;QAC7B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,yBAAyB,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,SAAS,aAAa,CACpB,KAAa,EACb,QAAgB,EAChB,OAAe,EACf,QAAgB,EAChB,WAAmB,EACnB,WAAyB;IAEzB,MAAM,YAAY,GAChB,WAAW,CAAC,MAAM,GAAG,CAAC;QACpB,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QAChE,CAAC,CAAC,KAAK,CAAC;IAEZ,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;cAElC,QAAQ;iBACL,WAAW;sBACN,YAAY;mBACf,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA+DzE"}
1
+ {"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAgCzE"}
@@ -25,42 +25,70 @@ export async function processImage(filePath) {
25
25
  sourcePath: filePath,
26
26
  };
27
27
  }
28
+ const description = await describeWithVision(apiKey, base64, mediaType);
29
+ return {
30
+ title,
31
+ description,
32
+ markdown: buildMarkdown(title, filePath, description),
33
+ sourcePath: filePath,
34
+ };
35
+ }
36
+ async function describeWithVision(apiKey, base64, mediaType) {
28
37
  const client = new Anthropic({ apiKey });
29
- const response = await client.messages.create({
30
- model: 'claude-sonnet-4-20250514',
31
- max_tokens: 2048,
32
- messages: [
33
- {
34
- role: 'user',
35
- content: [
36
- {
37
- type: 'image',
38
- source: { type: 'base64', media_type: mediaType, data: base64 },
39
- },
38
+ // Try Claude Vision — retry once on transient failures
39
+ for (let attempt = 0; attempt < 2; attempt++) {
40
+ try {
41
+ const response = await client.messages.create({
42
+ model: 'claude-sonnet-4-20250514',
43
+ max_tokens: 2048,
44
+ messages: [
40
45
  {
41
- type: 'text',
42
- text: `Describe this image in detail for a knowledge base. Include:
46
+ role: 'user',
47
+ content: [
48
+ {
49
+ type: 'image',
50
+ source: {
51
+ type: 'base64',
52
+ media_type: mediaType,
53
+ data: base64,
54
+ },
55
+ },
56
+ {
57
+ type: 'text',
58
+ text: `Describe this image in detail for a knowledge base. Include:
43
59
  1. What the image shows (objects, people, text, diagrams, charts)
44
60
  2. Key information or data visible
45
- 3. Any text content (OCR)
61
+ 3. Any text content (OCR — extract ALL visible text verbatim)
46
62
  4. Context and significance
47
63
 
48
64
  Be thorough but concise. This description will represent the image in a markdown wiki where agents need to understand its content without seeing it directly.`,
65
+ },
66
+ ],
49
67
  },
50
68
  ],
51
- },
52
- ],
53
- });
54
- const description = response.content
55
- .filter((block) => block.type === 'text')
56
- .map((block) => block.text)
57
- .join('');
58
- return {
59
- title,
60
- description,
61
- markdown: buildMarkdown(title, filePath, description),
62
- sourcePath: filePath,
63
- };
69
+ });
70
+ return response.content
71
+ .filter((block) => block.type === 'text')
72
+ .map((block) => block.text)
73
+ .join('');
74
+ }
75
+ catch (err) {
76
+ const isRetryable = err instanceof Error &&
77
+ (err.message.includes('rate_limit') ||
78
+ err.message.includes('overloaded') ||
79
+ err.message.includes('529') ||
80
+ err.message.includes('timeout'));
81
+ if (isRetryable && attempt === 0) {
82
+ // Wait 2s and retry once
83
+ await new Promise((r) => setTimeout(r, 2000));
84
+ continue;
85
+ }
86
+ // Non-retryable or second failure — return fallback description
87
+ const sizeKB = Math.round(Buffer.from(base64, 'base64').length / 1024);
88
+ return `[Image — Claude Vision analysis failed: ${err instanceof Error ? err.message : 'unknown error'}]\n\n_File size: ${sizeKB} KB. Set ANTHROPIC_API_KEY and ensure API access to enable image description._`;
89
+ }
90
+ }
91
+ return '[Image — description unavailable]';
64
92
  }
65
93
  function buildMarkdown(title, filePath, description) {
66
94
  return `# ${title}
@@ -1 +1 @@
1
- {"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;QAC5C,KAAK,EAAE,0BAA0B;QACjC,UAAU,EAAE,IAAI;QAChB,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,OAAO;wBACb,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE;qBAChE;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE;;;;;;8JAM4I;qBACnJ;iBACF;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO;SACjC,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;SACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;SAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;IAEZ,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAExE,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,MAAc,EAAE,MAAc,EAAE,SAAiB;IACjF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,uDAAuD;IACvD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC5C,KAAK,EAAE,0BAA0B;gBACjC,UAAU,EAAE,IAAI;gBAChB,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,OAAO;gCACb,MAAM,EAAE;oCACN,IAAI,EAAE,QAAQ;oCACd,UAAU,EAAE,SAAoE;oCAChF,IAAI,EAAE,MAAM;iCACb;6BACF;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE;;;;;;8JAMwI;6BAC/I;yBACF;qBACF;iBACF;aACF,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC,OAAO;iBACpB,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;iBACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QACd,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,WAAW,GACf,GAAG,YAAY,KAAK;gBACpB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBACjC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAClC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC3B,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;YAErC,IAAI,WAAW,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;gBACjC,yBAAyB;gBACzB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,gEAAgE;YAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YACvE,OAAO,2CAA2C,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,oBAAoB,MAAM,gFAAgF,CAAC;QACnN,CAAC;IACH,CAAC;IAED,OAAO,mCAAmC,CAAC;AAC7C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CA0BrE"}
1
+ {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CA6BrE"}
@@ -7,9 +7,12 @@ export async function processPdf(filePath) {
7
7
  const title = basename(filePath, '.pdf');
8
8
  const buffer = readFileSync(filePath);
9
9
  try {
10
- // pdf-parse v1: simple function call
11
- // eslint-disable-next-line @typescript-eslint/no-require-imports
12
- const pdfParse = require('pdf-parse');
10
+ // Import from lib/ directly to avoid pdf-parse's index.js self-test bug
11
+ // (index.js tries to open ./test/data/05-versions-space.pdf on import)
12
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
13
+ // @ts-expect-error — pdf-parse/lib has no type declarations
14
+ const pdfParseModule = await import('pdf-parse/lib/pdf-parse.js');
15
+ const pdfParse = (pdfParseModule.default ?? pdfParseModule);
13
16
  const data = await pdfParse(buffer);
14
17
  const content = data.text.trim();
15
18
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,qCAAqC;QACrC,iEAAiE;QACjE,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,CAAgG,CAAC;QACrI,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
1
+ {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,wEAAwE;QACxE,uEAAuE;QACvE,6DAA6D;QAC7D,4DAA4D;QAC5D,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,OAAO,IAAI,cAAc,CAAgG,CAAC;QAC3J,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -1,6 +1,8 @@
1
1
  /**
2
2
  * PowerPoint (.pptx) processor.
3
- * Extracts slide text and speaker notes from raw XML (no external deps).
3
+ * Extracts slides via zip (adm-zip) + XML parsing.
4
+ * Features: slide titles, body text with bullet/numbered list structure,
5
+ * speaker notes, image alt-text, proper paragraph grouping.
4
6
  */
5
7
  export interface PptxResult {
6
8
  title: string;
@@ -1 +1 @@
1
- {"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAQD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAsBvE"}
1
+ {"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAiBD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAgCvE"}
@@ -1,18 +1,30 @@
1
1
  /**
2
2
  * PowerPoint (.pptx) processor.
3
- * Extracts slide text and speaker notes from raw XML (no external deps).
3
+ * Extracts slides via zip (adm-zip) + XML parsing.
4
+ * Features: slide titles, body text with bullet/numbered list structure,
5
+ * speaker notes, image alt-text, proper paragraph grouping.
4
6
  */
5
- import { readFileSync } from 'node:fs';
6
7
  import { basename } from 'node:path';
8
+ import AdmZip from 'adm-zip';
7
9
  export async function processPptx(filePath) {
8
10
  const title = basename(filePath, '.pptx');
9
- const slides = extractSlides(filePath);
11
+ let slides;
12
+ try {
13
+ slides = extractSlides(filePath);
14
+ }
15
+ catch {
16
+ return {
17
+ title,
18
+ content: `[PowerPoint — extraction failed for ${basename(filePath)}]`,
19
+ markdown: buildMarkdown(title, filePath, '[Extraction failed — file may be corrupted]', 0),
20
+ slideCount: 0,
21
+ sourcePath: filePath,
22
+ };
23
+ }
10
24
  const slideCount = slides.length;
11
25
  let content;
12
26
  if (slides.length > 0) {
13
- content = slides
14
- .map((slide) => formatSlide(slide))
15
- .join('\n\n---\n\n');
27
+ content = slides.map(formatSlide).join('\n\n---\n\n');
16
28
  }
17
29
  else {
18
30
  content = `[PowerPoint — no text content extracted from ${basename(filePath)}]`;
@@ -26,109 +38,203 @@ export async function processPptx(filePath) {
26
38
  };
27
39
  }
28
40
  function extractSlides(filePath) {
29
- const buffer = readFileSync(filePath);
30
- const content = buffer.toString('latin1');
31
- const slides = [];
32
- // .pptx is a zip containing XML files.
33
- // Slide content lives in ppt/slides/slide{N}.xml
34
- // Speaker notes live in ppt/notesSlides/notesSlide{N}.xml
35
- // Since we're reading raw bytes, we look for XML patterns directly.
36
- // Strategy: Split by slide boundaries and extract text from each section
37
- // The <a:t> elements contain all visible text in Office OpenXML
38
- const slideChunks = splitBySlides(content);
39
- for (let i = 0; i < slideChunks.length; i++) {
40
- const chunk = slideChunks[i];
41
- if (!chunk)
42
- continue;
43
- const texts = extractTextElements(chunk);
44
- if (texts.length > 0) {
45
- slides.push({
46
- slideNumber: i + 1,
47
- texts,
48
- notes: [], // Notes extraction below
49
- });
41
+ const zip = new AdmZip(filePath);
42
+ const entries = zip.getEntries();
43
+ const slideEntries = new Map();
44
+ const noteEntries = new Map();
45
+ for (const entry of entries) {
46
+ const name = entry.entryName;
47
+ const slideMatch = name.match(/^ppt\/slides\/slide(\d+)\.xml$/);
48
+ if (slideMatch?.[1]) {
49
+ slideEntries.set(parseInt(slideMatch[1], 10), entry.getData().toString('utf-8'));
50
+ }
51
+ const noteMatch = name.match(/^ppt\/notesSlides\/notesSlide(\d+)\.xml$/);
52
+ if (noteMatch?.[1]) {
53
+ noteEntries.set(parseInt(noteMatch[1], 10), entry.getData().toString('utf-8'));
50
54
  }
51
55
  }
52
- // If chunk-based splitting didn't work, try a simpler approach
53
- if (slides.length === 0) {
54
- const allTexts = extractTextElements(content);
55
- if (allTexts.length > 0) {
56
- // Group texts into pseudo-slides (every ~5 text blocks = 1 slide)
57
- const chunkSize = 5;
58
- for (let i = 0; i < allTexts.length; i += chunkSize) {
59
- const slideTexts = allTexts.slice(i, i + chunkSize);
60
- slides.push({
61
- slideNumber: slides.length + 1,
62
- texts: slideTexts,
63
- notes: [],
64
- });
65
- }
56
+ const slideNumbers = [...slideEntries.keys()].sort((a, b) => a - b);
57
+ const slides = [];
58
+ for (const num of slideNumbers) {
59
+ const slideXml = slideEntries.get(num) ?? '';
60
+ const noteXml = noteEntries.get(num) ?? '';
61
+ const title = extractSlideTitle(slideXml);
62
+ const paragraphs = extractBodyParagraphs(slideXml, title);
63
+ const notes = noteXml ? extractNotesText(noteXml) : [];
64
+ const imageAlts = extractImageAltTexts(slideXml);
65
+ slides.push({ slideNumber: num, title, paragraphs, notes, imageAlts });
66
+ }
67
+ return slides;
68
+ }
69
+ /**
70
+ * Extract slide title from <p:sp> with <p:ph type="title"> or type="ctrTitle".
71
+ */
72
+ function extractSlideTitle(xml) {
73
+ const titleTypes = ['title', 'ctrTitle'];
74
+ for (const phType of titleTypes) {
75
+ const phPattern = new RegExp(`<p:sp>([\\s\\S]*?)<p:ph[^>]*type="${phType}"[^>]*/?>([\\s\\S]*?)</p:sp>`, 'g');
76
+ let match;
77
+ while ((match = phPattern.exec(xml)) !== null) {
78
+ const texts = extractRawTextFromXml(match[0]);
79
+ if (texts.length > 0)
80
+ return texts.join(' ');
66
81
  }
67
82
  }
68
- // Extract speaker notes — look for notesSlide patterns
69
- const noteChunks = splitByNotes(content);
70
- for (let i = 0; i < noteChunks.length; i++) {
71
- const chunk = noteChunks[i];
72
- if (!chunk)
83
+ return '';
84
+ }
85
+ /**
86
+ * Extract body paragraphs with bullet/list detection.
87
+ * Parses <a:p> elements within shapes, detecting:
88
+ * - <a:buChar char="•"/> or similar → bullet point
89
+ * - <a:buAutoNum type="arabicPeriod"/> → numbered list
90
+ * - <a:buNone/> → no bullet (plain paragraph)
91
+ * - <a:pPr lvl="N"/> → indentation level
92
+ * - <a:rPr b="1"/> → bold text
93
+ */
94
+ function extractBodyParagraphs(xml, titleText) {
95
+ const paragraphs = [];
96
+ // Extract all shape bodies, skipping title/subtitle placeholders
97
+ const shapeRegex = /<p:sp>([\s\S]*?)<\/p:sp>/g;
98
+ let shapeMatch;
99
+ while ((shapeMatch = shapeRegex.exec(xml)) !== null) {
100
+ const shapeXml = shapeMatch[1] ?? '';
101
+ // Skip title/subtitle placeholder shapes
102
+ if (/<p:ph[^>]*type="(title|ctrTitle|subTitle)"/.test(shapeXml))
103
+ continue;
104
+ // Skip slide number, date, footer placeholders
105
+ if (/<p:ph[^>]*type="(sldNum|dt|ftr)"/.test(shapeXml))
73
106
  continue;
74
- const notes = extractTextElements(chunk);
75
- // Match notes to slides by index
76
- const slide = slides[i];
77
- if (slide && notes.length > 0) {
78
- slide.notes = notes;
107
+ // Extract <p:txBody> content
108
+ const txBodyMatch = shapeXml.match(/<p:txBody>([\s\S]*?)<\/p:txBody>/);
109
+ if (!txBodyMatch?.[1])
110
+ continue;
111
+ const txBody = txBodyMatch[1];
112
+ const parsedParagraphs = parseParagraphs(txBody);
113
+ for (const p of parsedParagraphs) {
114
+ // Skip if text matches title or is a pure number/date
115
+ if (p.text === titleText)
116
+ continue;
117
+ if (/^\d+$/.test(p.text) || /^\d{1,2}\/\d{1,2}\/\d{2,4}$/.test(p.text))
118
+ continue;
119
+ paragraphs.push(p);
79
120
  }
80
121
  }
81
- return slides;
122
+ return paragraphs;
82
123
  }
83
- function splitBySlides(content) {
84
- // Look for slide{N}.xml boundaries in the zip
85
- const chunks = [];
86
- const slideMarker = /slide\d+\.xml/g;
87
- const positions = [];
124
+ /**
125
+ * Parse <a:p> elements from a txBody, detecting bullets and levels.
126
+ */
127
+ function parseParagraphs(txBodyXml) {
128
+ const result = [];
129
+ const paraRegex = /<a:p>([\s\S]*?)<\/a:p>/g;
88
130
  let match;
89
- while ((match = slideMarker.exec(content)) !== null) {
90
- positions.push(match.index);
91
- }
92
- for (let i = 0; i < positions.length; i++) {
93
- const start = positions[i] ?? 0;
94
- const end = positions[i + 1] ?? content.length;
95
- chunks.push(content.substring(start, Math.min(end, start + 50000)));
131
+ while ((match = paraRegex.exec(txBodyXml)) !== null) {
132
+ const paraXml = match[1] ?? '';
133
+ // Extract indentation level from <a:pPr lvl="N">
134
+ const levelMatch = paraXml.match(/<a:pPr[^>]*\blvl="(\d+)"/);
135
+ const level = levelMatch?.[1] ? parseInt(levelMatch[1], 10) : 0;
136
+ // Detect bullet type
137
+ let bullet = 'none';
138
+ if (/<a:buChar\b/.test(paraXml)) {
139
+ bullet = 'bullet';
140
+ }
141
+ else if (/<a:buAutoNum\b/.test(paraXml)) {
142
+ bullet = 'numbered';
143
+ }
144
+ else if (/<a:buNone\s*\/>/.test(paraXml)) {
145
+ bullet = 'none';
146
+ }
147
+ else if (/<a:pPr\b/.test(paraXml) && !/<a:buNone/.test(paraXml) && level > 0) {
148
+ // If there's a pPr with a level but no explicit buNone, it's likely a bullet
149
+ bullet = 'bullet';
150
+ }
151
+ // Check for bold
152
+ const isBold = /<a:rPr[^>]*\bb="1"/.test(paraXml);
153
+ // Extract text runs
154
+ const texts = [];
155
+ const runRegex = /<a:r>([\s\S]*?)<\/a:r>/g;
156
+ let runMatch;
157
+ while ((runMatch = runRegex.exec(paraXml)) !== null) {
158
+ const tMatch = (runMatch[1] ?? '').match(/<a:t>([\s\S]*?)<\/a:t>/);
159
+ if (tMatch?.[1]?.trim()) {
160
+ texts.push(decodeXmlEntities(tMatch[1].trim()));
161
+ }
162
+ }
163
+ // Also check for <a:fld> (field) text runs (e.g., slide numbers in body)
164
+ const fldRegex = /<a:fld[^>]*>([\s\S]*?)<\/a:fld>/g;
165
+ let fldMatch;
166
+ while ((fldMatch = fldRegex.exec(paraXml)) !== null) {
167
+ const tMatch = (fldMatch[1] ?? '').match(/<a:t>([\s\S]*?)<\/a:t>/);
168
+ if (tMatch?.[1]?.trim()) {
169
+ texts.push(decodeXmlEntities(tMatch[1].trim()));
170
+ }
171
+ }
172
+ const text = texts.join(' ');
173
+ if (text.length > 0) {
174
+ result.push({ text, level, bullet, isBold });
175
+ }
96
176
  }
97
- return chunks;
177
+ return result;
98
178
  }
99
- function splitByNotes(content) {
100
- const chunks = [];
101
- const noteMarker = /notesSlide\d+\.xml/g;
102
- const positions = [];
179
+ /**
180
+ * Extract image alt-text from <p:pic> elements.
181
+ * Alt text is stored in: <p:cNvPr id="..." name="..." descr="Alt text here"/>
182
+ */
183
+ function extractImageAltTexts(xml) {
184
+ const alts = [];
185
+ // Look for p:pic shapes with descr attribute
186
+ const picRegex = /<p:pic>([\s\S]*?)<\/p:pic>/g;
103
187
  let match;
104
- while ((match = noteMarker.exec(content)) !== null) {
105
- positions.push(match.index);
188
+ while ((match = picRegex.exec(xml)) !== null) {
189
+ const picXml = match[1] ?? '';
190
+ // descr attribute on cNvPr holds alt text
191
+ const descrMatch = picXml.match(/descr="([^"]+)"/);
192
+ if (descrMatch?.[1]) {
193
+ alts.push(decodeXmlEntities(descrMatch[1]));
194
+ }
195
+ // Also check for <a:hlinkClick> tooltip as fallback
196
+ const tooltipMatch = picXml.match(/tooltip="([^"]+)"/);
197
+ if (tooltipMatch?.[1] && !descrMatch) {
198
+ alts.push(decodeXmlEntities(tooltipMatch[1]));
199
+ }
106
200
  }
107
- for (let i = 0; i < positions.length; i++) {
108
- const start = positions[i] ?? 0;
109
- const end = positions[i + 1] ?? content.length;
110
- chunks.push(content.substring(start, Math.min(end, start + 50000)));
201
+ // Also check <wsp> (WordArt/shapes) and <p:sp> with images
202
+ const spRegex = /<p:sp>([\s\S]*?)<\/p:sp>/g;
203
+ while ((match = spRegex.exec(xml)) !== null) {
204
+ const spXml = match[1] ?? '';
205
+ if (/<a:blipFill>/.test(spXml)) {
206
+ const descrMatch = spXml.match(/<p:cNvPr[^>]*descr="([^"]+)"/);
207
+ if (descrMatch?.[1]) {
208
+ alts.push(decodeXmlEntities(descrMatch[1]));
209
+ }
210
+ }
111
211
  }
112
- return chunks;
212
+ return alts;
213
+ }
214
+ /**
215
+ * Extract speaker notes text from notesSlide XML.
216
+ * Parses at paragraph level for better structure.
217
+ */
218
+ function extractNotesText(noteXml) {
219
+ const paragraphs = parseParagraphs(noteXml);
220
+ return paragraphs
221
+ .map((p) => p.text)
222
+ .filter((t) => {
223
+ if (/^\d+$/.test(t))
224
+ return false;
225
+ if (t.length < 2)
226
+ return false;
227
+ return true;
228
+ });
113
229
  }
114
- function extractTextElements(xml) {
230
+ function extractRawTextFromXml(xml) {
115
231
  const texts = [];
116
- // <a:t> elements contain text in Office OpenXML
117
232
  const textRegex = /<a:t>([\s\S]*?)<\/a:t>/g;
118
233
  let match;
119
234
  while ((match = textRegex.exec(xml)) !== null) {
120
235
  const text = match[1]?.trim();
121
- if (text && text.length > 0) {
122
- texts.push(decodeXmlEntities(text));
123
- }
124
- }
125
- // Also check for <a:fld> (field codes that may contain text)
126
- const fldRegex = /<a:fld[^>]*>[\s\S]*?<a:t>([\s\S]*?)<\/a:t>[\s\S]*?<\/a:fld>/g;
127
- while ((match = fldRegex.exec(xml)) !== null) {
128
- const text = match[1]?.trim();
129
- if (text && text.length > 0 && !texts.includes(text)) {
236
+ if (text && text.length > 0)
130
237
  texts.push(decodeXmlEntities(text));
131
- }
132
238
  }
133
239
  return texts;
134
240
  }
@@ -139,15 +245,52 @@ function decodeXmlEntities(text) {
139
245
  .replace(/&gt;/g, '>')
140
246
  .replace(/&quot;/g, '"')
141
247
  .replace(/&apos;/g, "'")
142
- .replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)));
248
+ .replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
249
+ .replace(/&#x([0-9A-Fa-f]+);/g, (_, code) => String.fromCharCode(parseInt(code, 16)));
143
250
  }
144
251
  function formatSlide(slide) {
145
- let md = `### Slide ${slide.slideNumber}\n\n`;
146
- md += slide.texts.join('\n\n');
252
+ const parts = [];
253
+ // Heading
254
+ if (slide.title) {
255
+ parts.push(`## Slide ${slide.slideNumber}: ${slide.title}`);
256
+ }
257
+ else {
258
+ parts.push(`## Slide ${slide.slideNumber}`);
259
+ }
260
+ // Body paragraphs with proper bullet/list formatting
261
+ if (slide.paragraphs.length > 0) {
262
+ let numberedCounter = 0;
263
+ const bodyLines = [];
264
+ for (const p of slide.paragraphs) {
265
+ const indent = ' '.repeat(p.level);
266
+ const text = p.isBold ? `**${p.text}**` : p.text;
267
+ if (p.bullet === 'numbered') {
268
+ numberedCounter++;
269
+ bodyLines.push(`${indent}${numberedCounter}. ${text}`);
270
+ }
271
+ else if (p.bullet === 'bullet') {
272
+ numberedCounter = 0;
273
+ bodyLines.push(`${indent}- ${text}`);
274
+ }
275
+ else {
276
+ numberedCounter = 0;
277
+ bodyLines.push(`${indent}${text}`);
278
+ }
279
+ }
280
+ parts.push(bodyLines.join('\n'));
281
+ }
282
+ else {
283
+ parts.push('_[No body text]_');
284
+ }
285
+ // Image alt-texts
286
+ if (slide.imageAlts.length > 0) {
287
+ parts.push(`**Images:** ${slide.imageAlts.map((a) => `_${a}_`).join(', ')}`);
288
+ }
289
+ // Speaker notes
147
290
  if (slide.notes.length > 0) {
148
- md += `\n\n**Speaker Notes:**\n\n> ${slide.notes.join(' ')}`;
291
+ parts.push(`**Speaker Notes:**\n\n> ${slide.notes.join('\n> ')}`);
149
292
  }
150
- return md;
293
+ return parts.join('\n\n');
151
294
  }
152
295
  function buildMarkdown(title, filePath, content, slideCount) {
153
296
  return `# ${title}
@@ -157,8 +300,6 @@ function buildMarkdown(title, filePath, content, slideCount) {
157
300
  > **Slides:** ${slideCount}
158
301
  > **Processed:** ${new Date().toISOString().split('T')[0]}
159
302
 
160
- ## Slides
161
-
162
303
  ${content}
163
304
  `;
164
305
  }
@@ -1 +1 @@
1
- {"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAgBrC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IAEjC,IAAI,OAAe,CAAC;IACpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM;aACb,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;aAClC,IAAI,CAAC,aAAa,CAAC,CAAC;IACzB,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,uCAAuC;IACvC,iDAAiD;IACjD,0DAA0D;IAC1D,oEAAoE;IAEpE,yEAAyE;IACzE,gEAAgE;IAChE,MAAM,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,WAAW,EAAE,CAAC,GAAG,CAAC;gBAClB,KAAK;gBACL,KAAK,EAAE,EAAE,EAAE,yBAAyB;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,kEAAkE;YAClE,MAAM,SAAS,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;oBAC9B,KAAK,EAAE,UAAU;oBACjB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,uDAAuD;IACvD,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,iCAAiC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,8CAA8C;IAC9C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,gBAAgB,CAAC;IACrC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAW;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,gDAAgD;IAChD,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,8DAA8D,CAAC;IAChF,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrD,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1F,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,IAAI,EAAE,GAAG,aAAa,KAAK,CAAC,WAAW,MAAM,CAAC;IAC9C,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE/B,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,EAAE,IAAI,+BAA+B,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
1
+ {"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,MAAM,MAAM,SAAS,CAAC;AAyB7B,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,IAAI,MAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,uCAAuC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YACrE,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,6CAA6C,EAAE,CAAC,CAAC;YAC1F,UAAU,EAAE,CAAC;YACb,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IACjC,IAAI,OAAe,CAAC;IAEpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAwB,IAAI,GAAG,EAAE,CAAC;IACpD,MAAM,WAAW,GAAwB,IAAI,GAAG,EAAE,CAAC;IAEnD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC;QAE7B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChE,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QACzE,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC7C,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAE3C,MAAM,KAAK,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAEjD,MAAM,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,UAAU,GAAG,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,MAAM,CAC1B,qCAAqC,MAAM,8BAA8B,EACzE,GAAG,CACJ,CAAC;QACF,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC9C,MAAM,KAAK,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,qBAAqB,CAAC,GAAW,EAAE,SAAiB;IAC3D,MAAM,UAAU,GAAgB,EAAE,CAAC;IAEnC,iEAAiE;IACjE,MAAM,UAAU,GAAG,2BAA2B,CAAC;IAC/C,IAAI,UAAkC,CAAC;IAEvC,OAAO,CAAC,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAErC,yCAAyC;QACzC,IAAI,4CAA4C,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS;QAC1E,+CAA+C;QAC/C,IAAI,kCAAkC,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEhE,6BAA6B;QAC7B,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACvE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;YAAE,SAAS;QAEhC,MAAM,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,gBAAgB,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;QAEjD,KAAK,MAAM,CAAC,IAAI,gBAAgB,EAAE,CAAC;YACjC,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,SAAS;gBAAE,SAAS;YACnC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;gBAAE,SAAS;YACjF,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,SAAiB;IACxC,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE/B,iDAAiD;QACjD,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC7D,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhE,qBAAqB;QACrB,IAAI,MAAM,GAAwB,MAAM,CAAC;QACzC,IAAI,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAChC,MAAM,GAAG,QAAQ,CAAC;QACpB,CAAC;aAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1C,MAAM,GAAG,UAAU,CAAC;QACtB,CAAC;aAAM,IAAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;aAAM,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YAC/E,6EAA6E;YAC7E,MAAM,GAAG,QAAQ,CAAC;QACpB,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAElD,oBAAoB;QACpB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,yBAAyB,CAAC;QAC3C,IAAI,QAAgC,CAAC;QACrC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YACnE,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,yEAAyE;QACzE,MAAM,QAAQ,GAAG,kCAAkC,CAAC;QACpD,IAAI,QAAgC,CAAC;QACrC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YACnE,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,MAAM,IAAI,GAAa,EAAE,CAAC;IAE1B,6CAA6C;IAC7C,MAAM,QAAQ,GAAG,6BAA6B,CAAC;IAC/C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9B,0CAA0C;QAC1C,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACnD,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;QACD,oDAAoD;QACpD,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACvD,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,2DAA2D;IAC3D,MAAM,OAAO,GAAG,2BAA2B,CAAC;IAC5C,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;YAC/D,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpB,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,OAAe;IACvC,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC5C,OAAO,UAAU;SACd,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACZ,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAClC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAW;IACxC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC;SACpF,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AACpG,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,UAAU;IACV,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IAC9D,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,qDAAqD;IACrD,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACpC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEjD,IAAI,CAAC,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBAC5B,eAAe,EAAE,CAAC;gBAClB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,eAAe,KAAK,IAAI,EAAE,CAAC,CAAC;YACzD,CAAC;iBAAM,IAAI,CAAC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBACjC,eAAe,GAAG,CAAC,CAAC;gBACpB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,eAAe,GAAG,CAAC,CAAC;gBACpB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACjC,CAAC;IAED,kBAAkB;IAClB,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,2BAA2B,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;EAEvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -23,8 +23,11 @@ async function processWithFirecrawl(url, apiKey) {
23
23
  throw new Error(`Firecrawl API error: ${response.status}`);
24
24
  }
25
25
  const data = (await response.json());
26
+ if (!data.data?.markdown) {
27
+ throw new Error(`Firecrawl returned no content for ${url}`);
28
+ }
26
29
  return {
27
- title: data.data.metadata.title ?? new URL(url).hostname,
30
+ title: data.data.metadata?.title ?? new URL(url).hostname,
28
31
  content: data.data.markdown,
29
32
  url,
30
33
  };