@vertesia/workflow 0.42.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (406) hide show
  1. package/LICENSE +13 -0
  2. package/README.md +24 -0
  3. package/bin/bundle-workflows.mjs +26 -0
  4. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +32 -0
  5. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  6. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +66 -0
  7. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  8. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +18 -0
  9. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  10. package/lib/cjs/activities/chunkDocument.js +79 -0
  11. package/lib/cjs/activities/chunkDocument.js.map +1 -0
  12. package/lib/cjs/activities/createDocumentFromOther.js +64 -0
  13. package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
  14. package/lib/cjs/activities/executeInteraction.js +135 -0
  15. package/lib/cjs/activities/executeInteraction.js.map +1 -0
  16. package/lib/cjs/activities/extractDocumentText.js +140 -0
  17. package/lib/cjs/activities/extractDocumentText.js.map +1 -0
  18. package/lib/cjs/activities/generateDocumentProperties.js +59 -0
  19. package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
  20. package/lib/cjs/activities/generateEmbeddings.js +292 -0
  21. package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
  22. package/lib/cjs/activities/generateImageRendition.js +104 -0
  23. package/lib/cjs/activities/generateImageRendition.js.map +1 -0
  24. package/lib/cjs/activities/generateOrAssignContentType.js +103 -0
  25. package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
  26. package/lib/cjs/activities/getObjectFromStore.js +20 -0
  27. package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
  28. package/lib/cjs/activities/index.js +54 -0
  29. package/lib/cjs/activities/index.js.map +1 -0
  30. package/lib/cjs/activities/media/processPdfWithTextract.js +102 -0
  31. package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
  32. package/lib/cjs/activities/media/transcribeMediaWithGladia.js +51 -0
  33. package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
  34. package/lib/cjs/activities/notifyWebhook.js +34 -0
  35. package/lib/cjs/activities/notifyWebhook.js.map +1 -0
  36. package/lib/cjs/activities/setDocumentStatus.js +15 -0
  37. package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
  38. package/lib/cjs/conversion/TextractProcessor.js +416 -0
  39. package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
  40. package/lib/cjs/conversion/image.js +22 -0
  41. package/lib/cjs/conversion/image.js.map +1 -0
  42. package/lib/cjs/conversion/mutool.js +147 -0
  43. package/lib/cjs/conversion/mutool.js.map +1 -0
  44. package/lib/cjs/conversion/pandoc.js +39 -0
  45. package/lib/cjs/conversion/pandoc.js.map +1 -0
  46. package/lib/cjs/conversion/pdf.js +13 -0
  47. package/lib/cjs/conversion/pdf.js.map +1 -0
  48. package/lib/cjs/dsl/conditions.js +81 -0
  49. package/lib/cjs/dsl/conditions.js.map +1 -0
  50. package/lib/cjs/dsl/dsl-workflow.js +223 -0
  51. package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
  52. package/lib/cjs/dsl/projections.js +59 -0
  53. package/lib/cjs/dsl/projections.js.map +1 -0
  54. package/lib/cjs/dsl/setup/ActivityContext.js +96 -0
  55. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
  56. package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
  57. package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
  58. package/lib/cjs/dsl/setup/fetch/index.js +16 -0
  59. package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
  60. package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
  61. package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
  62. package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
  63. package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
  64. package/lib/cjs/dsl/validation.js +122 -0
  65. package/lib/cjs/dsl/validation.js.map +1 -0
  66. package/lib/cjs/dsl/vars.js +341 -0
  67. package/lib/cjs/dsl/vars.js.map +1 -0
  68. package/lib/cjs/dsl/walk.js +100 -0
  69. package/lib/cjs/dsl/walk.js.map +1 -0
  70. package/lib/cjs/errors.js +36 -0
  71. package/lib/cjs/errors.js.map +1 -0
  72. package/lib/cjs/index.js +43 -0
  73. package/lib/cjs/index.js.map +1 -0
  74. package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
  75. package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
  76. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +69 -0
  77. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
  78. package/lib/cjs/iterative-generation/activities/generatePart.js +73 -0
  79. package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
  80. package/lib/cjs/iterative-generation/activities/generateToc.js +91 -0
  81. package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
  82. package/lib/cjs/iterative-generation/activities/index.js +12 -0
  83. package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
  84. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +55 -0
  85. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  86. package/lib/cjs/iterative-generation/types.js +5 -0
  87. package/lib/cjs/iterative-generation/types.js.map +1 -0
  88. package/lib/cjs/iterative-generation/utils.js +118 -0
  89. package/lib/cjs/iterative-generation/utils.js.map +1 -0
  90. package/lib/cjs/package.json +3 -0
  91. package/lib/cjs/result-types.js +10 -0
  92. package/lib/cjs/result-types.js.map +1 -0
  93. package/lib/cjs/system/generateObjectText.js +89 -0
  94. package/lib/cjs/system/generateObjectText.js.map +1 -0
  95. package/lib/cjs/system/notifyWebhookWorkflow.js +52 -0
  96. package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
  97. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +37 -0
  98. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  99. package/lib/cjs/utils/auth.js +15 -0
  100. package/lib/cjs/utils/auth.js.map +1 -0
  101. package/lib/cjs/utils/blobs.js +63 -0
  102. package/lib/cjs/utils/blobs.js.map +1 -0
  103. package/lib/cjs/utils/client.js +25 -0
  104. package/lib/cjs/utils/client.js.map +1 -0
  105. package/lib/cjs/utils/expand-vars.js +33 -0
  106. package/lib/cjs/utils/expand-vars.js.map +1 -0
  107. package/lib/cjs/utils/memory.js +72 -0
  108. package/lib/cjs/utils/memory.js.map +1 -0
  109. package/lib/cjs/utils/tokens.js +38 -0
  110. package/lib/cjs/utils/tokens.js.map +1 -0
  111. package/lib/cjs/vars.js +20 -0
  112. package/lib/cjs/vars.js.map +1 -0
  113. package/lib/cjs/workflows.js +17 -0
  114. package/lib/cjs/workflows.js.map +1 -0
  115. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +29 -0
  116. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  117. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +63 -0
  118. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  119. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +15 -0
  120. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  121. package/lib/esm/activities/chunkDocument.js +76 -0
  122. package/lib/esm/activities/chunkDocument.js.map +1 -0
  123. package/lib/esm/activities/createDocumentFromOther.js +58 -0
  124. package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
  125. package/lib/esm/activities/executeInteraction.js +131 -0
  126. package/lib/esm/activities/executeInteraction.js.map +1 -0
  127. package/lib/esm/activities/extractDocumentText.js +137 -0
  128. package/lib/esm/activities/extractDocumentText.js.map +1 -0
  129. package/lib/esm/activities/generateDocumentProperties.js +56 -0
  130. package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
  131. package/lib/esm/activities/generateEmbeddings.js +256 -0
  132. package/lib/esm/activities/generateEmbeddings.js.map +1 -0
  133. package/lib/esm/activities/generateImageRendition.js +98 -0
  134. package/lib/esm/activities/generateImageRendition.js.map +1 -0
  135. package/lib/esm/activities/generateOrAssignContentType.js +100 -0
  136. package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
  137. package/lib/esm/activities/getObjectFromStore.js +17 -0
  138. package/lib/esm/activities/getObjectFromStore.js.map +1 -0
  139. package/lib/esm/activities/index.js +21 -0
  140. package/lib/esm/activities/index.js.map +1 -0
  141. package/lib/esm/activities/media/processPdfWithTextract.js +98 -0
  142. package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
  143. package/lib/esm/activities/media/transcribeMediaWithGladia.js +48 -0
  144. package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
  145. package/lib/esm/activities/notifyWebhook.js +31 -0
  146. package/lib/esm/activities/notifyWebhook.js.map +1 -0
  147. package/lib/esm/activities/setDocumentStatus.js +12 -0
  148. package/lib/esm/activities/setDocumentStatus.js.map +1 -0
  149. package/lib/esm/conversion/TextractProcessor.js +409 -0
  150. package/lib/esm/conversion/TextractProcessor.js.map +1 -0
  151. package/lib/esm/conversion/image.js +16 -0
  152. package/lib/esm/conversion/image.js.map +1 -0
  153. package/lib/esm/conversion/mutool.js +139 -0
  154. package/lib/esm/conversion/mutool.js.map +1 -0
  155. package/lib/esm/conversion/pandoc.js +36 -0
  156. package/lib/esm/conversion/pandoc.js.map +1 -0
  157. package/lib/esm/conversion/pdf.js +7 -0
  158. package/lib/esm/conversion/pdf.js.map +1 -0
  159. package/lib/esm/dsl/conditions.js +75 -0
  160. package/lib/esm/dsl/conditions.js.map +1 -0
  161. package/lib/esm/dsl/dsl-workflow.js +216 -0
  162. package/lib/esm/dsl/dsl-workflow.js.map +1 -0
  163. package/lib/esm/dsl/projections.js +55 -0
  164. package/lib/esm/dsl/projections.js.map +1 -0
  165. package/lib/esm/dsl/setup/ActivityContext.js +91 -0
  166. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
  167. package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
  168. package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
  169. package/lib/esm/dsl/setup/fetch/index.js +12 -0
  170. package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
  171. package/lib/esm/dsl/setup/fetch/providers.js +61 -0
  172. package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
  173. package/lib/esm/dsl/test/test-child-workflow.js +5 -0
  174. package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
  175. package/lib/esm/dsl/validation.js +118 -0
  176. package/lib/esm/dsl/validation.js.map +1 -0
  177. package/lib/esm/dsl/vars.js +335 -0
  178. package/lib/esm/dsl/vars.js.map +1 -0
  179. package/lib/esm/dsl/walk.js +96 -0
  180. package/lib/esm/dsl/walk.js.map +1 -0
  181. package/lib/esm/errors.js +30 -0
  182. package/lib/esm/errors.js.map +1 -0
  183. package/lib/esm/index.js +25 -0
  184. package/lib/esm/index.js.map +1 -0
  185. package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
  186. package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
  187. package/lib/esm/iterative-generation/activities/finalizeOutput.js +66 -0
  188. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
  189. package/lib/esm/iterative-generation/activities/generatePart.js +70 -0
  190. package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
  191. package/lib/esm/iterative-generation/activities/generateToc.js +88 -0
  192. package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
  193. package/lib/esm/iterative-generation/activities/index.js +5 -0
  194. package/lib/esm/iterative-generation/activities/index.js.map +1 -0
  195. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +52 -0
  196. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  197. package/lib/esm/iterative-generation/types.js +2 -0
  198. package/lib/esm/iterative-generation/types.js.map +1 -0
  199. package/lib/esm/iterative-generation/utils.js +109 -0
  200. package/lib/esm/iterative-generation/utils.js.map +1 -0
  201. package/lib/esm/result-types.js +7 -0
  202. package/lib/esm/result-types.js.map +1 -0
  203. package/lib/esm/system/generateObjectText.js +86 -0
  204. package/lib/esm/system/generateObjectText.js.map +1 -0
  205. package/lib/esm/system/notifyWebhookWorkflow.js +49 -0
  206. package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
  207. package/lib/esm/system/recalculateEmbeddingsWorkflow.js +34 -0
  208. package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  209. package/lib/esm/utils/auth.js +8 -0
  210. package/lib/esm/utils/auth.js.map +1 -0
  211. package/lib/esm/utils/blobs.js +52 -0
  212. package/lib/esm/utils/blobs.js.map +1 -0
  213. package/lib/esm/utils/client.js +22 -0
  214. package/lib/esm/utils/client.js.map +1 -0
  215. package/lib/esm/utils/expand-vars.js +30 -0
  216. package/lib/esm/utils/expand-vars.js.map +1 -0
  217. package/lib/esm/utils/memory.js +60 -0
  218. package/lib/esm/utils/memory.js.map +1 -0
  219. package/lib/esm/utils/tokens.js +34 -0
  220. package/lib/esm/utils/tokens.js.map +1 -0
  221. package/lib/esm/vars.js +4 -0
  222. package/lib/esm/vars.js.map +1 -0
  223. package/lib/esm/workflows.js +9 -0
  224. package/lib/esm/workflows.js.map +1 -0
  225. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
  226. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
  227. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +29 -0
  228. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
  229. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
  230. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
  231. package/lib/types/activities/chunkDocument.d.ts +18 -0
  232. package/lib/types/activities/chunkDocument.d.ts.map +1 -0
  233. package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
  234. package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
  235. package/lib/types/activities/executeInteraction.d.ts +40 -0
  236. package/lib/types/activities/executeInteraction.d.ts.map +1 -0
  237. package/lib/types/activities/extractDocumentText.d.ts +9 -0
  238. package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
  239. package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
  240. package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
  241. package/lib/types/activities/generateEmbeddings.d.ts +49 -0
  242. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
  243. package/lib/types/activities/generateImageRendition.d.ts +17 -0
  244. package/lib/types/activities/generateImageRendition.d.ts.map +1 -0
  245. package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
  246. package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
  247. package/lib/types/activities/getObjectFromStore.d.ts +14 -0
  248. package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
  249. package/lib/types/activities/index.d.ts +21 -0
  250. package/lib/types/activities/index.d.ts.map +1 -0
  251. package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
  252. package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
  253. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +14 -0
  254. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
  255. package/lib/types/activities/notifyWebhook.d.ts +17 -0
  256. package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
  257. package/lib/types/activities/setDocumentStatus.d.ts +15 -0
  258. package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
  259. package/lib/types/conversion/TextractProcessor.d.ts +45 -0
  260. package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
  261. package/lib/types/conversion/image.d.ts +9 -0
  262. package/lib/types/conversion/image.d.ts.map +1 -0
  263. package/lib/types/conversion/mutool.d.ts +19 -0
  264. package/lib/types/conversion/mutool.d.ts.map +1 -0
  265. package/lib/types/conversion/pandoc.d.ts +2 -0
  266. package/lib/types/conversion/pandoc.d.ts.map +1 -0
  267. package/lib/types/conversion/pdf.d.ts +2 -0
  268. package/lib/types/conversion/pdf.d.ts.map +1 -0
  269. package/lib/types/dsl/conditions.d.ts +2 -0
  270. package/lib/types/dsl/conditions.d.ts.map +1 -0
  271. package/lib/types/dsl/dsl-workflow.d.ts +5 -0
  272. package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
  273. package/lib/types/dsl/projections.d.ts +4 -0
  274. package/lib/types/dsl/projections.d.ts.map +1 -0
  275. package/lib/types/dsl/setup/ActivityContext.d.ts +14 -0
  276. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
  277. package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
  278. package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
  279. package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
  280. package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
  281. package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
  282. package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
  283. package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
  284. package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
  285. package/lib/types/dsl/validation.d.ts +4 -0
  286. package/lib/types/dsl/validation.d.ts.map +1 -0
  287. package/lib/types/dsl/vars.d.ts +48 -0
  288. package/lib/types/dsl/vars.d.ts.map +1 -0
  289. package/lib/types/dsl/walk.d.ts +18 -0
  290. package/lib/types/dsl/walk.d.ts.map +1 -0
  291. package/lib/types/errors.d.ts +16 -0
  292. package/lib/types/errors.d.ts.map +1 -0
  293. package/lib/types/index.d.ts +24 -0
  294. package/lib/types/index.d.ts.map +1 -0
  295. package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
  296. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
  297. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
  298. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
  299. package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
  300. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
  301. package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
  302. package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
  303. package/lib/types/iterative-generation/activities/index.d.ts +5 -0
  304. package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
  305. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
  306. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
  307. package/lib/types/iterative-generation/types.d.ts +79 -0
  308. package/lib/types/iterative-generation/types.d.ts.map +1 -0
  309. package/lib/types/iterative-generation/utils.d.ts +27 -0
  310. package/lib/types/iterative-generation/utils.d.ts.map +1 -0
  311. package/lib/types/result-types.d.ts +22 -0
  312. package/lib/types/result-types.d.ts.map +1 -0
  313. package/lib/types/system/generateObjectText.d.ts +4 -0
  314. package/lib/types/system/generateObjectText.d.ts.map +1 -0
  315. package/lib/types/system/notifyWebhookWorkflow.d.ts +6 -0
  316. package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
  317. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +40 -0
  318. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
  319. package/lib/types/utils/auth.d.ts +4 -0
  320. package/lib/types/utils/auth.d.ts.map +1 -0
  321. package/lib/types/utils/blobs.d.ts +8 -0
  322. package/lib/types/utils/blobs.d.ts.map +1 -0
  323. package/lib/types/utils/client.d.ts +7 -0
  324. package/lib/types/utils/client.d.ts.map +1 -0
  325. package/lib/types/utils/expand-vars.d.ts +8 -0
  326. package/lib/types/utils/expand-vars.d.ts.map +1 -0
  327. package/lib/types/utils/memory.d.ts +12 -0
  328. package/lib/types/utils/memory.d.ts.map +1 -0
  329. package/lib/types/utils/tokens.d.ts +11 -0
  330. package/lib/types/utils/tokens.d.ts.map +1 -0
  331. package/lib/types/vars.d.ts +3 -0
  332. package/lib/types/vars.d.ts.map +1 -0
  333. package/lib/types/workflows.d.ts +9 -0
  334. package/lib/types/workflows.d.ts.map +1 -0
  335. package/lib/workflows-bundle.js +18394 -0
  336. package/package.json +109 -0
  337. package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +54 -0
  338. package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +97 -0
  339. package/src/activities/advanced/updateDocumentFromInteractionRun.ts +34 -0
  340. package/src/activities/chunkDocument.ts +124 -0
  341. package/src/activities/createDocumentFromOther.ts +92 -0
  342. package/src/activities/executeInteraction.ts +191 -0
  343. package/src/activities/extractDocumentText.ts +174 -0
  344. package/src/activities/generateDocumentProperties.ts +93 -0
  345. package/src/activities/generateEmbeddings.ts +345 -0
  346. package/src/activities/generateImageRendition.ts +134 -0
  347. package/src/activities/generateOrAssignContentType.ts +152 -0
  348. package/src/activities/getObjectFromStore.ts +31 -0
  349. package/src/activities/index.ts +21 -0
  350. package/src/activities/media/processPdfWithTextract.ts +141 -0
  351. package/src/activities/media/transcribeMediaWithGladia.ts +83 -0
  352. package/src/activities/notifyWebhook.test.ts +32 -0
  353. package/src/activities/notifyWebhook.ts +51 -0
  354. package/src/activities/setDocumentStatus.ts +25 -0
  355. package/src/conversion/TextractProcessor.ts +505 -0
  356. package/src/conversion/image.test.ts +26 -0
  357. package/src/conversion/image.ts +22 -0
  358. package/src/conversion/mutool.test.ts +74 -0
  359. package/src/conversion/mutool.ts +180 -0
  360. package/src/conversion/pandoc.test.ts +22 -0
  361. package/src/conversion/pandoc.ts +44 -0
  362. package/src/conversion/pdf.test.ts +35 -0
  363. package/src/conversion/pdf.ts +8 -0
  364. package/src/dsl/conditions.ts +76 -0
  365. package/src/dsl/dsl-workflow.test.ts +58 -0
  366. package/src/dsl/dsl-workflow.ts +235 -0
  367. package/src/dsl/ms.d.ts +11 -0
  368. package/src/dsl/projections.test.ts +159 -0
  369. package/src/dsl/projections.ts +72 -0
  370. package/src/dsl/setup/ActivityContext.ts +106 -0
  371. package/src/dsl/setup/fetch/DataProvider.ts +45 -0
  372. package/src/dsl/setup/fetch/index.ts +19 -0
  373. package/src/dsl/setup/fetch/providers.ts +67 -0
  374. package/src/dsl/test/test-child-workflow.ts +6 -0
  375. package/src/dsl/validation.test.ts +257 -0
  376. package/src/dsl/validation.ts +125 -0
  377. package/src/dsl/vars.test.ts +245 -0
  378. package/src/dsl/vars.ts +340 -0
  379. package/src/dsl/walk.test.ts +81 -0
  380. package/src/dsl/walk.ts +103 -0
  381. package/src/dsl/workflow-exec-child.test.ts +182 -0
  382. package/src/dsl/workflow-fetch.test.ts +135 -0
  383. package/src/dsl/workflow-import.test.ts +89 -0
  384. package/src/dsl/workflow.test.ts +110 -0
  385. package/src/errors.ts +24 -0
  386. package/src/index.ts +27 -0
  387. package/src/iterative-generation/activities/extractToc.ts +49 -0
  388. package/src/iterative-generation/activities/finalizeOutput.ts +77 -0
  389. package/src/iterative-generation/activities/generatePart.ts +82 -0
  390. package/src/iterative-generation/activities/generateToc.ts +98 -0
  391. package/src/iterative-generation/activities/index.ts +4 -0
  392. package/src/iterative-generation/iterativeGenerationWorkflow.ts +67 -0
  393. package/src/iterative-generation/types.ts +99 -0
  394. package/src/iterative-generation/utils.ts +123 -0
  395. package/src/result-types.ts +25 -0
  396. package/src/system/generateObjectText.ts +109 -0
  397. package/src/system/notifyWebhookWorkflow.ts +64 -0
  398. package/src/system/recalculateEmbeddingsWorkflow.ts +46 -0
  399. package/src/utils/auth.ts +10 -0
  400. package/src/utils/blobs.ts +58 -0
  401. package/src/utils/client.ts +31 -0
  402. package/src/utils/expand-vars.ts +31 -0
  403. package/src/utils/memory.ts +66 -0
  404. package/src/utils/tokens.ts +44 -0
  405. package/src/vars.ts +3 -0
  406. package/src/workflows.ts +9 -0
@@ -0,0 +1,505 @@
1
+ import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3";
2
+ import type { Block } from "@aws-sdk/client-textract";
3
+ import {
4
+ GetDocumentAnalysisCommand,
5
+ StartDocumentAnalysisCommand,
6
+ TextractClient
7
+ } from "@aws-sdk/client-textract";
8
+ import type { AwsCredentialIdentityProvider } from "@smithy/types";
9
+ import Papa from 'papaparse';
10
+
11
/** Lookup table from a block id to the Textract block carrying that id. */
interface BlocksMap {
    [blockId: string]: Block;
}
14
+
15
/** A single piece of extracted page content. */
interface ContentBlock {
    /** Which kind of content this block holds. */
    type: 'text' | 'table' | 'image';
    /** The content itself, as a string. */
    content: string;
    /** Optional confidence value attached to the content. */
    confidence?: number;

    // The four geometry fields below are optional and only relevant for images.
    left?: number;
    top?: number;
    width?: number;
    height?: number;
}
25
+
26
/** The content blocks collected for one page, together with that page's number. */
interface PageContent {
    pageNumber: number;
    blocks: ContentBlock[];
}
30
+
31
/** Constructor options for {@link TextractProcessor}. */
interface TextractProcessorOptions {
    /** S3 object key used when uploading the file. */
    fileKey: string;
    /** AWS region handed to both the Textract and the S3 client. */
    region: string;
    /** S3 bucket used for the upload and as the analysis document location. */
    bucket: string;
    /** Optional credentials provider handed to both AWS clients. */
    credentials?: AwsCredentialIdentityProvider;
    /** Optional logger object. */
    log?: any;
    /** Image detection flag; defaults to false when omitted. */
    detectImages?: boolean;
    /**
     * If true, includes cell-confidence information in the table CSV.
     * Defaults to false when omitted.
     */
    includeConfidenceInTables?: boolean;
}
43
+
44
+ export class TextractProcessor {
45
+ private textractClient: TextractClient;
46
+ private s3Client: S3Client;
47
+ private fileKey: string;
48
+ private bucket: string;
49
+ private log: any;
50
+ private detectImages: boolean;
51
+ /**
52
+ * Whether or not to include confidence values in CSV output for tables.
53
+ */
54
+ private includeConfidenceInTables: boolean;
55
+
56
/**
 * Remembers the supplied options and creates one Textract client and one
 * S3 client, both configured with the same region and credentials.
 *
 * `detectImages` and `includeConfidenceInTables` fall back to `false`
 * when they are not provided.
 */
constructor(options: TextractProcessorOptions) {
    const {
        fileKey,
        region,
        bucket,
        credentials,
        log,
        detectImages = false,
        includeConfidenceInTables = false
    } = options;

    this.fileKey = fileKey;
    this.bucket = bucket;
    this.log = log;
    this.detectImages = detectImages;
    this.includeConfidenceInTables = includeConfidenceInTables;

    this.textractClient = new TextractClient({ region, credentials });
    this.s3Client = new S3Client({ region, credentials });
}
80
+
81
/**
 * Builds the text of a block by concatenating its CHILD blocks.
 *
 * WORD children contribute their text; a SELECTION_ELEMENT child whose
 * status is SELECTED contributes an `X`. Pieces are separated by a single
 * space and the result is trimmed.
 *
 * A word that consists only of digits and commas (e.g. `1,234,567`) is
 * wrapped in double quotes.
 *
 * @param result    Block whose children should be rendered as text.
 * @param blocksMap Lookup of every known block by id.
 * @returns The space-separated text, or an empty string when the block has
 *          no usable children.
 */
private getText(result: Block, blocksMap: BlocksMap): string {
    const parts: string[] = [];

    for (const relationship of result.Relationships ?? []) {
        if (relationship.Type !== 'CHILD') {
            continue;
        }
        for (const childId of relationship.Ids ?? []) {
            const child = blocksMap[childId];
            if (!child) {
                // The id points at a block we never received; skip it
                // instead of crashing on `child.BlockType`.
                continue;
            }

            if (child.BlockType === 'WORD') {
                const wordText = child.Text || '';
                // Strip *every* comma before the digits-only check so that
                // numbers with several thousands separators are quoted too.
                const isCommaNumber =
                    wordText.includes(',') && /^\d+$/.test(wordText.replace(/,/g, ''));
                parts.push(isCommaNumber ? `"${wordText}"` : wordText);
            } else if (
                child.BlockType === 'SELECTION_ELEMENT' &&
                child.SelectionStatus === 'SELECTED'
            ) {
                parts.push('X');
            }
        }
    }

    return parts.join(' ').trim();
}
110
+
111
/**
 * Reports whether a LINE block has at least one child that sits inside a
 * table cell.
 *
 * Blocks of any other type, and LINE blocks without relationships, yield
 * `false`.
 */
private isBlockInTable(block: Block, blocksMap: BlocksMap): boolean {
    if (block.BlockType !== 'LINE' || !block.Relationships) {
        return false;
    }

    return block.Relationships.some(
        rel =>
            rel.Type === 'CHILD' &&
            (rel.Ids || []).some(id => this.isWordInTableCell(blocksMap[id], blocksMap))
    );
}
129
+
130
/**
 * Reports whether the given block is listed as a CHILD of any CELL that is
 * itself a CHILD of a TABLE block in `blocksMap`.
 *
 * Every call scans all blocks in the map.
 *
 * @param wordBlock Block to look for; a missing block or missing id yields
 *                  `false`.
 * @param blocksMap Lookup of every known block by id.
 * @returns `true` when some table cell references the block's id.
 */
private isWordInTableCell(wordBlock: Block, blocksMap: BlocksMap): boolean {
    const wordId = wordBlock?.Id;
    if (wordId == null) {
        // Callers look blocks up by id and may hand us `undefined`.
        return false;
    }

    for (const candidate of Object.values(blocksMap)) {
        if (candidate?.BlockType !== 'TABLE') {
            continue;
        }
        for (const tableRel of candidate.Relationships ?? []) {
            if (tableRel.Type !== 'CHILD') {
                continue;
            }
            for (const cellId of tableRel.Ids ?? []) {
                const cell = blocksMap[cellId];
                // Skip ids that are absent from the map or are not cells.
                if (cell?.BlockType !== 'CELL') {
                    continue;
                }
                const containsWord = (cell.Relationships ?? []).some(
                    cellRel => cellRel.Type === 'CHILD' && cellRel.Ids?.includes(wordId)
                );
                if (containsWord) {
                    return true;
                }
            }
        }
    }

    return false;
}
156
+
157
/**
 * Arranges the CELL children of a table block into a row/column grid.
 *
 * Each cell is placed at `[RowIndex - 1][ColumnIndex - 1]` (both indexes
 * default to 1 when absent) and carries its text and confidence. Rows and
 * columns that no cell occupies are filled in, with empty rows and with
 * `{ text: '', confidence: 0 }` placeholders respectively, so the grid
 * never contains holes.
 *
 * @param tableResult The TABLE block to expand.
 * @param blocksMap   Lookup of every known block by id.
 * @returns An object whose `rows` property is the dense grid.
 */
private getRowsColumnsMap(
    tableResult: Block,
    blocksMap: BlocksMap
): {
    rows: Array<Array<{ text: string; confidence: number }>>;
} {
    const rows: Array<Array<{ text: string; confidence: number }>> = [];

    for (const relationship of tableResult.Relationships ?? []) {
        if (relationship.Type !== 'CHILD') {
            continue;
        }
        for (const childId of relationship.Ids ?? []) {
            const cell = blocksMap[childId];
            // Ignore ids that are missing from the map or are not cells.
            if (cell?.BlockType !== 'CELL') {
                continue;
            }

            const rowIndex = cell.RowIndex || 1;
            const colIndex = cell.ColumnIndex || 1;

            // Create any rows that were skipped so that iterating `rows`
            // never yields `undefined`.
            for (let r = rows.length; r < rowIndex - 1; r++) {
                rows.push([]);
            }
            if (!rows[rowIndex - 1]) {
                rows[rowIndex - 1] = [];
            }
            const row = rows[rowIndex - 1];

            // Pad skipped columns so the cell lands at colIndex - 1.
            for (let c = row.length; c < colIndex - 1; c++) {
                row.push({ text: '', confidence: 0 });
            }

            row[colIndex - 1] = {
                text: this.getText(cell, blocksMap),
                confidence: cell.Confidence || 0
            };
        }
    }

    return { rows };
}
198
+
199
/**
 * Renders a table block as CSV and reports its mean cell confidence.
 *
 * Every cell's text is trimmed and written as a quoted CSV field; rows are
 * separated by `\n`. The confidence is the arithmetic mean over all cells
 * in the grid, or 0 when the grid has no cells.
 *
 * `_tableIndex` and `_pageNumber` are accepted but not used.
 */
private generateTableCSV(
    tableResult: Block,
    blocksMap: BlocksMap,
    _tableIndex: number,
    _pageNumber: number
): { csv: string; tableConfidence: number } {
    const grid = this.getRowsColumnsMap(tableResult, blocksMap).rows;

    const csvData: string[][] = [];
    let confidenceSum = 0;
    let cells = 0;

    for (let r = 0; r < grid.length; r++) {
        const source = grid[r];
        const line: string[] = [];
        for (let c = 0; c < source.length; c++) {
            const entry = source[c];
            line.push(entry.text.trim());
            confidenceSum += entry.confidence;
            cells += 1;
        }
        csvData.push(line);
    }

    // Mean confidence across all cells; an empty table reports 0.
    const tableConfidence = cells > 0 ? confidenceSum / cells : 0;

    const unparseOptions = {
        delimiter: ',',
        quotes: true,
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        newline: '\n',
        skipEmptyLines: false
    };
    const csv = Papa.unparse(csvData, unparseOptions);

    return { csv, tableConfidence };
}
240
+
241
/**
 * Uploads the given buffer to the configured bucket under the configured
 * file key.
 *
 * @param fileBuf File contents to store.
 * @returns A promise that settles once the S3 request has completed.
 */
async upload(fileBuf: Buffer): Promise<void> {
    // `log` is optional in the constructor options, so only log when a
    // logger was actually provided.
    this.log?.info('Uploading file to S3', { fileKey: this.fileKey });
    const command = new PutObjectCommand({
        Bucket: this.bucket,
        Key: this.fileKey,
        Body: fileBuf,
    });
    await this.s3Client.send(command);
}
250
+
251
/**
 * Starts a Textract document analysis with the TABLES feature for an
 * object in the configured bucket.
 *
 * @param s3Key Key of the object to analyse.
 * @returns The job id from the Textract response.
 */
async startAnalysis(s3Key: string): Promise<string> {
    const documentLocation = {
        S3Object: {
            Bucket: this.bucket,
            Name: s3Key
        }
    };
    const { JobId } = await this.textractClient.send(
        new StartDocumentAnalysisCommand({
            DocumentLocation: documentLocation,
            FeatureTypes: ["TABLES"]
        })
    );
    return JobId!;
}
264
+
265
/**
 * Queries the current status of a document analysis job.
 *
 * @param jobId - Id of the job to look up.
 * @returns The JobStatus field of the service response.
 */
async checkJobStatus(jobId: string): Promise<string> {
    const statusRequest = new GetDocumentAnalysisCommand({ JobId: jobId });
    const { JobStatus } = await this.textractClient.send(statusRequest);
    return JobStatus!;
}
270
+
271
/**
 * Builds a textual placeholder such as "[IMAGE_TOP_LEFT]\n" describing where a
 * block sits, based on its bounding box.
 *
 * @param block - Block whose bounding box is inspected.
 * @returns The placeholder line, or '' when the block has no bounding box or
 *          its width * height is below 0.05.
 */
private getImagePlaceholder(block: Block): string {
    const box = block.Geometry?.BoundingBox;
    if (!box) {
        return '';
    }

    // skip small images
    if ((box.Width || 0) * (box.Height || 0) < 0.05) {
        return '';
    }

    const top = box.Top || 0;
    const left = box.Left || 0;

    // Vertical part is omitted for the middle band; horizontal always resolves.
    const vertical = top < 0.3 ? 'TOP_' : top > 0.7 ? 'BOTTOM_' : '';
    const horizontal = left < 0.3 ? 'LEFT' : left > 0.7 ? 'RIGHT' : 'CENTER';

    return `[IMAGE_${vertical}${horizontal}]\n`;
}
290
+
291
/**
 * Maps a block's left offset to an indentation level.
 *
 * @param block - Block whose bounding-box Left value is read (missing => 0).
 * @returns 0 for Left < 0.15, 1 for Left < 0.25, otherwise 2.
 */
private getIndentationLevel(block: Block): number {
    const offset = block.Geometry?.BoundingBox?.Left || 0;
    return offset < 0.15 ? 0 : offset < 0.25 ? 1 : 2;
}
297
+
298
/**
 * Heuristic: a block counts as a header when nothing precedes it, or when the
 * vertical gap to the bottom of the previous block exceeds 0.03.
 *
 * @param block - The block being classified.
 * @param prevBlock - The preceding block, or null if there is none.
 */
private isLikelyHeader(block: Block, prevBlock: Block | null): boolean {
    if (!prevBlock) {
        return true;
    }
    const prevBox = prevBlock.Geometry?.BoundingBox;
    const prevBottom = (prevBox?.Top || 0) + (prevBox?.Height || 0);
    const top = block.Geometry?.BoundingBox?.Top || 0;
    return top - prevBottom > 0.03;
}
305
+
306
/**
 * Formats a block's text as one indented, newline-terminated line; blocks
 * judged to be headers additionally get a leading newline.
 *
 * @param block - Block providing the text (missing text => '').
 * @param prevBlock - Preceding block used for the header heuristic, or null.
 */
private formatTextBlock(block: Block, prevBlock: Block | null): string {
    // The indent unit is repeated once per indentation level.
    const padding = ' '.repeat(this.getIndentationLevel(block));
    const line = `${padding}${block.Text || ''}\n`;
    return this.isLikelyHeader(block, prevBlock) ? `\n${line}` : line;
}
316
+
317
/**
 * Decides whether two consecutive lines belong to the same paragraph.
 *
 * @param prev - The earlier line.
 * @param current - The line that follows it.
 * @returns true when the gap between the bottom of `prev` and the top of
 *          `current` is below 0.02 (overlapping lines give a negative gap
 *          and therefore also merge).
 */
private shouldMergeLines(prev: Block, current: Block): boolean {
    const prevBox = prev.Geometry?.BoundingBox;
    const prevBottom = (prevBox?.Top || 0) + (prevBox?.Height || 0);
    const currentTop = current.Geometry?.BoundingBox?.Top || 0;
    return currentTop - prevBottom < 0.02;
}
329
+
330
/**
 * Downloads every result page of a document analysis job and renders the
 * blocks as tagged text.
 *
 * Output shape: one `<page number="N">` element per PAGE block, containing
 * `<text>`, `<table type="csv">` and `<image>` elements in the order the
 * blocks were encountered after sorting by page and top coordinate.
 *
 * Block handling inside a page:
 *  - TABLE blocks are rendered through generateTableCSV.
 *  - LINE blocks that are not part of a table are accumulated into text
 *    blocks; consecutive lines are merged when shouldMergeLines says so.
 *  - When this.detectImages is set, any other block with a non-zero width
 *    and height is offered to getImagePlaceholder and emitted as an image
 *    if a placeholder is returned.
 *
 * @param jobId - Id of the analysis job whose results are fetched.
 * @returns The rendered document text.
 */
async processResults(jobId: string): Promise<string> {
    // Drain all paginated result sets into one list. push() avoids re-copying
    // the accumulated array on every page the way concat() does.
    const allBlocks: Block[] = [];
    let nextToken: string | undefined;
    do {
        const response = await this.textractClient.send(
            new GetDocumentAnalysisCommand({
                JobId: jobId,
                NextToken: nextToken
            })
        );
        allBlocks.push(...(response.Blocks ?? []));
        nextToken = response.NextToken;
    } while (nextToken);

    // Index blocks by id for relationship lookups done by the helpers.
    const blocksMap: BlocksMap = {};
    for (const block of allBlocks) {
        blocksMap[block.Id!] = block;
    }

    const pageContents: PageContent[] = [];
    let currentPage: PageContent | null = null;

    // Text accumulated for the paragraph currently being built.
    let currentTextContent = "";
    let prevLineBlock: Block | null = null;

    // Emits the pending paragraph (if it has visible content) onto `page`.
    const flushText = (page: PageContent | null): void => {
        if (page && currentTextContent.trim().length > 0) {
            page.blocks.push({
                type: 'text',
                content: currentTextContent
            });
            currentTextContent = "";
        }
    };

    // Sort by page and vertical position
    allBlocks.sort((a, b) => {
        if (a.Page !== b.Page) return (a.Page || 0) - (b.Page || 0);
        return (a.Geometry?.BoundingBox?.Top || 0) - (b.Geometry?.BoundingBox?.Top || 0);
    });

    for (const block of allBlocks) {
        if (block.BlockType === 'PAGE') {
            // Close the previous page before opening a new one.
            flushText(currentPage);
            if (currentPage) {
                pageContents.push(currentPage);
            }
            currentPage = {
                pageNumber: block.Page || 0,
                blocks: []
            };
            currentTextContent = "";
            prevLineBlock = null;
            continue;
        }

        // Blocks seen before any PAGE block, or belonging to another page, are ignored.
        if (!currentPage || block.Page !== currentPage.pageNumber) {
            continue;
        }

        if (block.BlockType === 'TABLE') {
            // Keep reading order: pending text goes out before the table.
            flushText(currentPage);
            const tableIndex = currentPage.blocks.filter(b => b.type === 'table').length + 1;
            const { csv, tableConfidence } = this.generateTableCSV(
                block,
                blocksMap,
                tableIndex,
                currentPage.pageNumber
            );
            currentPage.blocks.push({
                type: 'table',
                content: csv,
                confidence: tableConfidence
            });
            // A table breaks the paragraph flow; the next line starts fresh.
            prevLineBlock = null;
        }
        else if (block.BlockType === 'LINE' && !this.isBlockInTable(block, blocksMap)) {
            if (prevLineBlock && this.shouldMergeLines(prevLineBlock, block)) {
                // Same paragraph: append with a single space. trim() already
                // removes any leading newline/indent added by formatTextBlock.
                currentTextContent += " " + this.formatTextBlock(block, prevLineBlock).trim();
            } else {
                // New paragraph: emit the previous one and start over.
                flushText(currentPage);
                currentTextContent = this.formatTextBlock(block, prevLineBlock).trim();
            }
            prevLineBlock = block;
        }
        else if (this.detectImages) {
            const geometry = block.Geometry?.BoundingBox;
            if (geometry && geometry.Width && geometry.Height) {
                const imagePlaceholder = this.getImagePlaceholder(block);
                if (imagePlaceholder) {
                    flushText(currentPage);
                    currentPage.blocks.push({
                        type: 'image',
                        content: imagePlaceholder,
                        left: geometry.Left,
                        top: geometry.Top,
                        width: geometry.Width,
                        height: geometry.Height
                    });
                }
            }
            // prevLineBlock is intentionally left untouched here.
        }
    }

    // Handle last page
    if (currentPage) {
        flushText(currentPage);
        pageContents.push(currentPage);
    }

    // A coordinate of exactly 0 is valid (image touching the page edge), so
    // test for absence rather than truthiness when emitting attributes.
    const geometryAttr = (name: string, value: number | null | undefined): string =>
        value != null ? ` ${name}="${value.toFixed(4)}"` : '';

    // Build final output
    let fulltext = '';
    let imgNumber = 1;
    for (const page of pageContents) {
        fulltext += `<page number="${page.pageNumber}">\n`;
        for (const block of page.blocks) {
            if (block.type === 'text') {
                fulltext += `<text>\n${block.content}\n</text>\n\n`;
            } else if (block.type === 'table') {
                const confidenceAttr = block.confidence !== undefined && this.includeConfidenceInTables
                    ? ` confidence="${block.confidence.toFixed(2)}"`
                    : '';
                fulltext += `<table type="csv"${confidenceAttr}>\n`;
                fulltext += `${block.content}\n`;
                fulltext += `</table>\n\n`;
            } else if (block.type === 'image') {
                const attrs =
                    geometryAttr('left', block.left) +
                    geometryAttr('top', block.top) +
                    geometryAttr('width', block.width) +
                    geometryAttr('height', block.height);

                fulltext += `<image id="${imgNumber++}" ${attrs}>\n${block.content.trim()}\n</image>\n\n`;
            }
        }
        fulltext += `</page>\n\n`;
    }

    return fulltext;
}
504
+
505
+ }
@@ -0,0 +1,26 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import sharp from 'sharp';
4
+ import { expect, test } from 'vitest';
5
+ import { imageResizer } from '../conversion/image';
6
+
7
+
8
// Verifies that imageResizer caps both dimensions at max_hw and re-encodes
// the picture in the requested format.
test('should resize an image to a maximum height or width', async () => {
    const max_hw = 1024;
    const format: keyof sharp.FormatEnum = 'jpeg';
    const imageFile = fs.readFileSync(path.join(__dirname, '../../fixtures', 'cat-picture.jpg'));

    const resizer = imageResizer(max_hw, format);

    const resized = sharp(imageFile).pipe(resizer);
    const buffer = await resized.toBuffer();
    const metadata = await sharp(buffer).metadata();

    console.log(metadata);
    // Keep the debug artifact, but write the buffer that was actually verified
    // and await it: the previous un-awaited toFile() on the already-consumed
    // pipeline left a floating promise that could settle after the test ended.
    await fs.promises.writeFile('/tmp/cat-picture.jpg', buffer);

    expect(metadata.width).to.be.lessThanOrEqual(max_hw);
    expect(metadata.height).to.be.lessThanOrEqual(max_hw);
    expect(metadata.format).to.equal(format);
});
@@ -0,0 +1,22 @@
1
+
2
+ import sharp from "sharp";
3
+
4
+
5
/**
 * Creates a sharp transform that fits an image inside a square box and
 * converts it to the requested output format.
 *
 * The resize uses sharp's `inside` fit with `withoutEnlargement` enabled.
 *
 * @param max_hw - Upper bound, in pixels, applied to both width and height.
 * @param format - Output format passed to sharp's `toFormat`.
 * @returns The configured sharp instance; pipe image data into it.
 */
export function imageResizer(max_hw: number, format: keyof sharp.FormatEnum) {
    const boundingBox: sharp.ResizeOptions = {
        width: max_hw,
        height: max_hw,
        fit: sharp.fit.inside,
        withoutEnlargement: true,
    };

    const pipeline = sharp();
    return pipeline.resize(boundingBox).toFormat(format);
}
22
+
@@ -0,0 +1,74 @@
1
import { MockActivityEnvironment, TestWorkflowEnvironment } from '@temporalio/testing';
import fs from 'fs';
import path from 'path';
import { afterAll, beforeAll, expect, test } from 'vitest';
import { mutoolPdfToText, pdfExtractPages, pdfToImages } from './mutool.js';
6
+
7
+
8
// Shared fixtures for all mutool tests in this file.
let testEnv: TestWorkflowEnvironment;
let activityContext: MockActivityEnvironment;

beforeAll(async () => {
    testEnv = await TestWorkflowEnvironment.createLocal();
    activityContext = new MockActivityEnvironment();
});

// Release the local test environment created above; without this it is
// never torn down once the suite finishes. The optional call covers the
// case where beforeAll failed before testEnv was assigned.
afterAll(async () => {
    await testEnv?.teardown();
});
15
+
16
// Per-test timeout in milliseconds, passed to every test in this file.
const TIMEOUT = 10000;

// Converts the fixture PDF to text and checks for a known sentence fragment.
test('[mutool] should convert pdf to text', async () => {
    const fixturePath = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const buf = Buffer.from(fs.readFileSync(fixturePath));

    console.log("Running mutoolPdfToText")
    const text = await activityContext.run(mutoolPdfToText, buf);

    expect(text).toContain('VF primarily uses foreign currency exchange');
}, TIMEOUT);
26
+
27
// Renders every page of the fixture PDF and expects one image per page (119).
test('[mutool] should convert pdf to images', async () => {
    const fixturePath = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');

    console.log("Running pdfToImages")
    const images = await activityContext.run(pdfToImages, fixturePath);
    console.log(images);

    expect(images).toBeInstanceOf(Array);
    const imageList = images as string[];
    expect(imageList.length).toBe(119);
}, TIMEOUT);
38
+
39
// Renders only the requested pages and expects one image per requested page.
test('[mutool] should convert pdf to images with pages', async () => {
    const fixturePath = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const requestedPages = [7, 8, 9];

    console.log("Running pdfToImages with pages")
    const images = await activityContext.run(pdfToImages, fixturePath, requestedPages);
    console.log(images);

    expect(images).toBeInstanceOf(Array);
    const imageList = images as string[];
    expect(imageList.length).toBe(3);
}, TIMEOUT);
51
+
52
// Extracts three pages into a new PDF and checks the result mentions ".pdf".
test('[mutool] should extract 3 pages from PDF into new PDF', async () => {
    const fixturePath = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const requestedPages = [7, 8, 9];

    console.log("Running pdfGetPages")
    const extracted = await activityContext.run(pdfExtractPages, fixturePath, requestedPages);
    console.log(extracted);

    expect(extracted).toContain(".pdf");
}, TIMEOUT);
63
+
64
// Extracts a single page into a new PDF and checks the result mentions ".pdf".
test('[mutool] should extract 1 pages from PDF into new PDF', async () => {
    const fixturePath = path.join(__dirname, '../../fixtures', 'test-pdf1.pdf');
    const requestedPages = [12];

    console.log("Running pdfGetPages")
    const extracted = await activityContext.run(pdfExtractPages, fixturePath, requestedPages);
    console.log(extracted);

    expect(extracted).toContain(".pdf");
}, TIMEOUT);