@vertesia/workflow 0.24.0-dev.202601221707

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (490) hide show
  1. package/LICENSE +13 -0
  2. package/README.md +65 -0
  3. package/bin/bundle-workflows.mjs +39 -0
  4. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +33 -0
  5. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  6. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +73 -0
  7. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  8. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +19 -0
  9. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  10. package/lib/cjs/activities/chunkDocument.js +85 -0
  11. package/lib/cjs/activities/chunkDocument.js.map +1 -0
  12. package/lib/cjs/activities/copyParentArtifacts.js +127 -0
  13. package/lib/cjs/activities/copyParentArtifacts.js.map +1 -0
  14. package/lib/cjs/activities/createDocumentFromOther.js +64 -0
  15. package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
  16. package/lib/cjs/activities/executeInteraction.js +194 -0
  17. package/lib/cjs/activities/executeInteraction.js.map +1 -0
  18. package/lib/cjs/activities/extractDocumentText.js +156 -0
  19. package/lib/cjs/activities/extractDocumentText.js.map +1 -0
  20. package/lib/cjs/activities/generateDocumentProperties.js +83 -0
  21. package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
  22. package/lib/cjs/activities/generateEmbeddings.js +257 -0
  23. package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
  24. package/lib/cjs/activities/generateOrAssignContentType.js +125 -0
  25. package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
  26. package/lib/cjs/activities/getObjectFromStore.js +20 -0
  27. package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
  28. package/lib/cjs/activities/handleError.js +22 -0
  29. package/lib/cjs/activities/handleError.js.map +1 -0
  30. package/lib/cjs/activities/index-dsl.js +51 -0
  31. package/lib/cjs/activities/index-dsl.js.map +1 -0
  32. package/lib/cjs/activities/index.js +21 -0
  33. package/lib/cjs/activities/index.js.map +1 -0
  34. package/lib/cjs/activities/media/prepareAudio.js +239 -0
  35. package/lib/cjs/activities/media/prepareAudio.js.map +1 -0
  36. package/lib/cjs/activities/media/prepareVideo.js +429 -0
  37. package/lib/cjs/activities/media/prepareVideo.js.map +1 -0
  38. package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
  39. package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
  40. package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
  41. package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
  42. package/lib/cjs/activities/media/transcribeMediaWithGladia.js +82 -0
  43. package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
  44. package/lib/cjs/activities/notifyWebhook.js +158 -0
  45. package/lib/cjs/activities/notifyWebhook.js.map +1 -0
  46. package/lib/cjs/activities/rateLimiter.js +30 -0
  47. package/lib/cjs/activities/rateLimiter.js.map +1 -0
  48. package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
  49. package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
  50. package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
  51. package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
  52. package/lib/cjs/activities/setDocumentStatus.js +15 -0
  53. package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
  54. package/lib/cjs/conversion/TextractProcessor.js +417 -0
  55. package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
  56. package/lib/cjs/conversion/image.js +149 -0
  57. package/lib/cjs/conversion/image.js.map +1 -0
  58. package/lib/cjs/conversion/markitdown.js +42 -0
  59. package/lib/cjs/conversion/markitdown.js.map +1 -0
  60. package/lib/cjs/conversion/mutool.js +147 -0
  61. package/lib/cjs/conversion/mutool.js.map +1 -0
  62. package/lib/cjs/conversion/pandoc.js +39 -0
  63. package/lib/cjs/conversion/pandoc.js.map +1 -0
  64. package/lib/cjs/dsl/conditions.js +81 -0
  65. package/lib/cjs/dsl/conditions.js.map +1 -0
  66. package/lib/cjs/dsl/dsl-workflow.js +343 -0
  67. package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
  68. package/lib/cjs/dsl/dslProxyActivities.js +23 -0
  69. package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
  70. package/lib/cjs/dsl/projections.js +59 -0
  71. package/lib/cjs/dsl/projections.js.map +1 -0
  72. package/lib/cjs/dsl/setup/ActivityContext.js +122 -0
  73. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
  74. package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
  75. package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
  76. package/lib/cjs/dsl/setup/fetch/index.js +16 -0
  77. package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
  78. package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
  79. package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
  80. package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
  81. package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
  82. package/lib/cjs/dsl/validation.js +122 -0
  83. package/lib/cjs/dsl/validation.js.map +1 -0
  84. package/lib/cjs/dsl/vars.js +341 -0
  85. package/lib/cjs/dsl/vars.js.map +1 -0
  86. package/lib/cjs/dsl/walk.js +100 -0
  87. package/lib/cjs/dsl/walk.js.map +1 -0
  88. package/lib/cjs/dsl.js +20 -0
  89. package/lib/cjs/dsl.js.map +1 -0
  90. package/lib/cjs/errors.js +79 -0
  91. package/lib/cjs/errors.js.map +1 -0
  92. package/lib/cjs/index.js +56 -0
  93. package/lib/cjs/index.js.map +1 -0
  94. package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
  95. package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
  96. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
  97. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
  98. package/lib/cjs/iterative-generation/activities/generatePart.js +78 -0
  99. package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
  100. package/lib/cjs/iterative-generation/activities/generateToc.js +86 -0
  101. package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
  102. package/lib/cjs/iterative-generation/activities/index.js +12 -0
  103. package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
  104. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
  105. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  106. package/lib/cjs/iterative-generation/types.js +5 -0
  107. package/lib/cjs/iterative-generation/types.js.map +1 -0
  108. package/lib/cjs/iterative-generation/utils.js +121 -0
  109. package/lib/cjs/iterative-generation/utils.js.map +1 -0
  110. package/lib/cjs/package.json +3 -0
  111. package/lib/cjs/result-types.js +10 -0
  112. package/lib/cjs/result-types.js.map +1 -0
  113. package/lib/cjs/system/notifyWebhookWorkflow.js +53 -0
  114. package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
  115. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
  116. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  117. package/lib/cjs/utils/auth.js +15 -0
  118. package/lib/cjs/utils/auth.js.map +1 -0
  119. package/lib/cjs/utils/blobs.js +64 -0
  120. package/lib/cjs/utils/blobs.js.map +1 -0
  121. package/lib/cjs/utils/chunks.js +14 -0
  122. package/lib/cjs/utils/chunks.js.map +1 -0
  123. package/lib/cjs/utils/client.js +31 -0
  124. package/lib/cjs/utils/client.js.map +1 -0
  125. package/lib/cjs/utils/expand-vars.js +33 -0
  126. package/lib/cjs/utils/expand-vars.js.map +1 -0
  127. package/lib/cjs/utils/memory.js +65 -0
  128. package/lib/cjs/utils/memory.js.map +1 -0
  129. package/lib/cjs/utils/renditions.js +88 -0
  130. package/lib/cjs/utils/renditions.js.map +1 -0
  131. package/lib/cjs/utils/storage.js +54 -0
  132. package/lib/cjs/utils/storage.js.map +1 -0
  133. package/lib/cjs/utils/tokens.js +38 -0
  134. package/lib/cjs/utils/tokens.js.map +1 -0
  135. package/lib/cjs/vars.js +20 -0
  136. package/lib/cjs/vars.js.map +1 -0
  137. package/lib/cjs/workflows.js +15 -0
  138. package/lib/cjs/workflows.js.map +1 -0
  139. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +30 -0
  140. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  141. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +70 -0
  142. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  143. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
  144. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  145. package/lib/esm/activities/chunkDocument.js +82 -0
  146. package/lib/esm/activities/chunkDocument.js.map +1 -0
  147. package/lib/esm/activities/copyParentArtifacts.js +124 -0
  148. package/lib/esm/activities/copyParentArtifacts.js.map +1 -0
  149. package/lib/esm/activities/createDocumentFromOther.js +58 -0
  150. package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
  151. package/lib/esm/activities/executeInteraction.js +190 -0
  152. package/lib/esm/activities/executeInteraction.js.map +1 -0
  153. package/lib/esm/activities/extractDocumentText.js +153 -0
  154. package/lib/esm/activities/extractDocumentText.js.map +1 -0
  155. package/lib/esm/activities/generateDocumentProperties.js +80 -0
  156. package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
  157. package/lib/esm/activities/generateEmbeddings.js +254 -0
  158. package/lib/esm/activities/generateEmbeddings.js.map +1 -0
  159. package/lib/esm/activities/generateOrAssignContentType.js +122 -0
  160. package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
  161. package/lib/esm/activities/getObjectFromStore.js +17 -0
  162. package/lib/esm/activities/getObjectFromStore.js.map +1 -0
  163. package/lib/esm/activities/handleError.js +19 -0
  164. package/lib/esm/activities/handleError.js.map +1 -0
  165. package/lib/esm/activities/index-dsl.js +25 -0
  166. package/lib/esm/activities/index-dsl.js.map +1 -0
  167. package/lib/esm/activities/index.js +5 -0
  168. package/lib/esm/activities/index.js.map +1 -0
  169. package/lib/esm/activities/media/prepareAudio.js +200 -0
  170. package/lib/esm/activities/media/prepareAudio.js.map +1 -0
  171. package/lib/esm/activities/media/prepareVideo.js +390 -0
  172. package/lib/esm/activities/media/prepareVideo.js.map +1 -0
  173. package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
  174. package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
  175. package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
  176. package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
  177. package/lib/esm/activities/media/transcribeMediaWithGladia.js +79 -0
  178. package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
  179. package/lib/esm/activities/notifyWebhook.js +155 -0
  180. package/lib/esm/activities/notifyWebhook.js.map +1 -0
  181. package/lib/esm/activities/rateLimiter.js +27 -0
  182. package/lib/esm/activities/rateLimiter.js.map +1 -0
  183. package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
  184. package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
  185. package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
  186. package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
  187. package/lib/esm/activities/setDocumentStatus.js +12 -0
  188. package/lib/esm/activities/setDocumentStatus.js.map +1 -0
  189. package/lib/esm/conversion/TextractProcessor.js +410 -0
  190. package/lib/esm/conversion/TextractProcessor.js.map +1 -0
  191. package/lib/esm/conversion/image.js +143 -0
  192. package/lib/esm/conversion/image.js.map +1 -0
  193. package/lib/esm/conversion/markitdown.js +36 -0
  194. package/lib/esm/conversion/markitdown.js.map +1 -0
  195. package/lib/esm/conversion/mutool.js +139 -0
  196. package/lib/esm/conversion/mutool.js.map +1 -0
  197. package/lib/esm/conversion/pandoc.js +36 -0
  198. package/lib/esm/conversion/pandoc.js.map +1 -0
  199. package/lib/esm/dsl/conditions.js +75 -0
  200. package/lib/esm/dsl/conditions.js.map +1 -0
  201. package/lib/esm/dsl/dsl-workflow.js +336 -0
  202. package/lib/esm/dsl/dsl-workflow.js.map +1 -0
  203. package/lib/esm/dsl/dslProxyActivities.js +20 -0
  204. package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
  205. package/lib/esm/dsl/projections.js +55 -0
  206. package/lib/esm/dsl/projections.js.map +1 -0
  207. package/lib/esm/dsl/setup/ActivityContext.js +117 -0
  208. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
  209. package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
  210. package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
  211. package/lib/esm/dsl/setup/fetch/index.js +12 -0
  212. package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
  213. package/lib/esm/dsl/setup/fetch/providers.js +61 -0
  214. package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
  215. package/lib/esm/dsl/test/test-child-workflow.js +5 -0
  216. package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
  217. package/lib/esm/dsl/validation.js +118 -0
  218. package/lib/esm/dsl/validation.js.map +1 -0
  219. package/lib/esm/dsl/vars.js +335 -0
  220. package/lib/esm/dsl/vars.js.map +1 -0
  221. package/lib/esm/dsl/walk.js +96 -0
  222. package/lib/esm/dsl/walk.js.map +1 -0
  223. package/lib/esm/dsl.js +4 -0
  224. package/lib/esm/dsl.js.map +1 -0
  225. package/lib/esm/errors.js +69 -0
  226. package/lib/esm/errors.js.map +1 -0
  227. package/lib/esm/index.js +38 -0
  228. package/lib/esm/index.js.map +1 -0
  229. package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
  230. package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
  231. package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
  232. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
  233. package/lib/esm/iterative-generation/activities/generatePart.js +75 -0
  234. package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
  235. package/lib/esm/iterative-generation/activities/generateToc.js +83 -0
  236. package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
  237. package/lib/esm/iterative-generation/activities/index.js +5 -0
  238. package/lib/esm/iterative-generation/activities/index.js.map +1 -0
  239. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
  240. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  241. package/lib/esm/iterative-generation/types.js +2 -0
  242. package/lib/esm/iterative-generation/types.js.map +1 -0
  243. package/lib/esm/iterative-generation/utils.js +112 -0
  244. package/lib/esm/iterative-generation/utils.js.map +1 -0
  245. package/lib/esm/result-types.js +7 -0
  246. package/lib/esm/result-types.js.map +1 -0
  247. package/lib/esm/system/notifyWebhookWorkflow.js +50 -0
  248. package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
  249. package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
  250. package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  251. package/lib/esm/utils/auth.js +8 -0
  252. package/lib/esm/utils/auth.js.map +1 -0
  253. package/lib/esm/utils/blobs.js +54 -0
  254. package/lib/esm/utils/blobs.js.map +1 -0
  255. package/lib/esm/utils/chunks.js +9 -0
  256. package/lib/esm/utils/chunks.js.map +1 -0
  257. package/lib/esm/utils/client.js +27 -0
  258. package/lib/esm/utils/client.js.map +1 -0
  259. package/lib/esm/utils/expand-vars.js +30 -0
  260. package/lib/esm/utils/expand-vars.js.map +1 -0
  261. package/lib/esm/utils/memory.js +55 -0
  262. package/lib/esm/utils/memory.js.map +1 -0
  263. package/lib/esm/utils/renditions.js +80 -0
  264. package/lib/esm/utils/renditions.js.map +1 -0
  265. package/lib/esm/utils/storage.js +45 -0
  266. package/lib/esm/utils/storage.js.map +1 -0
  267. package/lib/esm/utils/tokens.js +34 -0
  268. package/lib/esm/utils/tokens.js.map +1 -0
  269. package/lib/esm/vars.js +4 -0
  270. package/lib/esm/vars.js.map +1 -0
  271. package/lib/esm/workflows.js +8 -0
  272. package/lib/esm/workflows.js.map +1 -0
  273. package/lib/tsconfig.tsbuildinfo +1 -0
  274. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
  275. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
  276. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
  277. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
  278. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
  279. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
  280. package/lib/types/activities/chunkDocument.d.ts +33 -0
  281. package/lib/types/activities/chunkDocument.d.ts.map +1 -0
  282. package/lib/types/activities/copyParentArtifacts.d.ts +19 -0
  283. package/lib/types/activities/copyParentArtifacts.d.ts.map +1 -0
  284. package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
  285. package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
  286. package/lib/types/activities/executeInteraction.d.ts +61 -0
  287. package/lib/types/activities/executeInteraction.d.ts.map +1 -0
  288. package/lib/types/activities/extractDocumentText.d.ts +10 -0
  289. package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
  290. package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
  291. package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
  292. package/lib/types/activities/generateEmbeddings.d.ts +53 -0
  293. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
  294. package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
  295. package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
  296. package/lib/types/activities/getObjectFromStore.d.ts +14 -0
  297. package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
  298. package/lib/types/activities/handleError.d.ts +6 -0
  299. package/lib/types/activities/handleError.d.ts.map +1 -0
  300. package/lib/types/activities/index-dsl.d.ts +25 -0
  301. package/lib/types/activities/index-dsl.d.ts.map +1 -0
  302. package/lib/types/activities/index.d.ts +5 -0
  303. package/lib/types/activities/index.d.ts.map +1 -0
  304. package/lib/types/activities/media/prepareAudio.d.ts +25 -0
  305. package/lib/types/activities/media/prepareAudio.d.ts.map +1 -0
  306. package/lib/types/activities/media/prepareVideo.d.ts +30 -0
  307. package/lib/types/activities/media/prepareVideo.d.ts.map +1 -0
  308. package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
  309. package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
  310. package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
  311. package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
  312. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +19 -0
  313. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
  314. package/lib/types/activities/notifyWebhook.d.ts +27 -0
  315. package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
  316. package/lib/types/activities/rateLimiter.d.ts +11 -0
  317. package/lib/types/activities/rateLimiter.d.ts.map +1 -0
  318. package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
  319. package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
  320. package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
  321. package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
  322. package/lib/types/activities/setDocumentStatus.d.ts +15 -0
  323. package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
  324. package/lib/types/conversion/TextractProcessor.d.ts +45 -0
  325. package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
  326. package/lib/types/conversion/image.d.ts +13 -0
  327. package/lib/types/conversion/image.d.ts.map +1 -0
  328. package/lib/types/conversion/markitdown.d.ts +2 -0
  329. package/lib/types/conversion/markitdown.d.ts.map +1 -0
  330. package/lib/types/conversion/mutool.d.ts +19 -0
  331. package/lib/types/conversion/mutool.d.ts.map +1 -0
  332. package/lib/types/conversion/pandoc.d.ts +2 -0
  333. package/lib/types/conversion/pandoc.d.ts.map +1 -0
  334. package/lib/types/dsl/conditions.d.ts +2 -0
  335. package/lib/types/dsl/conditions.d.ts.map +1 -0
  336. package/lib/types/dsl/dsl-workflow.d.ts +5 -0
  337. package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
  338. package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
  339. package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
  340. package/lib/types/dsl/projections.d.ts +4 -0
  341. package/lib/types/dsl/projections.d.ts.map +1 -0
  342. package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
  343. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
  344. package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
  345. package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
  346. package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
  347. package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
  348. package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
  349. package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
  350. package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
  351. package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
  352. package/lib/types/dsl/validation.d.ts +4 -0
  353. package/lib/types/dsl/validation.d.ts.map +1 -0
  354. package/lib/types/dsl/vars.d.ts +48 -0
  355. package/lib/types/dsl/vars.d.ts.map +1 -0
  356. package/lib/types/dsl/walk.d.ts +18 -0
  357. package/lib/types/dsl/walk.d.ts.map +1 -0
  358. package/lib/types/dsl.d.ts +4 -0
  359. package/lib/types/dsl.d.ts.map +1 -0
  360. package/lib/types/errors.d.ts +37 -0
  361. package/lib/types/errors.d.ts.map +1 -0
  362. package/lib/types/index.d.ts +37 -0
  363. package/lib/types/index.d.ts.map +1 -0
  364. package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
  365. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
  366. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
  367. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
  368. package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
  369. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
  370. package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
  371. package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
  372. package/lib/types/iterative-generation/activities/index.d.ts +5 -0
  373. package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
  374. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
  375. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
  376. package/lib/types/iterative-generation/types.d.ts +79 -0
  377. package/lib/types/iterative-generation/types.d.ts.map +1 -0
  378. package/lib/types/iterative-generation/utils.d.ts +26 -0
  379. package/lib/types/iterative-generation/utils.d.ts.map +1 -0
  380. package/lib/types/result-types.d.ts +22 -0
  381. package/lib/types/result-types.d.ts.map +1 -0
  382. package/lib/types/system/notifyWebhookWorkflow.d.ts +8 -0
  383. package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
  384. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
  385. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
  386. package/lib/types/utils/auth.d.ts +4 -0
  387. package/lib/types/utils/auth.d.ts.map +1 -0
  388. package/lib/types/utils/blobs.d.ts +7 -0
  389. package/lib/types/utils/blobs.d.ts.map +1 -0
  390. package/lib/types/utils/chunks.d.ts +9 -0
  391. package/lib/types/utils/chunks.d.ts.map +1 -0
  392. package/lib/types/utils/client.d.ts +8 -0
  393. package/lib/types/utils/client.d.ts.map +1 -0
  394. package/lib/types/utils/expand-vars.d.ts +8 -0
  395. package/lib/types/utils/expand-vars.d.ts.map +1 -0
  396. package/lib/types/utils/memory.d.ts +8 -0
  397. package/lib/types/utils/memory.d.ts.map +1 -0
  398. package/lib/types/utils/renditions.d.ts +23 -0
  399. package/lib/types/utils/renditions.d.ts.map +1 -0
  400. package/lib/types/utils/storage.d.ts +16 -0
  401. package/lib/types/utils/storage.d.ts.map +1 -0
  402. package/lib/types/utils/tokens.d.ts +11 -0
  403. package/lib/types/utils/tokens.d.ts.map +1 -0
  404. package/lib/types/vars.d.ts +3 -0
  405. package/lib/types/vars.d.ts.map +1 -0
  406. package/lib/types/workflows.d.ts +8 -0
  407. package/lib/types/workflows.d.ts.map +1 -0
  408. package/lib/workflows-bundle.js +17213 -0
  409. package/package.json +146 -0
  410. package/src/activities/advanced/createDocumentTypeFromInteractionRun.ts +55 -0
  411. package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +119 -0
  412. package/src/activities/advanced/updateDocumentFromInteractionRun.ts +35 -0
  413. package/src/activities/chunkDocument.ts +146 -0
  414. package/src/activities/copyParentArtifacts.ts +162 -0
  415. package/src/activities/createDocumentFromOther.ts +92 -0
  416. package/src/activities/executeInteraction.ts +300 -0
  417. package/src/activities/extractDocumentText.ts +205 -0
  418. package/src/activities/generateDocumentProperties.ts +120 -0
  419. package/src/activities/generateEmbeddings.ts +387 -0
  420. package/src/activities/generateOrAssignContentType.ts +218 -0
  421. package/src/activities/getObjectFromStore.ts +31 -0
  422. package/src/activities/handleError.ts +25 -0
  423. package/src/activities/index-dsl.ts +25 -0
  424. package/src/activities/index.ts +4 -0
  425. package/src/activities/media/prepareAudio.ts +334 -0
  426. package/src/activities/media/prepareVideo.ts +622 -0
  427. package/src/activities/media/processPdfWithTextract.ts +141 -0
  428. package/src/activities/media/saveGladiaTranscription.ts +128 -0
  429. package/src/activities/media/transcribeMediaWithGladia.ts +117 -0
  430. package/src/activities/notifyWebhook.test.ts +134 -0
  431. package/src/activities/notifyWebhook.ts +199 -0
  432. package/src/activities/rateLimiter.ts +41 -0
  433. package/src/activities/renditions/generateImageRendition.ts +111 -0
  434. package/src/activities/renditions/generateVideoRendition.ts +293 -0
  435. package/src/activities/setDocumentStatus.ts +25 -0
  436. package/src/conversion/TextractProcessor.ts +506 -0
  437. package/src/conversion/image.test.ts +118 -0
  438. package/src/conversion/image.ts +168 -0
  439. package/src/conversion/markitdown.ts +41 -0
  440. package/src/conversion/mutool.test.ts +74 -0
  441. package/src/conversion/mutool.ts +180 -0
  442. package/src/conversion/pandoc.test.ts +24 -0
  443. package/src/conversion/pandoc.ts +40 -0
  444. package/src/dsl/conditions.ts +76 -0
  445. package/src/dsl/dsl-workflow.test.ts +58 -0
  446. package/src/dsl/dsl-workflow.ts +397 -0
  447. package/src/dsl/dslProxyActivities.ts +38 -0
  448. package/src/dsl/ms.d.ts +11 -0
  449. package/src/dsl/projections.test.ts +159 -0
  450. package/src/dsl/projections.ts +72 -0
  451. package/src/dsl/setup/ActivityContext.ts +178 -0
  452. package/src/dsl/setup/fetch/DataProvider.ts +45 -0
  453. package/src/dsl/setup/fetch/index.ts +19 -0
  454. package/src/dsl/setup/fetch/providers.ts +67 -0
  455. package/src/dsl/test/test-child-workflow.ts +6 -0
  456. package/src/dsl/validation.test.ts +257 -0
  457. package/src/dsl/validation.ts +125 -0
  458. package/src/dsl/vars.test.ts +245 -0
  459. package/src/dsl/vars.ts +340 -0
  460. package/src/dsl/walk.test.ts +81 -0
  461. package/src/dsl/walk.ts +103 -0
  462. package/src/dsl/workflow-exec-child.test.ts +273 -0
  463. package/src/dsl/workflow-fetch.test.ts +138 -0
  464. package/src/dsl/workflow-import.test.ts +89 -0
  465. package/src/dsl/workflow.test.ts +122 -0
  466. package/src/dsl.ts +3 -0
  467. package/src/errors.ts +101 -0
  468. package/src/index.ts +41 -0
  469. package/src/iterative-generation/activities/extractToc.ts +63 -0
  470. package/src/iterative-generation/activities/finalizeOutput.ts +100 -0
  471. package/src/iterative-generation/activities/generatePart.ts +123 -0
  472. package/src/iterative-generation/activities/generateToc.ts +116 -0
  473. package/src/iterative-generation/activities/index.ts +4 -0
  474. package/src/iterative-generation/iterativeGenerationWorkflow.ts +68 -0
  475. package/src/iterative-generation/types.ts +99 -0
  476. package/src/iterative-generation/utils.ts +126 -0
  477. package/src/result-types.ts +25 -0
  478. package/src/system/notifyWebhookWorkflow.ts +70 -0
  479. package/src/system/recalculateEmbeddingsWorkflow.ts +41 -0
  480. package/src/utils/auth.ts +10 -0
  481. package/src/utils/blobs.ts +59 -0
  482. package/src/utils/chunks.ts +17 -0
  483. package/src/utils/client.ts +46 -0
  484. package/src/utils/expand-vars.ts +31 -0
  485. package/src/utils/memory.ts +61 -0
  486. package/src/utils/renditions.ts +127 -0
  487. package/src/utils/storage.ts +60 -0
  488. package/src/utils/tokens.ts +44 -0
  489. package/src/vars.ts +3 -0
  490. package/src/workflows.ts +7 -0
@@ -0,0 +1,92 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { NodeStreamSource } from "@vertesia/client/node";
3
+ import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
4
+ import fs from 'fs';
5
+ import { pdfExtractPages } from "../conversion/mutool.js";
6
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
7
+ import { DocumentNotFoundError } from "../errors.js";
8
+ import { saveBlobToTempFile } from "../utils/blobs.js";
9
+
10
/**
 * Parameters accepted by the `createPdfDocumentFromSource` activity.
 */
interface CreatePdfDocumentFromSourceParams {
    /** Type of the object to create. */
    target_object_type: string;

    /** Title of the object to create. */
    title: string;

    /** Filename of the object to create. */
    filename?: string;

    /** Pages to extract into the new document. */
    pages: number[];

    /** Set the new document as child of the source document. */
    parent?: string;
}
19
+
20
+
21
/**
 * DSL activity spec for the `createPdfDocumentFromSource` activity.
 * The `name` field is pinned to the activity's literal name and the spec
 * params are typed as `CreatePdfDocumentFromSourceParams`.
 */
export interface CreatePdfDocumentFromSource extends DSLActivitySpec<CreatePdfDocumentFromSourceParams> {
    name: 'createPdfDocumentFromSource';
}
24
+
25
+
26
/**
 * Create a new PDF document by extracting pages from a source PDF.
 *
 * The source object is retrieved by the payload's object id, its content is
 * saved to a temporary file, the requested pages are extracted into a new PDF,
 * and that PDF is uploaded and registered as a new object whose parent is the
 * source object.
 *
 * @param payload DSL activity payload; its params provide `target_object_type`,
 *   `title`, `pages` and, optionally, `filename`.
 * @returns The id of the newly created object (`newObjectId`) and the name of
 *   the uploaded file (`uploadedFile`).
 * @throws Error when `pages` is missing or empty.
 * @throws DocumentNotFoundError when the source object is missing, has no
 *   content source, is not a PDF, or when the target type cannot be found.
 */
export async function createPdfDocumentFromSource(payload: DSLActivityExecutionPayload<CreatePdfDocumentFromSourceParams>) {
    const { client, objectId, params } = await setupActivity<CreatePdfDocumentFromSourceParams>(payload);
    const inputObject = await client.objects.retrieve(objectId);

    const { pages, filename, title } = params;
    log.info(`Creating PDF from source`, { objectId, pages, filename, title });

    if (!pages || pages.length === 0) {
        log.error(`No pages provided`);
        throw new Error(`No pages provided`);
    }

    if (!inputObject) {
        log.error(`Document ${objectId} not found`);
        throw new DocumentNotFoundError(`Document ${objectId} not found`, [objectId]);
    }

    if (!inputObject.content?.source) {
        log.error(`Document ${objectId} has no source`);
        throw new DocumentNotFoundError(`Document ${objectId} has no source`, [objectId]);
    }

    // Only PDF sources are accepted; a missing content type is rejected as well.
    const contentType = inputObject.content.type;
    if (!contentType?.startsWith('application/pdf')) {
        const message = `Document ${objectId} is not a PDF: ${contentType}`;
        log.error(message);
        throw new DocumentNotFoundError(message, [objectId]);
    }

    const targetType = await client.types.getTypeByName(params.target_object_type);
    if (!targetType) {
        log.error(`Type ${params.target_object_type} not found`);
        throw new DocumentNotFoundError(`Type ${params.target_object_type} not found`);
    }

    // NOTE(review): neither tmpFile nor newPdf is removed by this function;
    // confirm whether the helpers or the worker clean up temporary files.
    const tmpFile = await saveBlobToTempFile(client, inputObject.content.source, ".pdf");
    const newPdf = await pdfExtractPages(tmpFile, pages);
    log.info(`PDF created from pages ${pages.join(', ')} `, { newPdf });

    // Honor the caller-supplied filename; fall back to a name derived from the
    // extracted page numbers when it is absent or empty.
    const name = filename || `pages-${pages.join('-')}.pdf`;

    const sourceToUpload = new NodeStreamSource(
        fs.createReadStream(newPdf),
        name,
        "application/pdf"
    );

    log.info(`Uploading file ${newPdf} `);
    const upload = await client.objects.upload(sourceToUpload);
    log.info(`File uploaded ${upload.source} `);

    // NOTE(review): params.parent is declared but never read here; the new
    // object is always created as a child of the source object.
    const newObject = await client.objects.create({
        type: targetType.id,
        name: title || targetType.name,
        parent: objectId,
        content: {
            source: upload.source,
            name: upload.name,
            type: 'application/pdf'
        }
    });

    return { newObjectId: newObject.id, uploadedFile: upload.name };
}
@@ -0,0 +1,300 @@
1
+ import { CompletionResult, ModelOptions } from "@llumiverse/common";
2
+ import { activityInfo, log } from "@temporalio/activity";
3
+ import { VertesiaClient } from "@vertesia/client";
4
+ import { NodeStreamSource } from "@vertesia/client/node";
5
+ import {
6
+ DSLActivityExecutionPayload,
7
+ DSLActivitySpec,
8
+ ExecutionRun,
9
+ ExecutionRunStatus,
10
+ ExecutionRunWorkflow,
11
+ InteractionExecutionConfiguration,
12
+ RunSearchPayload,
13
+ } from "@vertesia/common";
14
+ import { projectResult } from "../dsl/projections.js";
15
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
16
+ import { ActivityParamInvalidError, ActivityParamNotFoundError, ResourceExhaustedError } from "../errors.js";
17
+ import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
18
+ import { Readable } from "stream";
19
+
20
/**
 * Example of a DSL activity spec that invokes the `executeInteraction` activity.
 *
 * Documentation only: the constant is not exported and not referenced anywhere in this file.
 * It is deliberately NOT named `JSON`, because a module-level `const JSON` shadows the global
 * `JSON` object (`JSON.stringify` / `JSON.parse`) for every function in this module.
 */
// @ts-ignore -- intentionally unused, kept as a usage example
const EXAMPLE_SPEC: DSLActivitySpec = {
    name: "executeInteraction",
    import: ["defaultModel", "guidlineId", "docTypeId"],
    params: {
        defaultModel: "${model}",
        interactionName: "GenerateSummary",
        model: "${defaultModel ?? 'gpt4'}",
        environment: "13456",
        max_tokens: 100,
        temperature: 0.5,
        tags: ["test"],
        result_schema: "${docType.object_schema}",
        prompt_data: {
            documents: "${documents}",
            guidline: "${guidline.text}",
        },
    },
    fetch: {
        documents: {
            type: "document",
            query: {
                id: { $in: "${objectIds}" },
            },
            select: "+text",
        },
        guidline: {
            type: "document",
            limit: 1,
            query: {
                id: "${guidlineId}",
            },
            select: "+text",
            on_not_found: "throw",
        },
        docType: {
            type: "document_type",
            limit: 1,
            query: {
                id: "${docTypeId}",
            },
            select: "+object_schema",
        },
    },
};
66
/**
 * Execution options shared by the activities that run an interaction from a workflow.
 * Every field is optional.
 */
export interface InteractionExecutionParams {
    /**
     * Environment to run the interaction in.
     * Falls back to the project default environment; if there is none either, an exception is thrown.
     */
    environment?: string;

    /**
     * Model to run the interaction with.
     * Falls back to the project default model, then to the default model of the environment;
     * if none of them is set, an exception is thrown.
     */
    model?: string;

    /**
     * Options to control generation.
     */
    model_options?: ModelOptions;

    /**
     * JSON schema requested for the result.
     */
    result_schema?: any;

    /**
     * Whether to validate the result against the schema.
     */
    validate_result?: boolean;

    /**
     * Tags to add to the execution run.
     */
    tags?: string[];

    /**
     * Whether or not to include the previous error in the interaction prompt data.
     */
    include_previous_error?: boolean;

    /**
     * When set, the activity won't be retried if it fails due to resource exhaustion (HTTP 429).
     */
    exit_on_resource_exhaustion?: boolean;
}
107
+
108
/**
 * Parameters of the `executeInteraction` activity.
 *
 * TODO: must be kept in sync with InteractionAsyncExecutionPayload from @vertesia/common.
 * Also see the executeInteractionAsync endpoint on the server for how the client payload is sent
 * to the workflow (`interaction` is translated to `interactionName`).
 */
export interface ExecuteInteractionParams extends InteractionExecutionParams {
    /**
     * Name of the interaction to execute.
     * TODO: rename to `interaction` as in InteractionAsyncExecutionPayload.
     */
    interactionName: string;

    /**
     * Data handed to the interaction prompts.
     */
    prompt_data: Record<string, any>;

    /**
     * Additional prompt data passed by the workflow configuration. It is merged into `prompt_data`
     * (on a key clash the static value wins).
     * Use `import: ["static_prompt_data"]` to import the workflow prompt data as the
     * `static_prompt_data` param; otherwise the workflow prompt data is ignored.
     */
    static_prompt_data?: Record<string, any>;

    /**
     * Per-key truncation specs: each listed `prompt_data` entry is passed through
     * `truncByMaxTokens` before the interaction is executed.
     */
    truncate?: Record<string, TruncateSpec>;
}

/**
 * DSL activity spec for the `executeInteraction` activity.
 */
export interface ExecuteInteraction extends DSLActivitySpec<ExecuteInteractionParams> {
    name: "executeInteraction";
}
129
+
130
/**
 * Activity entry point: executes the interaction named by `params.interactionName` and returns
 * the projected result.
 *
 * Steps:
 *  1. merge `static_prompt_data` into `prompt_data` (static values win on key clashes);
 *  2. apply the `truncate` specs to the matching `prompt_data` entries;
 *  3. run the interaction through `executeInteractionFromActivity`;
 *  4. upload every `image` result and replace its value with what `client.files.uploadFile` returns;
 *  5. project `{ runId, status, result }` with `projectResult`.
 *
 * @param payload the DSL activity execution payload
 * @returns the value produced by `projectResult`
 * @throws ActivityParamNotFoundError when `interactionName` is missing
 * @throws ResourceExhaustedError on a 429 failure when `exit_on_resource_exhaustion` is set
 * @throws ActivityParamInvalidError when the failure message points at invalid `prompt_data` or a missing model
 * @throws Error for any other failure, wrapping the original message
 */
export async function executeInteraction(payload: DSLActivityExecutionPayload<ExecuteInteractionParams>) {
    const { client, params } = await setupActivity<ExecuteInteractionParams>(payload);

    const { interactionName, static_prompt_data: wf_prompt_data } = params;
    if (!interactionName) {
        log.error("Missing interactionName", { params });
        throw new ActivityParamNotFoundError("interactionName", payload.activity);
    }

    // `prompt_data` is typed as required, but it comes from workflow configuration: tolerate its
    // absence instead of crashing in Object.assign / the truncation loop below.
    const prompt_data: Record<string, any> = params.prompt_data ?? {};
    if (wf_prompt_data) {
        Object.assign(prompt_data, wf_prompt_data);
    }

    if (params.truncate) {
        for (const [key, spec] of Object.entries(params.truncate)) {
            prompt_data[key] = truncByMaxTokens(prompt_data[key], spec);
        }
    }

    try {
        const res = await executeInteractionFromActivity(
            client,
            interactionName,
            params,
            prompt_data,
            payload.debug_mode,
        );

        let completionResult: CompletionResult[] = res.result;

        // Handle image uploads if the result contains base64 images
        if (completionResult.some((r) => r.type === "image")) {
            // The activity info is the same for every item: read it once.
            const info = activityInfo();
            const { runId } = info.workflowExecution;
            const { activityId } = info;

            completionResult = await Promise.all(
                completionResult.map(async (item, index) => {
                    if (item.type !== "image") {
                        return item;
                    }

                    // Strip the data-URL header (if any) and decode the base64 payload
                    const base64Data = item.value.replace(/^data:image\/[a-z]+;base64,/, "");
                    const buffer = Buffer.from(base64Data, 'base64');

                    // The index keeps file names unique within one activity execution
                    const filename = `generated-image-${runId}-${activityId}-${index}.png`;
                    const source = new NodeStreamSource(Readable.from(buffer), filename, "image/png");

                    const file = await client.files.uploadFile(source);
                    return { type: "image", value: file } as CompletionResult;
                }),
            );
        }

        return projectResult(payload, params, res, {
            runId: res.id,
            status: res.status,
            result: completionResult,
        });
    } catch (error: any) {
        log.error(`Failed to execute interaction ${interactionName}`, { error });

        // A thrown value is not guaranteed to be an Error: never call string methods on a
        // possibly missing `message`, otherwise a TypeError would mask the real failure.
        const message: string = typeof error?.message === "string" ? error.message : String(error);

        if (error?.statusCode === 429 && params.exit_on_resource_exhaustion) {
            throw new ResourceExhaustedError(error.statusCode, "Resource exhausted - rate limit exceeded");
        } else if (message.includes("Failed to validate merged prompt schema")) {
            //issue with the input data, don't retry
            throw new ActivityParamInvalidError("prompt_data", payload.activity, message);
        } else if (message.includes("modelId: Path `modelId` is required")) {
            //issue with the input data, don't retry
            throw new ActivityParamInvalidError("model", payload.activity, message);
        } else {
            throw new Error(`Interaction Execution failed ${interactionName}: ${message}`);
        }
    }
}
216
+
217
/**
 * Executes an interaction by name from within an activity.
 *
 * The run is tagged with `"workflow"` plus `params.tags` and carries the workflow run id,
 * workflow id and activity type. When `params.include_previous_error` is set and this is a retry
 * (`attempt > 1`), a run of the same workflow run is looked up and its `error` is passed to the
 * interaction as `previous_error` prompt data.
 *
 * @param client client used to search runs and execute the interaction
 * @param interactionName name of the interaction to execute
 * @param params execution options (environment, model, schema, tags, ...)
 * @param prompt_data data handed to the interaction; spread into the request data
 * @param debug when true, logs the previous run error and the execution result
 * @returns the execution run returned by `client.interactions.executeByName`
 * @throws Error when the returned run has an `error` or a `failed` status; errors raised by the
 *         client call itself are logged and rethrown unchanged
 */
export async function executeInteractionFromActivity(
    client: VertesiaClient,
    interactionName: string,
    params: InteractionExecutionParams,
    prompt_data: any,
    debug?: boolean,
) {
    const info = activityInfo();
    const { runId, workflowId } = info.workflowExecution;

    const tags = params.tags ? ["workflow"].concat(params.tags) : ["workflow"];
    const workflow: ExecutionRunWorkflow = {
        run_id: runId,
        workflow_id: workflowId,
        activity_type: info.activityType,
    };

    let previousStudioExecutionRun: ExecutionRun | undefined = undefined;
    // Only a retry can have a previous run to report an error from.
    if (params.include_previous_error && info.attempt > 1) {
        log.info("Retrying, searching for previous run", { prev_run_id: runId });
        const searchPayload: RunSearchPayload = {
            query: { workflow_run_ids: [runId] },
            limit: 1,
        };
        const results = await client.runs.search(searchPayload);
        log.info("Search results", { results });

        const previousRun = results?.[0];
        if (previousRun) {
            log.info("Found previous run", { previousRun });
            previousStudioExecutionRun = await client.runs.retrieve(previousRun.id);
        }
    }
    if (debug && previousStudioExecutionRun?.error) {
        log.info(`Found previous run error`, { error: previousStudioExecutionRun?.error });
    }

    const config: InteractionExecutionConfiguration = {
        environment: params.environment,
        model: params.model,
        model_options: params.model_options,
        do_validate: params.validate_result,
    };
    const data = {
        ...prompt_data,
        previous_error: previousStudioExecutionRun?.error,
    };

    const result_schema = params.result_schema;

    log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags, workflow });

    const res = await client.interactions
        .executeByName(interactionName, {
            config,
            data,
            result_schema,
            tags,
            stream: false,
            workflow,
        })
        .catch((err) => {
            log.error(`Error executing interaction ${interactionName}`, { err });
            throw err;
        });

    if (debug) {
        log.info(`Interaction executed ${interactionName}`, res);
    }

    if (res.error || res.status === ExecutionRunStatus.failed) {
        log.error(`Error executing interaction ${interactionName}`, { error: res.error });
        // Interpolating an object directly would render as "[object Object]".
        throw new Error(`Interaction Execution failed ${interactionName}: ${describeRunError(res.error)}`);
    }

    return res;
}

/**
 * Renders a run error of unknown shape as readable text.
 * NOTE(review): the exact shape of `ExecutionRun.error` is not visible here, so strings, Error
 * instances, `{ message }` objects and arbitrary values are all handled.
 */
function describeRunError(error: unknown): string {
    if (error === undefined || error === null) {
        return "unknown error";
    }
    if (typeof error === "string") {
        return error;
    }
    if (typeof error === "object") {
        const message = (error as { message?: unknown }).message;
        if (typeof message === "string" && message.length > 0) {
            return message;
        }
        try {
            // `globalThis.JSON` on purpose: this module declares its own top-level `JSON` constant.
            return globalThis.JSON.stringify(error) ?? String(error);
        } catch {
            // e.g. circular structures
            return String(error);
        }
    }
    return String(error);
}
@@ -0,0 +1,205 @@
1
+ import { log } from "@temporalio/activity";
2
+ import {
3
+ ContentObject,
4
+ CreateContentObjectPayload,
5
+ DSLActivityExecutionPayload,
6
+ DSLActivitySpec,
7
+ } from "@vertesia/common";
8
+ import { markdownWithMarkitdown } from "../conversion/markitdown.js";
9
+ import { mutoolPdfToText } from "../conversion/mutool.js";
10
+ import { markdownWithPandoc } from "../conversion/pandoc.js";
11
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
12
+ import { DocumentNotFoundError } from "../errors.js";
13
+ import { TextExtractionResult, TextExtractionStatus } from "../result-types.js";
14
+ import { fetchBlobAsBuffer, md5 } from "../utils/blobs.js";
15
+ import { countTokens } from "../utils/tokens.js";
16
+
17
/**
 * Example of a DSL activity spec that invokes the `extractDocumentText` activity.
 *
 * Documentation only: the constant is not exported and not referenced anywhere in this file.
 * It is deliberately NOT named `JSON`, because a module-level `const JSON` shadows the global
 * `JSON` object for every function in this module.
 */
// @ts-ignore -- intentionally unused, kept as a usage example
const EXAMPLE_SPEC: DSLActivitySpec = {
    name: "extractDocumentText",
};
21
+
22
/**
 * Parameters of the `extractDocumentText` activity.
 * The activity doesn't have any parameter of its own.
 */
export interface ExtractDocumentTextParams {}

/**
 * DSL activity spec for the `extractDocumentText` activity.
 * `projection` is typed as `never`: a spec for this activity cannot declare one.
 */
export interface ExtractDocumentText extends DSLActivitySpec<ExtractDocumentTextParams> {
    name: "extractDocumentText";
    projection?: never;
}
28
+
29
/**
 * Extracts the text of the target document and stores it on the document.
 *
 * Flow:
 *  - loads the document (including its `text`);
 *  - without a content source or type: reports `skipped` when text is already present, `error` otherwise;
 *  - reports `skipped` when text exists and `text_etag` matches the content etag;
 *  - otherwise downloads the blob and converts it according to its MIME type; unknown types are
 *    accepted only when the bytes look like text (see `sniffIfText`);
 *  - updates the document with the text, its etag and the token counts.
 *
 * Conversion errors are not caught here and propagate to the caller; a failed download is
 * reported as an `error` result instead.
 *
 * @param payload the DSL activity execution payload
 * @returns a summary of the extraction (status, message, tokens, text length)
 * @throws DocumentNotFoundError when the document cannot be found
 */
export async function extractDocumentText(
    payload: DSLActivityExecutionPayload<ExtractDocumentTextParams>,
): Promise<TextExtractionResult> {
    const { client, objectId } = await setupActivity(payload);

    const r = await client.objects.find({
        query: { _id: objectId },
        limit: 1,
        select: "+text",
    });
    const doc = r[0] as ContentObject;
    if (!doc) {
        log.error(`Document ${objectId} not found`);
        throw new DocumentNotFoundError(`Document ${objectId} not found`, payload.objectIds);
    }

    log.info(`Extracting text for object ${doc.id}`);

    if (!doc.content?.type || !doc.content?.source) {
        if (doc.text) {
            return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text present and no source or type");
        } else {
            return createResponse(doc, "", TextExtractionStatus.error, "No source or type found");
        }
    }

    //skip if text already extracted and proper etag
    if (doc.text && doc.text.length > 0 && doc.text_etag === doc.content.etag) {
        return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text already extracted");
    }

    let fileBuffer: Buffer;
    try {
        fileBuffer = await fetchBlobAsBuffer(client, doc.content.source);
    } catch (e: any) {
        log.error(`Error reading file: ${e}`);
        return createResponse(doc, "", TextExtractionStatus.error, e.message);
    }

    // Compare on the bare media type: drop parameters such as "; charset=utf-8" and ignore case,
    // so that e.g. "text/plain; charset=utf-8" is handled like "text/plain".
    const mimeType = (doc.content.type.split(";")[0] ?? "").trim().toLowerCase();

    let txt: string;

    switch (mimeType) {
        case "application/pdf":
            txt = await mutoolPdfToText(fileBuffer);
            break;

        //docx
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            txt = await markdownWithMarkitdown(fileBuffer, "docx");
            break;

        //pptx
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            txt = await markdownWithMarkitdown(fileBuffer, "pptx");
            break;

        //html
        case "text/html":
            txt = await markdownWithPandoc(fileBuffer, "html");
            break;

        //opendocument
        case "application/vnd.oasis.opendocument.text":
            txt = await markdownWithPandoc(fileBuffer, "odt");
            break;

        //rtf
        case "application/rtf":
            txt = await markdownWithPandoc(fileBuffer, "rtf");
            break;

        //formats that are already text: plain, markdown, csv, typescript, javascript, json
        case "text/plain":
        case "text/markdown":
        case "text/csv":
        case "application/typescript":
        case "application/javascript":
        case "application/json":
            txt = fileBuffer.toString("utf8");
            break;

        default:
            if (!sniffIfText(fileBuffer)) {
                return createResponse(
                    doc,
                    doc.text ?? "",
                    TextExtractionStatus.skipped,
                    `Unsupported mime type: ${doc.content.type}`,
                );
            }
            txt = fileBuffer.toString("utf8"); //TODO: add charset detection
            break;
    }

    const tokensData = countTokens(txt);
    // Without a content etag, fall back to a hash of the extracted text.
    const etag = doc.content.etag ?? md5(txt);

    const updateData: CreateContentObjectPayload = {
        text: txt,
        text_etag: etag,
        tokens: {
            ...tokensData,
            etag: etag,
        },
    };

    await client.objects.update(doc.id, updateData);

    // `doc` was loaded before the update, so its `tokens` are stale (or missing): report the
    // token data that has just been stored.
    return createResponse({ ...doc, tokens: updateData.tokens }, txt, TextExtractionStatus.success);
}
158
+
159
/**
 * Builds the activity result for a document.
 *
 * @param doc document the result is about; provides `objectId` and `tokens`
 * @param text text to report on (only its length and emptiness are used)
 * @param status outcome of the extraction
 * @param message optional human readable detail
 */
function createResponse(
    doc: ContentObject,
    text: string,
    status: TextExtractionStatus,
    message?: string,
): TextExtractionResult {
    const len = text.length;
    const result: TextExtractionResult = {
        status,
        message,
        tokens: doc.tokens,
        len,
        objectId: doc.id,
        hasText: len > 0,
    };
    return result;
}
174
+
175
/**
 * Heuristically decides whether a buffer holds UTF-8 text.
 *
 * Returns `false` when the buffer is empty or larger than 500 KiB, when more than 10% of its
 * first 1000 bytes are control characters other than tab, line feed and carriage return, or when
 * decoding the whole buffer as UTF-8 yields a replacement character (U+FFFD).
 *
 * @param buf raw file content
 * @returns `true` when the content looks like UTF-8 text
 */
function sniffIfText(buf: Buffer): boolean {
    // Empty content is never text; very large files are not even inspected
    if (buf.length === 0 || buf.length > 500 * 1024) {
        return false;
    }

    // Count binary/control characters in the first 1000 bytes.
    // Bytes below 32 are control characters (NUL included); 9, 10 and 13 are common whitespace.
    const sampleSize = Math.min(buf.length, 1000);
    let binaryCount = 0;
    for (const byte of buf.subarray(0, sampleSize)) {
        if (byte < 32 && byte !== 9 && byte !== 10 && byte !== 13) {
            binaryCount++;
        }
    }

    // If more than 10% binary/control chars, probably not text
    if (binaryCount / sampleSize > 0.1) {
        return false;
    }

    // Buffer#toString never throws: invalid UTF-8 sequences decode to U+FFFD, which is what
    // the check below detects.
    return !buf.toString("utf8").includes("\uFFFD");
}