@vertesia/workflow 0.82.0 → 0.82.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +33 -0
  2. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  3. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +73 -0
  4. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  5. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +19 -0
  6. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  7. package/lib/cjs/activities/chunkDocument.js +85 -0
  8. package/lib/cjs/activities/chunkDocument.js.map +1 -0
  9. package/lib/cjs/activities/createDocumentFromOther.js +64 -0
  10. package/lib/cjs/activities/createDocumentFromOther.js.map +1 -0
  11. package/lib/cjs/activities/executeInteraction.js +194 -0
  12. package/lib/cjs/activities/executeInteraction.js.map +1 -0
  13. package/lib/cjs/activities/extractDocumentText.js +156 -0
  14. package/lib/cjs/activities/extractDocumentText.js.map +1 -0
  15. package/lib/cjs/activities/generateDocumentProperties.js +83 -0
  16. package/lib/cjs/activities/generateDocumentProperties.js.map +1 -0
  17. package/lib/cjs/activities/generateEmbeddings.js +228 -0
  18. package/lib/cjs/activities/generateEmbeddings.js.map +1 -0
  19. package/lib/cjs/activities/generateOrAssignContentType.js +125 -0
  20. package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -0
  21. package/lib/cjs/activities/getObjectFromStore.js +20 -0
  22. package/lib/cjs/activities/getObjectFromStore.js.map +1 -0
  23. package/lib/cjs/activities/handleError.js +22 -0
  24. package/lib/cjs/activities/handleError.js.map +1 -0
  25. package/lib/cjs/activities/index-dsl.js +47 -0
  26. package/lib/cjs/activities/index-dsl.js.map +1 -0
  27. package/lib/cjs/activities/index.js +21 -0
  28. package/lib/cjs/activities/index.js.map +1 -0
  29. package/lib/cjs/activities/media/prepareVideo.js +429 -0
  30. package/lib/cjs/activities/media/prepareVideo.js.map +1 -0
  31. package/lib/cjs/activities/media/processPdfWithTextract.js +103 -0
  32. package/lib/cjs/activities/media/processPdfWithTextract.js.map +1 -0
  33. package/lib/cjs/activities/media/saveGladiaTranscription.js +81 -0
  34. package/lib/cjs/activities/media/saveGladiaTranscription.js.map +1 -0
  35. package/lib/cjs/activities/media/transcribeMediaWithGladia.js +82 -0
  36. package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +1 -0
  37. package/lib/cjs/activities/notifyWebhook.js +167 -0
  38. package/lib/cjs/activities/notifyWebhook.js.map +1 -0
  39. package/lib/cjs/activities/rateLimiter.js +30 -0
  40. package/lib/cjs/activities/rateLimiter.js.map +1 -0
  41. package/lib/cjs/activities/renditions/generateImageRendition.js +66 -0
  42. package/lib/cjs/activities/renditions/generateImageRendition.js.map +1 -0
  43. package/lib/cjs/activities/renditions/generateVideoRendition.js +200 -0
  44. package/lib/cjs/activities/renditions/generateVideoRendition.js.map +1 -0
  45. package/lib/cjs/activities/setDocumentStatus.js +15 -0
  46. package/lib/cjs/activities/setDocumentStatus.js.map +1 -0
  47. package/lib/cjs/conversion/TextractProcessor.js +417 -0
  48. package/lib/cjs/conversion/TextractProcessor.js.map +1 -0
  49. package/lib/cjs/conversion/image.js +149 -0
  50. package/lib/cjs/conversion/image.js.map +1 -0
  51. package/lib/cjs/conversion/markitdown.js +42 -0
  52. package/lib/cjs/conversion/markitdown.js.map +1 -0
  53. package/lib/cjs/conversion/mutool.js +147 -0
  54. package/lib/cjs/conversion/mutool.js.map +1 -0
  55. package/lib/cjs/conversion/pandoc.js +39 -0
  56. package/lib/cjs/conversion/pandoc.js.map +1 -0
  57. package/lib/cjs/dsl/conditions.js +81 -0
  58. package/lib/cjs/dsl/conditions.js.map +1 -0
  59. package/lib/cjs/dsl/dsl-workflow.js +343 -0
  60. package/lib/cjs/dsl/dsl-workflow.js.map +1 -0
  61. package/lib/cjs/dsl/dslProxyActivities.js +23 -0
  62. package/lib/cjs/dsl/dslProxyActivities.js.map +1 -0
  63. package/lib/cjs/dsl/projections.js +59 -0
  64. package/lib/cjs/dsl/projections.js.map +1 -0
  65. package/lib/cjs/dsl/setup/ActivityContext.js +122 -0
  66. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -0
  67. package/lib/cjs/dsl/setup/fetch/DataProvider.js +51 -0
  68. package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +1 -0
  69. package/lib/cjs/dsl/setup/fetch/index.js +16 -0
  70. package/lib/cjs/dsl/setup/fetch/index.js.map +1 -0
  71. package/lib/cjs/dsl/setup/fetch/providers.js +67 -0
  72. package/lib/cjs/dsl/setup/fetch/providers.js.map +1 -0
  73. package/lib/cjs/dsl/test/test-child-workflow.js +10 -0
  74. package/lib/cjs/dsl/test/test-child-workflow.js.map +1 -0
  75. package/lib/cjs/dsl/validation.js +122 -0
  76. package/lib/cjs/dsl/validation.js.map +1 -0
  77. package/lib/cjs/dsl/vars.js +341 -0
  78. package/lib/cjs/dsl/vars.js.map +1 -0
  79. package/lib/cjs/dsl/walk.js +100 -0
  80. package/lib/cjs/dsl/walk.js.map +1 -0
  81. package/lib/cjs/dsl.js +20 -0
  82. package/lib/cjs/dsl.js.map +1 -0
  83. package/lib/cjs/errors.js +70 -0
  84. package/lib/cjs/errors.js.map +1 -0
  85. package/lib/cjs/index.js +55 -0
  86. package/lib/cjs/index.js.map +1 -0
  87. package/lib/cjs/iterative-generation/activities/extractToc.js +47 -0
  88. package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -0
  89. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +72 -0
  90. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -0
  91. package/lib/cjs/iterative-generation/activities/generatePart.js +78 -0
  92. package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -0
  93. package/lib/cjs/iterative-generation/activities/generateToc.js +86 -0
  94. package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -0
  95. package/lib/cjs/iterative-generation/activities/index.js +12 -0
  96. package/lib/cjs/iterative-generation/activities/index.js.map +1 -0
  97. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +56 -0
  98. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  99. package/lib/cjs/iterative-generation/types.js +5 -0
  100. package/lib/cjs/iterative-generation/types.js.map +1 -0
  101. package/lib/cjs/iterative-generation/utils.js +121 -0
  102. package/lib/cjs/iterative-generation/utils.js.map +1 -0
  103. package/lib/cjs/package.json +3 -0
  104. package/lib/cjs/result-types.js +10 -0
  105. package/lib/cjs/result-types.js.map +1 -0
  106. package/lib/cjs/system/notifyWebhookWorkflow.js +53 -0
  107. package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -0
  108. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +33 -0
  109. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  110. package/lib/cjs/utils/auth.js +15 -0
  111. package/lib/cjs/utils/auth.js.map +1 -0
  112. package/lib/cjs/utils/blobs.js +64 -0
  113. package/lib/cjs/utils/blobs.js.map +1 -0
  114. package/lib/cjs/utils/chunks.js +14 -0
  115. package/lib/cjs/utils/chunks.js.map +1 -0
  116. package/lib/cjs/utils/client.js +31 -0
  117. package/lib/cjs/utils/client.js.map +1 -0
  118. package/lib/cjs/utils/expand-vars.js +33 -0
  119. package/lib/cjs/utils/expand-vars.js.map +1 -0
  120. package/lib/cjs/utils/memory.js +65 -0
  121. package/lib/cjs/utils/memory.js.map +1 -0
  122. package/lib/cjs/utils/renditions.js +88 -0
  123. package/lib/cjs/utils/renditions.js.map +1 -0
  124. package/lib/cjs/utils/storage.js +54 -0
  125. package/lib/cjs/utils/storage.js.map +1 -0
  126. package/lib/cjs/utils/tokens.js +38 -0
  127. package/lib/cjs/utils/tokens.js.map +1 -0
  128. package/lib/cjs/vars.js +20 -0
  129. package/lib/cjs/vars.js.map +1 -0
  130. package/lib/cjs/workflows.js +15 -0
  131. package/lib/cjs/workflows.js.map +1 -0
  132. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +30 -0
  133. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +1 -0
  134. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +70 -0
  135. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -0
  136. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +16 -0
  137. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +1 -0
  138. package/lib/esm/activities/chunkDocument.js +82 -0
  139. package/lib/esm/activities/chunkDocument.js.map +1 -0
  140. package/lib/esm/activities/createDocumentFromOther.js +58 -0
  141. package/lib/esm/activities/createDocumentFromOther.js.map +1 -0
  142. package/lib/esm/activities/executeInteraction.js +190 -0
  143. package/lib/esm/activities/executeInteraction.js.map +1 -0
  144. package/lib/esm/activities/extractDocumentText.js +153 -0
  145. package/lib/esm/activities/extractDocumentText.js.map +1 -0
  146. package/lib/esm/activities/generateDocumentProperties.js +80 -0
  147. package/lib/esm/activities/generateDocumentProperties.js.map +1 -0
  148. package/lib/esm/activities/generateEmbeddings.js +225 -0
  149. package/lib/esm/activities/generateEmbeddings.js.map +1 -0
  150. package/lib/esm/activities/generateOrAssignContentType.js +122 -0
  151. package/lib/esm/activities/generateOrAssignContentType.js.map +1 -0
  152. package/lib/esm/activities/getObjectFromStore.js +17 -0
  153. package/lib/esm/activities/getObjectFromStore.js.map +1 -0
  154. package/lib/esm/activities/handleError.js +19 -0
  155. package/lib/esm/activities/handleError.js.map +1 -0
  156. package/lib/esm/activities/index-dsl.js +23 -0
  157. package/lib/esm/activities/index-dsl.js.map +1 -0
  158. package/lib/esm/activities/index.js +5 -0
  159. package/lib/esm/activities/index.js.map +1 -0
  160. package/lib/esm/activities/media/prepareVideo.js +390 -0
  161. package/lib/esm/activities/media/prepareVideo.js.map +1 -0
  162. package/lib/esm/activities/media/processPdfWithTextract.js +99 -0
  163. package/lib/esm/activities/media/processPdfWithTextract.js.map +1 -0
  164. package/lib/esm/activities/media/saveGladiaTranscription.js +78 -0
  165. package/lib/esm/activities/media/saveGladiaTranscription.js.map +1 -0
  166. package/lib/esm/activities/media/transcribeMediaWithGladia.js +79 -0
  167. package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +1 -0
  168. package/lib/esm/activities/notifyWebhook.js +164 -0
  169. package/lib/esm/activities/notifyWebhook.js.map +1 -0
  170. package/lib/esm/activities/rateLimiter.js +27 -0
  171. package/lib/esm/activities/rateLimiter.js.map +1 -0
  172. package/lib/esm/activities/renditions/generateImageRendition.js +63 -0
  173. package/lib/esm/activities/renditions/generateImageRendition.js.map +1 -0
  174. package/lib/esm/activities/renditions/generateVideoRendition.js +194 -0
  175. package/lib/esm/activities/renditions/generateVideoRendition.js.map +1 -0
  176. package/lib/esm/activities/setDocumentStatus.js +12 -0
  177. package/lib/esm/activities/setDocumentStatus.js.map +1 -0
  178. package/lib/esm/conversion/TextractProcessor.js +410 -0
  179. package/lib/esm/conversion/TextractProcessor.js.map +1 -0
  180. package/lib/esm/conversion/image.js +143 -0
  181. package/lib/esm/conversion/image.js.map +1 -0
  182. package/lib/esm/conversion/markitdown.js +36 -0
  183. package/lib/esm/conversion/markitdown.js.map +1 -0
  184. package/lib/esm/conversion/mutool.js +139 -0
  185. package/lib/esm/conversion/mutool.js.map +1 -0
  186. package/lib/esm/conversion/pandoc.js +36 -0
  187. package/lib/esm/conversion/pandoc.js.map +1 -0
  188. package/lib/esm/dsl/conditions.js +75 -0
  189. package/lib/esm/dsl/conditions.js.map +1 -0
  190. package/lib/esm/dsl/dsl-workflow.js +336 -0
  191. package/lib/esm/dsl/dsl-workflow.js.map +1 -0
  192. package/lib/esm/dsl/dslProxyActivities.js +20 -0
  193. package/lib/esm/dsl/dslProxyActivities.js.map +1 -0
  194. package/lib/esm/dsl/projections.js +55 -0
  195. package/lib/esm/dsl/projections.js.map +1 -0
  196. package/lib/esm/dsl/setup/ActivityContext.js +117 -0
  197. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -0
  198. package/lib/esm/dsl/setup/fetch/DataProvider.js +47 -0
  199. package/lib/esm/dsl/setup/fetch/DataProvider.js.map +1 -0
  200. package/lib/esm/dsl/setup/fetch/index.js +12 -0
  201. package/lib/esm/dsl/setup/fetch/index.js.map +1 -0
  202. package/lib/esm/dsl/setup/fetch/providers.js +61 -0
  203. package/lib/esm/dsl/setup/fetch/providers.js.map +1 -0
  204. package/lib/esm/dsl/test/test-child-workflow.js +5 -0
  205. package/lib/esm/dsl/test/test-child-workflow.js.map +1 -0
  206. package/lib/esm/dsl/validation.js +118 -0
  207. package/lib/esm/dsl/validation.js.map +1 -0
  208. package/lib/esm/dsl/vars.js +335 -0
  209. package/lib/esm/dsl/vars.js.map +1 -0
  210. package/lib/esm/dsl/walk.js +96 -0
  211. package/lib/esm/dsl/walk.js.map +1 -0
  212. package/lib/esm/dsl.js +4 -0
  213. package/lib/esm/dsl.js.map +1 -0
  214. package/lib/esm/errors.js +61 -0
  215. package/lib/esm/errors.js.map +1 -0
  216. package/lib/esm/index.js +37 -0
  217. package/lib/esm/index.js.map +1 -0
  218. package/lib/esm/iterative-generation/activities/extractToc.js +44 -0
  219. package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -0
  220. package/lib/esm/iterative-generation/activities/finalizeOutput.js +69 -0
  221. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -0
  222. package/lib/esm/iterative-generation/activities/generatePart.js +75 -0
  223. package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -0
  224. package/lib/esm/iterative-generation/activities/generateToc.js +83 -0
  225. package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -0
  226. package/lib/esm/iterative-generation/activities/index.js +5 -0
  227. package/lib/esm/iterative-generation/activities/index.js.map +1 -0
  228. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +53 -0
  229. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -0
  230. package/lib/esm/iterative-generation/types.js +2 -0
  231. package/lib/esm/iterative-generation/types.js.map +1 -0
  232. package/lib/esm/iterative-generation/utils.js +112 -0
  233. package/lib/esm/iterative-generation/utils.js.map +1 -0
  234. package/lib/esm/result-types.js +7 -0
  235. package/lib/esm/result-types.js.map +1 -0
  236. package/lib/esm/system/notifyWebhookWorkflow.js +50 -0
  237. package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -0
  238. package/lib/esm/system/recalculateEmbeddingsWorkflow.js +30 -0
  239. package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -0
  240. package/lib/esm/utils/auth.js +8 -0
  241. package/lib/esm/utils/auth.js.map +1 -0
  242. package/lib/esm/utils/blobs.js +54 -0
  243. package/lib/esm/utils/blobs.js.map +1 -0
  244. package/lib/esm/utils/chunks.js +9 -0
  245. package/lib/esm/utils/chunks.js.map +1 -0
  246. package/lib/esm/utils/client.js +27 -0
  247. package/lib/esm/utils/client.js.map +1 -0
  248. package/lib/esm/utils/expand-vars.js +30 -0
  249. package/lib/esm/utils/expand-vars.js.map +1 -0
  250. package/lib/esm/utils/memory.js +55 -0
  251. package/lib/esm/utils/memory.js.map +1 -0
  252. package/lib/esm/utils/renditions.js +80 -0
  253. package/lib/esm/utils/renditions.js.map +1 -0
  254. package/lib/esm/utils/storage.js +45 -0
  255. package/lib/esm/utils/storage.js.map +1 -0
  256. package/lib/esm/utils/tokens.js +34 -0
  257. package/lib/esm/utils/tokens.js.map +1 -0
  258. package/lib/esm/vars.js +4 -0
  259. package/lib/esm/vars.js.map +1 -0
  260. package/lib/esm/workflows.js +8 -0
  261. package/lib/esm/workflows.js.map +1 -0
  262. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +17 -0
  263. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +1 -0
  264. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +39 -0
  265. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -0
  266. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +19 -0
  267. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +1 -0
  268. package/lib/types/activities/chunkDocument.d.ts +33 -0
  269. package/lib/types/activities/chunkDocument.d.ts.map +1 -0
  270. package/lib/types/activities/createDocumentFromOther.d.ts +21 -0
  271. package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -0
  272. package/lib/types/activities/executeInteraction.d.ts +61 -0
  273. package/lib/types/activities/executeInteraction.d.ts.map +1 -0
  274. package/lib/types/activities/extractDocumentText.d.ts +10 -0
  275. package/lib/types/activities/extractDocumentText.d.ts.map +1 -0
  276. package/lib/types/activities/generateDocumentProperties.d.ts +32 -0
  277. package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -0
  278. package/lib/types/activities/generateEmbeddings.d.ts +53 -0
  279. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -0
  280. package/lib/types/activities/generateOrAssignContentType.d.ts +44 -0
  281. package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -0
  282. package/lib/types/activities/getObjectFromStore.d.ts +14 -0
  283. package/lib/types/activities/getObjectFromStore.d.ts.map +1 -0
  284. package/lib/types/activities/handleError.d.ts +6 -0
  285. package/lib/types/activities/handleError.d.ts.map +1 -0
  286. package/lib/types/activities/index-dsl.d.ts +23 -0
  287. package/lib/types/activities/index-dsl.d.ts.map +1 -0
  288. package/lib/types/activities/index.d.ts +5 -0
  289. package/lib/types/activities/index.d.ts.map +1 -0
  290. package/lib/types/activities/media/prepareVideo.d.ts +30 -0
  291. package/lib/types/activities/media/prepareVideo.d.ts.map +1 -0
  292. package/lib/types/activities/media/processPdfWithTextract.d.ts +26 -0
  293. package/lib/types/activities/media/processPdfWithTextract.d.ts.map +1 -0
  294. package/lib/types/activities/media/saveGladiaTranscription.d.ts +14 -0
  295. package/lib/types/activities/media/saveGladiaTranscription.d.ts.map +1 -0
  296. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +19 -0
  297. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +1 -0
  298. package/lib/types/activities/notifyWebhook.d.ts +27 -0
  299. package/lib/types/activities/notifyWebhook.d.ts.map +1 -0
  300. package/lib/types/activities/rateLimiter.d.ts +11 -0
  301. package/lib/types/activities/rateLimiter.d.ts.map +1 -0
  302. package/lib/types/activities/renditions/generateImageRendition.d.ts +14 -0
  303. package/lib/types/activities/renditions/generateImageRendition.d.ts.map +1 -0
  304. package/lib/types/activities/renditions/generateVideoRendition.d.ts +15 -0
  305. package/lib/types/activities/renditions/generateVideoRendition.d.ts.map +1 -0
  306. package/lib/types/activities/setDocumentStatus.d.ts +15 -0
  307. package/lib/types/activities/setDocumentStatus.d.ts.map +1 -0
  308. package/lib/types/conversion/TextractProcessor.d.ts +45 -0
  309. package/lib/types/conversion/TextractProcessor.d.ts.map +1 -0
  310. package/lib/types/conversion/image.d.ts +13 -0
  311. package/lib/types/conversion/image.d.ts.map +1 -0
  312. package/lib/types/conversion/markitdown.d.ts +2 -0
  313. package/lib/types/conversion/markitdown.d.ts.map +1 -0
  314. package/lib/types/conversion/mutool.d.ts +19 -0
  315. package/lib/types/conversion/mutool.d.ts.map +1 -0
  316. package/lib/types/conversion/pandoc.d.ts +2 -0
  317. package/lib/types/conversion/pandoc.d.ts.map +1 -0
  318. package/lib/types/dsl/conditions.d.ts +2 -0
  319. package/lib/types/dsl/conditions.d.ts.map +1 -0
  320. package/lib/types/dsl/dsl-workflow.d.ts +5 -0
  321. package/lib/types/dsl/dsl-workflow.d.ts.map +1 -0
  322. package/lib/types/dsl/dslProxyActivities.d.ts +10 -0
  323. package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -0
  324. package/lib/types/dsl/projections.d.ts +4 -0
  325. package/lib/types/dsl/projections.d.ts.map +1 -0
  326. package/lib/types/dsl/setup/ActivityContext.d.ts +17 -0
  327. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -0
  328. package/lib/types/dsl/setup/fetch/DataProvider.d.ts +9 -0
  329. package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +1 -0
  330. package/lib/types/dsl/setup/fetch/index.d.ts +6 -0
  331. package/lib/types/dsl/setup/fetch/index.d.ts.map +1 -0
  332. package/lib/types/dsl/setup/fetch/providers.d.ts +25 -0
  333. package/lib/types/dsl/setup/fetch/providers.d.ts.map +1 -0
  334. package/lib/types/dsl/test/test-child-workflow.d.ts +4 -0
  335. package/lib/types/dsl/test/test-child-workflow.d.ts.map +1 -0
  336. package/lib/types/dsl/validation.d.ts +4 -0
  337. package/lib/types/dsl/validation.d.ts.map +1 -0
  338. package/lib/types/dsl/vars.d.ts +48 -0
  339. package/lib/types/dsl/vars.d.ts.map +1 -0
  340. package/lib/types/dsl/walk.d.ts +18 -0
  341. package/lib/types/dsl/walk.d.ts.map +1 -0
  342. package/lib/types/dsl.d.ts +4 -0
  343. package/lib/types/dsl.d.ts.map +1 -0
  344. package/lib/types/errors.d.ts +33 -0
  345. package/lib/types/errors.d.ts.map +1 -0
  346. package/lib/types/index.d.ts +36 -0
  347. package/lib/types/index.d.ts.map +1 -0
  348. package/lib/types/iterative-generation/activities/extractToc.d.ts +10 -0
  349. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -0
  350. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +3 -0
  351. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -0
  352. package/lib/types/iterative-generation/activities/generatePart.d.ts +3 -0
  353. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -0
  354. package/lib/types/iterative-generation/activities/generateToc.d.ts +4 -0
  355. package/lib/types/iterative-generation/activities/generateToc.d.ts.map +1 -0
  356. package/lib/types/iterative-generation/activities/index.d.ts +5 -0
  357. package/lib/types/iterative-generation/activities/index.d.ts.map +1 -0
  358. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +3 -0
  359. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -0
  360. package/lib/types/iterative-generation/types.d.ts +79 -0
  361. package/lib/types/iterative-generation/types.d.ts.map +1 -0
  362. package/lib/types/iterative-generation/utils.d.ts +26 -0
  363. package/lib/types/iterative-generation/utils.d.ts.map +1 -0
  364. package/lib/types/result-types.d.ts +22 -0
  365. package/lib/types/result-types.d.ts.map +1 -0
  366. package/lib/types/system/notifyWebhookWorkflow.d.ts +8 -0
  367. package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -0
  368. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +25 -0
  369. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -0
  370. package/lib/types/utils/auth.d.ts +4 -0
  371. package/lib/types/utils/auth.d.ts.map +1 -0
  372. package/lib/types/utils/blobs.d.ts +7 -0
  373. package/lib/types/utils/blobs.d.ts.map +1 -0
  374. package/lib/types/utils/chunks.d.ts +9 -0
  375. package/lib/types/utils/chunks.d.ts.map +1 -0
  376. package/lib/types/utils/client.d.ts +8 -0
  377. package/lib/types/utils/client.d.ts.map +1 -0
  378. package/lib/types/utils/expand-vars.d.ts +8 -0
  379. package/lib/types/utils/expand-vars.d.ts.map +1 -0
  380. package/lib/types/utils/memory.d.ts +8 -0
  381. package/lib/types/utils/memory.d.ts.map +1 -0
  382. package/lib/types/utils/renditions.d.ts +23 -0
  383. package/lib/types/utils/renditions.d.ts.map +1 -0
  384. package/lib/types/utils/storage.d.ts +16 -0
  385. package/lib/types/utils/storage.d.ts.map +1 -0
  386. package/lib/types/utils/tokens.d.ts +11 -0
  387. package/lib/types/utils/tokens.d.ts.map +1 -0
  388. package/lib/types/vars.d.ts +3 -0
  389. package/lib/types/vars.d.ts.map +1 -0
  390. package/lib/types/workflows.d.ts +8 -0
  391. package/lib/types/workflows.d.ts.map +1 -0
  392. package/lib/workflows-bundle.js +16226 -0
  393. package/package.json +6 -6
@@ -0,0 +1 @@
1
+ {"version":3,"file":"executeInteraction.js","sourceRoot":"","sources":["../../../src/activities/executeInteraction.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAIH,kBAAkB,GAIrB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,yBAAyB,EAAE,0BAA0B,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7G,OAAO,EAAgB,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAElC,UAAU;AACV,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,oBAAoB;IAC1B,MAAM,EAAE,CAAC,cAAc,EAAE,YAAY,EAAE,WAAW,CAAC;IACnD,MAAM,EAAE;QACJ,YAAY,EAAE,UAAU;QACxB,eAAe,EAAE,iBAAiB;QAClC,KAAK,EAAE,2BAA2B;QAClC,WAAW,EAAE,OAAO;QACpB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,GAAG;QAChB,IAAI,EAAE,CAAC,MAAM,CAAC;QACd,aAAa,EAAE,0BAA0B;QACzC,WAAW,EAAE;YACT,SAAS,EAAE,cAAc;YACzB,QAAQ,EAAE,kBAAkB;SAC/B;KACJ;IACD,KAAK,EAAE;QACH,SAAS,EAAE;YACP,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE;gBACH,EAAE,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE;aAC9B;YACD,MAAM,EAAE,OAAO;SAClB;QACD,QAAQ,EAAE;YACN,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,eAAe;aACtB;YACD,MAAM,EAAE,OAAO;YACf,YAAY,EAAE,OAAO;SACxB;QACD,OAAO,EAAE;YACL,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,cAAc;aACrB;YACD,MAAM,EAAE,gBAAgB;SAC3B;KACJ;CACJ,CAAC;AAiEF,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAA8D;IACnG,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAA2B,OAAO,CAAC,CAAC;IAElF,MAAM,EAAE,eAAe,EAAE,WAAW,EAAE,kBAAkB,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IACpF,IAAI,cAAc,EAAE,CAAC;QACjB,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC/C,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACnB,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;QACjD,MAAM,IAAI,0BAA0B,CAAC,iBAAiB,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9E,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QACjC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClD,WAAW,CAAC,GAAG,CAAC,GAAG,gBAAgB,CAAC,WAAW,CAAC,GAAG
,CAAC,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;IACL,CAAC;IAED,IAAI,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,8BAA8B,CAC5C,MAAM,EACN,eAAe,EACf,MAAM,EACN,WAAW,EACX,OAAO,CAAC,UAAU,CACrB,CAAC;QAEF,IAAI,gBAAgB,GAAuB,GAAG,CAAC,MAAM,CAAC;QAEtD,4DAA4D;QAC5D,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;QACtE,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,gBAAgB,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE;gBACvC,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;oBACxB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBACzB,wCAAwC;oBACxC,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC,CAAC;oBACpE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;oBAEjD,oBAAoB;oBACpB,MAAM,EAAE,KAAK,EAAE,GAAG,YAAY,EAAE,CAAC,iBAAiB,CAAC;oBACnD,MAAM,EAAE,UAAU,EAAE,GAAG,YAAY,EAAE,CAAC;oBACtC,MAAM,QAAQ,GAAG,mBAAmB,KAAK,IAAI,UAAU,IAAI,KAAK,MAAM,CAAC;oBAEvE,2CAA2C;oBAC3C,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBAErC,MAAM,MAAM,GAAG,IAAI,gBAAgB,CAC/B,MAAM,EACN,QAAQ,EACR,WAAW,CACd,CAAC;oBAEF,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;oBACnD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAsB,CAAC;gBAC9D,CAAC;gBACD,OAAO,IAAI,CAAC;YAChB,CAAC,CAAC,CACL,CAAC;YACF,gBAAgB,GAAG,cAAc,CAAC;QACtC,CAAC;QAED,OAAO,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE;YACvC,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,MAAM,EAAE,gBAAgB;SAC3B,CAAC,CAAC;IAEP,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QAClB,GAAG,CAAC,KAAK,CAAC,iCAAiC,eAAe,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QACzE,IAAI,KAAK,CAAC,UAAU,KAAK,GAAG,IAAI,MAAM,CAAC,2BAA2B,EAAE,CAAC;YACjE,MAAM,IAAI,sBAAsB,CAAC,KAAK,CAAC,UAAU,EAAE,0CAA0C,CAAC,CAAC;QACnG,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,yCAAyC,CAAC,EAAE,CAAC;YAC3E,wCAAwC;YACxC,MAAM,IAAI,yBAAyB,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QACxF,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,qCAAqC,CAAC,EAAE,CAAC;YACvE,wCAAwC;YACxC,MAAM,IAAI,yBAAyB,CAAC,OAAO,EAAE,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;Q
AClF,CAAC;aAAM,CAAC;YACJ,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACzF,CAAC;IACL,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAChD,MAAsB,EACtB,eAAuB,EACvB,MAAkC,EAClC,WAAgB,EAChB,KAAe;IAEf,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC;IAC7B,MAAM,IAAI,GAAG,YAAY,EAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC;IAC3C,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IACxB,IAAI,QAAQ,EAAE,CAAC;QACX,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;IACD,MAAM,QAAQ,GAAyB;QACnC,MAAM,EAAE,IAAI,CAAC,iBAAiB,CAAC,KAAK;QACpC,WAAW,EAAE,IAAI,CAAC,iBAAiB,CAAC,UAAU;QAC9C,aAAa,EAAE,IAAI,CAAC,YAAY;KACnC,CAAC;IAEF,IAAI,0BAA0B,GAA6B,SAAS,CAAC;IACrE,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;QAChC,iCAAiC;QACjC,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACnB,GAAG,CAAC,IAAI,CAAC,sCAAsC,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;YACzE,MAAM,OAAO,GAAqB;gBAC9B,KAAK,EAAE,EAAE,gBAAgB,EAAE,CAAC,KAAK,CAAC,EAAE;gBACpC,KAAK,EAAE,CAAC;aACX,CAAC;YACF,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC/D,GAAG,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;gBAC7C,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACnD,CAAC,CAAC,CAAC;YAEH,IAAI,WAAW,EAAE,CAAC;gBACd,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;gBAChD,0BAA0B,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;IACL,CAAC;IACD,IAAI,KAAK,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;QAC7C,GAAG,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,KAAK,EAAE,0BAA0B,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,MAAM,MAAM,GAAsC;QAC9C,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,aAAa,EAAE,MAAM,CAAC,aAAa;QACnC,WAAW,EAAE,MAAM,CAAC,eAAe;KACtC,CAAC;IACF,MAAM,IAAI,GAAG;QACT,GAAG,WAAW;QACd,cAAc,EAAE,0BAA0B,EAAE,KAAK;KACpD,CAAC;IAEF,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,CAAC;IAE3C,GAAG,CAAC,KAAK,CAAC,gCAAgC,eAAe,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;IAE9G,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY;SAChC,aA
Aa,CAAC,eAAe,EAAE;QAC5B,MAAM;QACN,IAAI;QACJ,aAAa;QACb,IAAI;QACJ,MAAM,EAAE,KAAK;QACb,QAAQ;KACX,CAAC;SACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACX,GAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrE,MAAM,GAAG,CAAC;IACd,CAAC,CAAC,CAAC;IAEP,IAAI,KAAK,EAAE,CAAC;QACR,GAAG,CAAC,IAAI,CAAC,wBAAwB,eAAe,EAAE,EAAE,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED,IAAI,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,MAAM,KAAK,kBAAkB,CAAC,MAAM,EAAE,CAAC;QACxD,GAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAClF,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;IACrF,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
@@ -0,0 +1,153 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { markdownWithMarkitdown } from "../conversion/markitdown.js";
3
+ import { mutoolPdfToText } from "../conversion/mutool.js";
4
+ import { markdownWithPandoc } from "../conversion/pandoc.js";
5
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
6
+ import { DocumentNotFoundError } from "../errors.js";
7
+ import { TextExtractionStatus } from "../result-types.js";
8
+ import { fetchBlobAsBuffer, md5 } from "../utils/blobs.js";
9
+ import { countTokens } from "../utils/tokens.js";
10
// NOTE(review): this module-level constant shadows the global `JSON` object
// for the whole module; nothing else in this file reads it.
//@ts-ignore
const JSON = {
    name: "extractDocumentText",
};

/**
 * Activity that extracts the text of a stored document and saves it on the object.
 *
 * The object is looked up by the id resolved by `setupActivity`. Extraction is
 * skipped when the object has no content source/type or when the stored text
 * already matches the content etag. Otherwise the blob is downloaded, converted
 * according to its mime type, and the object is updated with the text, its etag
 * and the token data.
 *
 * @param payload activity payload forwarded to `setupActivity`
 * @returns the summary object built by `createResponse`
 * @throws DocumentNotFoundError when no object matches the id
 */
export async function extractDocumentText(payload) {
    const { client, objectId } = await setupActivity(payload);

    const matches = await client.objects.find({
        query: { _id: objectId },
        limit: 1,
        select: "+text",
    });
    const doc = matches[0];
    if (!doc) {
        log.error(`Document ${objectId} not found`);
        throw new DocumentNotFoundError(`Document ${objectId} not found`, payload.objectIds);
    }

    log.info(`Extracting text for object ${doc.id}`);

    // Without both a mime type and a source there is nothing to download.
    const hasSource = Boolean(doc.content?.type && doc.content?.source);
    if (!hasSource) {
        return doc.text
            ? createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text present and no source or type")
            : createResponse(doc, "", TextExtractionStatus.error, "No source or type found");
    }

    // Skip when the text was already extracted from this exact version of the content.
    const alreadyExtracted = doc.text?.length > 0 && doc.text_etag === doc.content.etag;
    if (alreadyExtracted) {
        return createResponse(doc, doc.text, TextExtractionStatus.skipped, "Text already extracted");
    }

    let fileBuffer;
    try {
        fileBuffer = await fetchBlobAsBuffer(client, doc.content.source);
    }
    catch (e) {
        // A download failure is reported in the response instead of failing the activity.
        log.error(`Error reading file: ${e}`);
        return createResponse(doc, "", TextExtractionStatus.error, e.message);
    }

    let txt;
    switch (doc.content.type) {
        case "application/pdf":
            txt = await mutoolPdfToText(fileBuffer);
            break;
        // docx
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            txt = await markdownWithMarkitdown(fileBuffer, "docx");
            break;
        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            txt = await markdownWithMarkitdown(fileBuffer, "pptx");
            break;
        // html
        case "text/html":
            txt = await markdownWithPandoc(fileBuffer, "html");
            break;
        // opendocument
        case "application/vnd.oasis.opendocument.text":
            txt = await markdownWithPandoc(fileBuffer, "odt");
            break;
        // rtf
        case "application/rtf":
            txt = await markdownWithPandoc(fileBuffer, "rtf");
            break;
        // formats that are already text: plain, markdown, csv, typescript, javascript, json
        case "text/plain":
        case "text/markdown":
        case "text/csv":
        case "application/typescript":
        case "application/javascript":
        case "application/json":
            txt = fileBuffer.toString("utf8");
            break;
        default:
            // Unknown mime type: accept it only if the bytes look like text.
            if (!sniffIfText(fileBuffer)) {
                return createResponse(doc, doc.text ?? "", TextExtractionStatus.skipped, `Unsupported mime type: ${doc.content.type}`);
            }
            txt = fileBuffer.toString("utf8"); //TODO: add charset detection
    }

    // Fall back to a hash of the text when the content carries no etag.
    const etag = doc.content.etag ?? md5(txt);
    const tokensData = countTokens(txt);
    await client.objects.update(doc.id, {
        text: txt,
        text_etag: etag,
        tokens: {
            ...tokensData,
            etag: etag,
        },
    });

    return createResponse(doc, txt, TextExtractionStatus.success);
}
115
/**
 * Builds the summary object returned by the text extraction activity.
 *
 * @param doc object being processed; its `id` and `tokens` are copied into the result
 * @param text text associated with the outcome (may be empty)
 * @param status extraction status value
 * @param message optional human-readable detail
 * @returns `{ status, message, tokens, len, objectId, hasText }`
 */
function createResponse(doc, text, status, message) {
    const { tokens, id: objectId } = doc;
    return {
        status,
        message,
        tokens,
        len: text.length,
        objectId,
        hasText: Boolean(text),
    };
}
125
/**
 * Heuristically decides whether a buffer holds UTF-8 text.
 *
 * Buffers larger than 500 KiB are rejected outright. Otherwise the first
 * 1000 bytes are sampled: if more than 10% of them are control bytes other
 * than tab, line feed and carriage return, the buffer is treated as binary.
 * Finally the whole buffer must decode to a non-empty string that contains
 * no U+FFFD replacement character.
 *
 * @param buf buffer to inspect
 * @returns true when the buffer looks like text, false otherwise
 */
function sniffIfText(buf) {
    const maxBytes = 500 * 1024;
    // If file is too large, don't even try
    if (buf.length > maxBytes) {
        return false;
    }

    // Only the head of the buffer is sampled for control characters.
    const sample = buf.subarray(0, Math.min(buf.length, 1000));
    const allowedControls = new Set([9, 10, 13]); // tab, LF, CR
    let controlBytes = 0;
    for (const byte of sample) {
        if (byte < 32 && !allowedControls.has(byte)) {
            controlBytes += 1;
        }
    }

    // An empty sample yields NaN here, which compares false and falls through
    // to the decode check below (where the empty string is rejected).
    if (controlBytes / sample.length > 0.1) {
        return false;
    }

    // Invalid UTF-8 sequences decode to the replacement character.
    try {
        const decoded = buf.toString("utf8");
        return decoded.length > 0 && !decoded.includes("\uFFFD");
    }
    catch {
        return false;
    }
}
153
+ //# sourceMappingURL=extractDocumentText.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extractDocumentText.js","sourceRoot":"","sources":["../../../src/activities/extractDocumentText.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAO3C,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AACrE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAwB,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EAAE,iBAAiB,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,qBAAqB;CAC9B,CAAC;AASF,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACrC,OAA+D;IAE/D,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;IAE1D,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;QAChC,KAAK,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE;QACxB,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,OAAO;KAClB,CAAC,CAAC;IACH,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAkB,CAAC;IAClC,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,GAAG,CAAC,KAAK,CAAC,YAAY,QAAQ,YAAY,CAAC,CAAC;QAC5C,MAAM,IAAI,qBAAqB,CAAC,YAAY,QAAQ,YAAY,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IACzF,CAAC;IAED,GAAG,CAAC,IAAI,CAAC,8BAA8B,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;IAEjD,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC7C,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,oBAAoB,CAAC,OAAO,EAAE,oCAAoC,CAAC,CAAC;QAC7G,CAAC;aAAM,CAAC;YACJ,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,oBAAoB,CAAC,KAAK,EAAE,yBAAyB,CAAC,CAAC;QAC1F,CAAC;IACL,CAAC;IAED,gDAAgD;IAChD,IAAI,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACxE,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,oBAAoB,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;IACjG,CAAC;IAED,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACD,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACrE,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,EAAE,CAAC,CAAC;QACtC,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,oBAAoB,CAAC,KAAK,EAAE,CAAC
,CAAC,OAAO,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,GAAW,CAAC;IAEhB,QAAQ,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvB,KAAK,iBAAiB;YAClB,GAAG,GAAG,MAAM,eAAe,CAAC,UAAU,CAAC,CAAC;YACxC,MAAM;QAEV,KAAK,YAAY;YACb,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,MAAM;QACN,KAAK,yEAAyE;YAC1E,GAAG,GAAG,MAAM,sBAAsB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACvD,MAAM;QAEV,MAAM;QACN,KAAK,2EAA2E;YAC5E,GAAG,GAAG,MAAM,sBAAsB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACvD,MAAM;QAEV,MAAM;QACN,KAAK,WAAW;YACZ,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACnD,MAAM;QAEV,cAAc;QACd,KAAK,yCAAyC;YAC1C,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAClD,MAAM;QAEV,KAAK;QACL,KAAK,iBAAiB;YAClB,GAAG,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAClD,MAAM;QAEV,UAAU;QACV,KAAK,eAAe;YAChB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,KAAK;QACL,KAAK,UAAU;YACX,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,MAAM;QACN,KAAK,kBAAkB;YACnB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV;YACI,IAAI,WAAW,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC1B,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,6BAA6B;gBAChE,MAAM;YACV,CAAC;YACD,OAAO,cAAc,CACjB,GAAG,EACH,GAAG,CAAC,IAAI,IAAI,EAAE,EACd,oBAAoB,CAAC,OAAO,EAC5B,0BAA0B,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAC/C,CAAC;IACV,CAAC;IAED,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAE1C,MAAM,UAAU,GAA+B;QAC3C,IAAI,EAAE,GAAG;QACT,SAAS,EAAE,IAAI;QACf,MAAM,EAAE;YACJ,GAAG,UAAU;YACb,IAAI,EAAE,IAAI;SACb;KACJ,CAAC;IAEF,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAEhD,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,cAAc,CACnB,GAAkB,EAClB,IAAY,EACZ,MAA4B,EAC5B,OAAgB;IAEhB,OAAO;QACH,MAAM;QACN,OAAO;QACP,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,GAAG,EAAE,IAAI,CAAC,MAAM;QAChB
,QAAQ,EAAE,GAAG,CAAC,EAAE;QAChB,OAAO,EAAE,CAAC,CAAC,IAAI;KAClB,CAAC;AACN,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC5B,uCAAuC;IACvC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,GAAG,IAAI,EAAE,CAAC;QAC1B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,kCAAkC;IAClC,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,yBAAyB;IAExE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,sDAAsD;QACtD,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;YAC3D,WAAW,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;IAED,2DAA2D;IAC3D,IAAI,WAAW,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,4CAA4C;IAC5C,IAAI,CAAC;QACD,MAAM,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,wBAAwB;IAC1E,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC"}
@@ -0,0 +1,80 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
3
+ import { truncByMaxTokens } from "../utils/tokens.js";
4
+ import { executeInteractionFromActivity } from "./executeInteraction.js";
5
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";

/**
 * Activity that runs an extraction interaction on a document and stores the
 * result as the document properties.
 *
 * The interaction defaults to `sys:ExtractInformation` and is executed with the
 * schema of the document type as `result_schema`. When the document has no text
 * of its own, a text is assembled from the `title` and `description` fields of
 * the extracted result.
 *
 * @param payload activity payload forwarded to `setupActivity`; `debug_mode` is read from it
 * @returns `{ status: "failed", error: "no-text" }` when there is nothing to analyze,
 *          `{ document, status: "skipped", message }` when the type has no schema,
 *          `{ status: "completed" }` once the object has been updated
 */
export async function generateDocumentProperties(payload) {
    const context = await setupActivity(payload);
    const { params, client, objectId } = context;
    const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
    const project = await context.fetchProject();
    const doc = await client.objects.retrieve(objectId, "+text");

    // The guard below is written to cope with a missing document, so `doc` must
    // not be dereferenced unguarded before it runs.
    const type = doc?.type ? await client.types.retrieve(doc.type.id) : undefined;

    if (!doc?.text && !params.use_vision && !doc?.content?.type?.startsWith("image/")) {
        log.warn(`Object ${objectId} not found or text is empty`);
        return { status: "failed", error: "no-text" };
    }
    if (!type || !type.object_schema) {
        log.info(`Object ${objectId} has no schema`);
        return { document: objectId, status: "skipped", message: "no schema defined on type" };
    }

    // Images are always sent as an image reference; PDFs only when vision is requested.
    const getImageRef = () => {
        if (doc.content?.type?.startsWith("image/")) {
            return "store:" + doc.id;
        }
        if (params.use_vision && doc.content?.type?.startsWith("application/pdf")) {
            return "store:" + doc.id;
        }
        log.info(`Object ${objectId} is not an image or pdf`);
        return undefined;
    };

    const content = doc.text
        ? truncByMaxTokens(doc.text, params.truncate || 30000)
        : undefined;
    const promptData = {
        content: content,
        image: getImageRef(),
        human_context: project?.configuration?.human_context ?? undefined,
    };

    log.info(` Extracting information from object ${objectId} with type ${type.name}`, payload.debug_mode ? { params } : undefined);
    const infoRes = await executeInteractionFromActivity(client, interactionName, {
        ...params,
        include_previous_error: true,
        result_schema: type.object_schema,
        validate_result: type.strict_mode,
    }, promptData, payload.debug_mode ?? false);

    // Read the structured result once and reuse it for both the properties and the text.
    const extracted = infoRes.result.object();

    // Existing text is never overwritten; otherwise build one from title + description.
    const getText = () => {
        if (doc.text) {
            return undefined;
        }
        let text = "";
        if (extracted.title) {
            text += extracted.title + "\n";
        }
        if (extracted.description) {
            text += extracted.description;
        }
        return text ? text : undefined;
    };

    log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });
    await client.objects.update(doc.id, {
        properties: {
            ...extracted,
            etag: doc.text_etag,
        },
        text: getText(),
        generation_run_info: {
            id: infoRes.id,
            date: new Date().toISOString(),
            model: infoRes.modelId,
        },
    });
    return { status: "completed" };
}
80
+ //# sourceMappingURL=generateDocumentProperties.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generateDocumentProperties.js","sourceRoot":"","sources":["../../../src/activities/generateDocumentProperties.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAE3C,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAgB,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAA8B,8BAA8B,EAAE,MAAM,yBAAyB,CAAC;AAErG,MAAM,uBAAuB,GAAG,wBAAwB,CAAC;AAgBzD,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC5C,OAAsE;IAEtE,MAAM,OAAO,GAAG,MAAM,aAAa,CAAmC,OAAO,CAAC,CAAC;IAC/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAC7C,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,uBAAuB,CAAC;IAE1E,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,EAAE,CAAC;IAE7C,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE7E,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChF,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,6BAA6B,CAAC,CAAC;QAC1D,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;IAClD,CAAC;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/B,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,gBAAgB,CAAC,CAAC;QAC7C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,2BAA2B,EAAE,CAAC;IAC3F,CAAC;IAED,MAAM,WAAW,GAAG,GAAG,EAAE;QACrB,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1C,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,IAAI,MAAM,CAAC,UAAU,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACxE,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,yBAAyB,CAAC,CAAC;QACtD,OAAO,SAAS,CAAC;IACrB,CAAC,CAAC;IAEF,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI;QACpB,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,QAAQ,IAAI,KAAK,CAAC;QACtD,CAAC,CAAC,SAAS,CAAC;IAEhB,MAAM,UAAU,GAAG;QACf,OAAO,EAAE,OAAO;QAChB,KAAK,EAAE,WAAW,EAAE,IAAI,SAAS;QACjC,aAAa,EAAE,OAAO,EAAE,aAAa,EAAE,aAAa,IAAI,SAAS;KACpE,CAAC;IAEF,GAAG,CAAC,IAAI,CACJ,
uCAAuC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EACxE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CAC9C,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,8BAA8B,CAChD,MAAM,EACN,eAAe,EACf;QACI,GAAG,MAAM;QACT,sBAAsB,EAAE,IAAI;QAC5B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,eAAe,EAAE,IAAI,CAAC,WAAW;KACpC,EACD,UAAU,EACV,OAAO,CAAC,UAAU,IAAI,KAAK,CAC9B,CAAC;IAEF,MAAM,OAAO,GAAG,GAAG,EAAE;QACjB,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QAC3C,IAAI,UAAU,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,IAAI,UAAU,CAAC,KAAK,GAAG,IAAI,CAAC;QACpC,CAAC;QACD,IAAI,UAAU,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI,IAAI,UAAU,CAAC,WAAW,CAAC;QACnC,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACP,OAAO,IAAI,CAAC;QAChB,CAAC;aAAM,CAAC;YACJ,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC,CAAC;IAEF,GAAG,CAAC,IAAI,CAAC,qCAAqC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;IACxG,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE;QAChC,UAAU,EAAE;YACR,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE;YAC1B,IAAI,EAAE,GAAG,CAAC,SAAS;SACtB;QACD,IAAI,EAAE,OAAO,EAAE;QACf,mBAAmB,EAAE;YACjB,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC9B,KAAK,EAAE,OAAO,CAAC,OAAO;SACzB;KACJ,CAAC,CAAC;IAEH,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACnC,CAAC"}
@@ -0,0 +1,225 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { ImageRenditionFormat, SupportedEmbeddingTypes, } from "@vertesia/common";
3
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
4
+ import { DocumentNotFoundError } from "../errors.js";
5
+ import { fetchBlobAsBase64 } from "../utils/blobs.js";
6
+ import { countTokens } from "../utils/tokens.js";
7
/**
 * Activity that generates embeddings of one type (`params.type`) for an object.
 *
 * The project configuration is read first: the activity fails when the project
 * or the embeddings configuration for the type is missing, and is skipped when
 * that configuration is not enabled. The object is then loaded and handed to
 * the text/properties or image generator.
 *
 * @param payload activity payload forwarded to `setupActivity`; `project_id` is read for error reporting
 * @returns the result object of the selected generator, or a `skipped` / `failed` result object
 * @throws DocumentNotFoundError when the project, the embeddings configuration,
 *         the document or its content is missing
 * @throws Error when the embeddings configuration has no environment
 */
export async function generateEmbeddings(payload) {
    const { params, client, objectId, fetchProject } = await setupActivity(payload);
    const { force, type } = params;

    const projectData = await fetchProject();
    if (!projectData) {
        throw new DocumentNotFoundError("Project not found", [payload.project_id]);
    }

    const config = projectData.configuration.embeddings[type];
    if (!config) {
        throw new DocumentNotFoundError("Embeddings configuration not found", [
            objectId,
        ]);
    }

    if (!config.enabled) {
        log.info(`Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`, { config });
        return {
            id: objectId,
            status: "skipped",
            message: `Embeddings generation disabled for type ${type}`,
        };
    }

    log.info(`${type} embedding generation starting for object ${objectId}`, {
        force,
        config,
    });

    if (!config.environment) {
        throw new Error("No environment found in project configuration. Set environment in project configuration to generate embeddings.");
    }

    const document = await client.objects.retrieve(objectId, "+text +parts +embeddings +tokens +properties");
    if (!document) {
        throw new DocumentNotFoundError("Document not found", [objectId]);
    }
    if (!document.content) {
        throw new DocumentNotFoundError("Document content not found", [objectId]);
    }

    switch (type) {
        // Text and properties embeddings share the same generator.
        case SupportedEmbeddingTypes.text:
        case SupportedEmbeddingTypes.properties:
            return await generateTextEmbeddings({
                client,
                config,
                document,
                type,
            });
        case SupportedEmbeddingTypes.image:
            return await generateImageEmbeddings({
                client,
                config,
                document,
                type,
            });
        default:
            return {
                id: objectId,
                status: "failed",
                message: `unsupported embedding type: ${type}`,
            };
    }
}
80
/**
 * Generates and stores the `text` or `properties` embeddings of a document.
 *
 * Returns a `failed` result when the type is not text/properties or when the
 * corresponding document field is empty, and a `skipped` result when the text
 * has more tokens than `config.max_tokens` (8000 when unset). Otherwise the
 * JSON-stringified field is embedded and saved with `client.objects.setEmbedding`.
 *
 * @param args.document object to embed
 * @param args.client client used to store the embedding
 * @param args.type embedding type
 * @param args.config embeddings configuration (`environment`, `max_tokens`)
 * @returns a result object carrying a `status` of "error", "failed", "skipped" or "completed"
 */
async function generateTextEmbeddings({ document, client, type, config }) {
    if (!document) {
        return { status: "error", message: "document is null or undefined" };
    }

    const isText = type === SupportedEmbeddingTypes.text;
    const isProperties = type === SupportedEmbeddingTypes.properties;
    const failed = (message) => ({ id: document.id, status: "failed", message });

    if (!isText && !isProperties) {
        return failed(`unsupported embedding type: ${type}`);
    }
    if (isText && !document.text) {
        return failed("no text found");
    }
    if (isProperties && !document.properties) {
        return failed("no properties found");
    }

    const { environment } = config;
    const maxTokens = config.max_tokens ?? 8000;

    // Count tokens if needed, do not rely on existing token count.
    // Only the text type is counted; properties are never size-checked.
    const tokenCount = isText ? countTokens(document.text).count : undefined;

    log.info(`Generating ${type} embeddings for document ${document.id}`);

    //TODO: Review strategy for large documents
    if (tokenCount !== undefined && tokenCount > maxTokens) {
        log.warn(`Document too large for ${type} embeddings generation, skipping (${tokenCount} tokens)`);
        return {
            id: document.id,
            status: "skipped",
            message: `${type} embeddings generation, skipped for large document (${tokenCount} tokens)`,
        };
    }

    log.info(`Generating ${type} embeddings for document`);
    // NOTE(review): `config.model` is not forwarded here, whereas
    // generateImageEmbeddings passes it along. Confirm this is intended.
    // NOTE(review): the field is JSON-stringified even for plain text, so the
    // embedded string includes the surrounding quotes and escapes.
    const embeddingInput = JSON.stringify(document[type]);
    const res = await generateEmbeddingsFromStudio(embeddingInput, environment, client);
    if (!res || !res.values) {
        return failed("no embeddings generated");
    }

    const len = res.values.length;
    log.info(`${type} embeddings generated for document ${document.id}`, {
        len,
    });
    await client.objects.setEmbedding(document.id, type, {
        values: res.values,
        model: res.model,
        etag: document.text_etag,
    });

    return {
        id: document.id,
        type,
        status: "completed",
        len,
    };
}
148
/**
 * Generates and stores the image embedding of a document.
 *
 * Only content whose type starts with `image/` or contains `pdf` is accepted.
 * A JPEG rendition is requested (generated if missing), downloaded as base64
 * and sent to the embeddings environment; the resulting vector is saved with
 * `client.objects.setEmbedding`.
 *
 * @param args.document object to embed
 * @param args.client client used for renditions, embeddings and storage
 * @param args.type embedding type, echoed in the result
 * @param args.config embeddings configuration (`environment`, `model`)
 * @returns a result object with status "failed" or "completed"
 * @throws Error while the rendition is still being generated
 * @throws DocumentNotFoundError when the rendition failed or none is available
 */
async function generateImageEmbeddings({ document, client, type, config, }) {
    log.info(`Generating image embeddings for document ${document.id}`, {
        content: document.content,
    });

    const contentType = document.content?.type;
    if (!contentType?.startsWith("image/") && !contentType?.includes("pdf")) {
        return {
            id: document.id,
            type,
            status: "failed",
            message: "content is not an image",
        };
    }

    const { environment, model } = config;
    const resRnd = await client.store.objects.getRendition(document.id, {
        format: ImageRenditionFormat.jpeg,
        generate_if_missing: true,
        sign_url: true,
    });

    if (resRnd.status === "generating") {
        throw new Error("Rendition is generating, will retry later");
    }
    // This single guard guarantees at least one rendition below.
    const renditions = resRnd.renditions;
    if (resRnd.status === "failed" || !renditions?.length) {
        throw new DocumentNotFoundError("Rendition retrieval failed", [document.id]);
    }

    const image = await fetchBlobAsBase64(client, renditions[0]);

    // The request is started outside the try block so that only its rejection
    // (not a synchronous failure to start it) is logged before being rethrown.
    const pending = client.environments.embeddings(environment, {
        image,
        model,
    });
    let res;
    try {
        res = await pending;
    }
    catch (e) {
        log.error("Error generating embeddings for image", { error: e });
        throw e;
    }

    if (!res || !res.values) {
        return {
            id: document.id,
            status: "failed",
            message: "no embeddings generated",
        };
    }

    // NOTE(review): the stored etag is the document's text etag, same as for
    // text embeddings. Confirm this is the intended etag for image embeddings.
    await client.objects.setEmbedding(document.id, SupportedEmbeddingTypes.image, {
        values: res.values,
        model: res.model,
        etag: document.text_etag,
    });

    return {
        id: document.id,
        type,
        status: "completed",
        len: res.values.length,
    };
}
212
/**
 * Requests embeddings for a text from an environment.
 *
 * @param text text to embed
 * @param env environment identifier passed to `client.environments.embeddings`
 * @param client client exposing `environments.embeddings`
 * @param model optional model name forwarded with the request
 * @returns whatever the embeddings call resolves to
 * @throws rethrows any rejection of the embeddings call after logging it
 */
async function generateEmbeddingsFromStudio(text, env, client, model) {
    log.info(`Generating embeddings for text of ${text.length} chars with environment ${env}`);

    // Start the request outside the try block so that only a rejected request
    // (not a synchronous failure to start it) goes through the logging path.
    const request = client.environments.embeddings(env, {
        text,
        model,
    });
    try {
        return await request;
    }
    catch (e) {
        log.error("Error generating embeddings for text", { error: e });
        throw e;
    }
}
225
+ //# sourceMappingURL=generateEmbeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generateEmbeddings.js","sourceRoot":"","sources":["../../../src/activities/generateEmbeddings.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAE3C,OAAO,EAIH,oBAAoB,EAEpB,uBAAuB,GAC1B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAoCjD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACpC,OAA8D;IAE9D,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAC5C,MAAM,aAAa,CAA2B,OAAO,CAAC,CAAC;IAC3D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC;IAE/B,MAAM,WAAW,GAAG,MAAM,YAAY,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,qBAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IACD,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,qBAAqB,CAAC,oCAAoC,EAAE;YAClE,QAAQ;SACX,CAAC,CAAC;IACP,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,qBAAqB,CAAC,mBAAmB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC;QACxD,GAAG,CAAC,IAAI,CACJ,2CAA2C,IAAI,gBAAgB,WAAW,CAAC,IAAI,KAAK,WAAW,CAAC,SAAS,GAAG,EAC5G,EAAE,MAAM,EAAE,CACb,CAAC;QACF,OAAO;YACH,EAAE,EAAE,QAAQ;YACZ,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,2CAA2C,IAAI,EAAE;SAC7D,CAAC;IACN,CAAC;IAED,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,6CAA6C,QAAQ,EAAE,EAAE;QACrE,KAAK;QACL,MAAM;KACT,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACX,iHAAiH,CACpH,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAC1C,QAAQ,EACR,8CAA8C,CACjD,CAAC;IAEF,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,qBAAqB,CAAC,oBAAoB,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,qBAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED,IAAI,GAAG,CAAC;IAER,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,uBAAuB,CAAC,IAAI;YAC7B,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,
CAAC;YACH,MAAM;QACV,KAAK,uBAAuB,CAAC,UAAU;YACnC,GAAG,GAAG,MAAM,sBAAsB,CAAC;gBAC/B,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV,KAAK,uBAAuB,CAAC,KAAK;YAC9B,GAAG,GAAG,MAAM,uBAAuB,CAAC;gBAChC,MAAM;gBACN,MAAM;gBACN,QAAQ;gBACR,IAAI;aACP,CAAC,CAAC;YACH,MAAM;QACV;YACI,GAAG,GAAG;gBACF,EAAE,EAAE,QAAQ;gBACZ,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;aACjD,CAAC;IACV,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC;AAWD,KAAK,UAAU,sBAAsB,CACjC,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAmC;IAGnE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,+BAA+B,EAAE,CAAC;IACzE,CAAC;IAED,IACI,IAAI,KAAK,uBAAuB,CAAC,IAAI;QACrC,IAAI,KAAK,uBAAuB,CAAC,UAAU,EAC7C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,+BAA+B,IAAI,EAAE;SACjD,CAAC;IACN,CAAC;IAED,IAAI,IAAI,KAAK,uBAAuB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1D,OAAO,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3E,CAAC;IACD,IAAI,IAAI,KAAK,uBAAuB,CAAC,UAAU,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;QACvE,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,qBAAqB;SACjC,CAAC;IACN,CAAC;IAED,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,CAAC;IAE/B,8DAA8D;IAC9D,IAAI,UAAU,GAAwB,SAAS,CAAC;IAChD,IAAI,IAAI,KAAK,uBAAuB,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QACzD,UAAU,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC;IAClD,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC;IAE5C,kEAAkE;IAClE,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,4BAA4B,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;IACtE,IACI,IAAI,KAAK,uBAAuB,CAAC,IAAI;QACrC,UAAU,KAAK,SAAS;QACxB,UAAU,GAAG,SAAS,EACxB,CAAC;QACC,2CAA2C;QAC3C,GAAG,CAAC,IAAI,CACJ,0BAA0B,IAAI,qCAAqC,UAAU,UAAU,CAC1F,CAAC;QACF,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,GAAG,IAAI,uDAAuD,UAAU,UAAU;SAC9F,CAAA;IACL,CAAC;SAAM,CAAC;QACJ,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,0BAA0B,CAAC,CAAC;QAEvD,MAAM,GAAG,GAAG,MAAM,4BAA4B,CAC1C,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAC9B,WAAW,EACX,MAAM,CACT,CAAC;QACF,IAAI,CAAC,GAAG,IAA
I,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;YACtB,OAAO;gBACH,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,yBAAyB;aACrC,CAAC;QACN,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,sCAAsC,QAAQ,CAAC,EAAE,EAAE,EAAE;YACjE,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE;YACjD,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;SAC3B,CAAC,CAAC;QAEH,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,WAAW;YACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;SACzB,CAAC;IACN,CAAC;AACL,CAAC;AAED,KAAK,UAAU,uBAAuB,CAAC,EACnC,QAAQ,EACR,MAAM,EACN,IAAI,EACJ,MAAM,GACwB;IAC9B,GAAG,CAAC,IAAI,CAAC,2CAA2C,GAAG,QAAQ,CAAC,EAAE,EAAE;QAChE,OAAO,EAAE,QAAQ,CAAC,OAAO;KAC5B,CAAC,CAAC;IACH,IACI,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;QAC7C,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,KAAK,CAAC,EAC1C,CAAC;QACC,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,IAAI;YACJ,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IACD,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAEtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE;QAChE,MAAM,EAAE,oBAAoB,CAAC,IAAI;QACjC,mBAAmB,EAAE,IAAI;QACzB,QAAQ,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;SAAM,IACH,MAAM,CAAC,MAAM,KAAK,QAAQ;QAC1B,CAAC,MAAM,CAAC,UAAU;QAClB,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,EAC3B,CAAC;QACC,MAAM,IAAI,qBAAqB,CAAC,4BAA4B,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACjF,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;QACtB,MAAM,IAAI,qBAAqB,CAAC,8BAA8B,EAAE;YAC5D,QAAQ,CAAC,EAAE;SACd,CAAC,CAAC;IACP,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEzD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY;SAChC,UAAU,CAAC,WAAW,EAAE;QACrB,KAAK;QACL,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,
GAAG,CAAC,KAAK,CAAC,uCAAuC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACjE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;IAEP,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO;YACH,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,OAAO,EAAE,yBAAyB;SACrC,CAAC;IACN,CAAC;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAC7B,QAAQ,CAAC,EAAE,EACX,uBAAuB,CAAC,KAAK,EAC7B;QACI,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,QAAQ,CAAC,SAAS;KAC3B,CACJ,CAAC;IAEF,OAAO;QACH,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,IAAI;QACJ,MAAM,EAAE,WAAW;QACnB,GAAG,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM;KACzB,CAAC;AACN,CAAC;AAED,KAAK,UAAU,4BAA4B,CACvC,IAAY,EACZ,GAAW,EACX,MAAsB,EACtB,KAAc;IAEd,GAAG,CAAC,IAAI,CACJ,qCAAqC,IAAI,CAAC,MAAM,2BAA2B,GAAG,EAAE,CACnF,CAAC;IAEF,OAAO,MAAM,CAAC,YAAY;SACrB,UAAU,CAAC,GAAG,EAAE;QACb,IAAI;QACJ,KAAK;KACR,CAAC;SACD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC;SAClB,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,GAAG,CAAC,KAAK,CAAC,sCAAsC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAChE,MAAM,CAAC,CAAC;IACZ,CAAC,CAAC,CAAC;AACX,CAAC"}
@@ -0,0 +1,122 @@
1
+ import { log } from "@temporalio/activity";
2
+ import { ImageRenditionFormat, } from "@vertesia/common";
3
+ import { setupActivity, } from "../dsl/setup/ActivityContext.js";
4
+ import { truncByMaxTokens } from "../utils/tokens.js";
5
+ import { executeInteractionFromActivity, } from "./executeInteraction.js";
6
+ const INT_SELECT_DOCUMENT_TYPE = "sys:SelectDocumentType";
7
+ const INT_GENERATE_METADATA_MODEL = "sys:GenerateMetadataModel";
8
/**
 * Assigns a content type to a stored object, generating a new type when the
 * selection interaction does not return the name of an existing one.
 *
 * Steps:
 *  1. Retrieve the object (including its text).
 *  2. Skip when the object already has a type; fail when there is nothing to
 *     analyze (no object, or no text and neither an image nor a PDF).
 *  3. Run the "select document type" interaction with the existing types, the
 *     (truncated) text and, when applicable, a `store:` file reference.
 *  4. If the returned `document_type` matches no listed type, call
 *     `generateNewType` to create one.
 *  5. Update the object with the chosen type id.
 *
 * @param payload - activity payload, forwarded to `setupActivity`.
 * @returns `{ status: "skipped", message }` when the object is already typed,
 *   `{ status: "failed", error: "no-text" }` when there is no usable content,
 *   otherwise `{ id, name, isNew }` for the assigned type.
 * @throws Error when the image rendition is still being generated (thrown so
 *   the caller can try again), or when type generation yields no type id.
 */
export async function generateOrAssignContentType(payload) {
    const context = await setupActivity(payload);
    const { params, client, objectId } = context;
    const interactionName = params.interactionNames?.selectDocumentType ?? INT_SELECT_DOCUMENT_TYPE;
    log.info("SelectDocumentType for object: " + objectId, { payload });
    const object = await client.objects.retrieve(objectId, "+text");
    //Expects object.type to be null on first ingestion of content
    //User initiated Content Type change via the Composable UI,
    //sets object.type to null when they let Composable choose for them.
    //sets object.type to chosen type (thus non-null) when user picks a type.
    // Optional chaining: a missing object must reach the "failed" branch below
    // instead of throwing while reading `.type`.
    if (object?.type) {
        log.warn(`Object ${objectId} has already a type. Skipping type creation.`);
        return {
            status: "skipped",
            message: "Object already has a type: " + object.type.name,
        };
    }
    if (!object ||
        (!object.text &&
            !object.content?.type?.startsWith("image/") &&
            !object.content?.type?.startsWith("application/pdf"))) {
        log.info(`Object ${objectId} not found or text is empty and not an image`, {
            object,
        });
        return { status: "failed", error: "no-text" };
    }
    const types = await client.types.list(undefined, {
        schema: true,
    });
    //make a list of all existing types, and add hints if any
    const existing_types = types.filter((t) => !["DocumentPart", "Rendition"].includes(t.name));
    const content = object.text
        ? truncByMaxTokens(object.text, params.truncate || 30000)
        : undefined;
    // Resolves the file reference handed to the interaction, if any.
    const getImage = async () => {
        const mimeType = object.content?.type;
        // PDFs with little or no extracted text are passed as a file. The guard
        // above lets text-less PDFs through, so an empty text must count as
        // "short" here; otherwise the interaction would receive no input at all.
        const textLength = object.text?.length ?? 0;
        if (mimeType?.includes("pdf") && textLength < 100) {
            return "store:" + objectId;
        }
        if (!mimeType?.startsWith("image/")) {
            return undefined;
        }
        const rendition = await client.objects.getRendition(objectId, {
            format: ImageRenditionFormat.jpeg,
            generate_if_missing: true,
        });
        if (!rendition.renditions?.length && rendition.status === "generating") {
            //throw to try again
            throw new Error(`Rendition for object ${objectId} is in progress`);
        }
        return rendition.renditions ? "store:" + objectId : undefined;
    };
    const fileRef = await getImage();
    // Log type names: concatenating the type objects themselves would only
    // print "[object Object]".
    log.info("Execute SelectDocumentType interaction on content with \nexisting types - passing full types: " +
        existing_types
            .filter((t) => !t.tags?.includes("system"))
            .map((t) => t.name)
            .join(", "));
    const res = await executeInteractionFromActivity(client, interactionName, params, {
        existing_types,
        content,
        image: fileRef,
    });
    const jsonResult = res.result.object();
    log.info("Selected Content Type Result: " + JSON.stringify(jsonResult));
    // Tolerate an interaction that returned no object at all.
    const documentType = jsonResult?.document_type;
    //if type is not identified or not present in the database, generate a new type
    let selectedType = types.find((t) => t.name === documentType);
    if (!selectedType) {
        log.warn("Document type not identified: starting type generation");
        const newType = await generateNewType(context, existing_types, content, fileRef);
        if (!newType?.id) {
            log.error("Type not found: ", res.result);
            throw new Error("Type not found: " + documentType);
        }
        selectedType = { id: newType.id, name: newType.name };
    }
    //update object with selected type
    await client.objects.update(objectId, {
        type: selectedType.id,
    });
    return {
        id: selectedType.id,
        name: selectedType.name,
        // `types` was listed before any generation, so a freshly created type is absent from it.
        isNew: !types.some((t) => t.name === selectedType.name),
    };
}
95
/**
 * Generates a new content type by running the "generate metadata model"
 * interaction and creating the resulting type through the client.
 *
 * @param context - activity context; `client`, `params` and `fetchProject()` are used.
 * @param existing_types - types passed to the interaction as `existing_types`.
 * @param content - text content passed to the interaction (may be undefined).
 * @param fileRef - optional file reference passed to the interaction as `image`.
 * @returns the value returned by `client.types.create`.
 * @throws Error "No name generated for type" when the interaction result has
 *   no `document_type` (including when it returns no object at all).
 */
async function generateNewType(context, existing_types, content, fileRef) {
    const { client, params } = context;
    const project = await context.fetchProject();
    const interactionName = params.interactionNames?.generateMetadataModel ??
        INT_GENERATE_METADATA_MODEL;
    const genTypeRes = await executeInteractionFromActivity(client, interactionName, params, {
        existing_types,
        content,
        human_context: project?.configuration?.human_context ?? undefined,
        // Any falsy reference (e.g. an empty string) is sent as undefined.
        image: fileRef || undefined,
    });
    const jsonResult = genTypeRes.result.object();
    // Optional chaining so a missing result object raises the explicit error
    // below rather than a TypeError on property access.
    if (!jsonResult?.document_type) {
        log.error("No name generated for type", genTypeRes);
        throw new Error("No name generated for type");
    }
    log.info("Generated schema for type", jsonResult.metadata_schema);
    const typeData = {
        name: jsonResult.document_type,
        description: jsonResult.document_type_description,
        object_schema: jsonResult.metadata_schema,
        is_chunkable: jsonResult.is_chunkable,
        table_layout: jsonResult.table_layout,
    };
    return client.types.create(typeData);
}
122
+ //# sourceMappingURL=generateOrAssignContentType.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generateOrAssignContentType.js","sourceRoot":"","sources":["../../../src/activities/generateOrAssignContentType.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAC3C,OAAO,EAKL,oBAAoB,GACrB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAEL,aAAa,GACd,MAAM,iCAAiC,CAAC;AACzC,OAAO,EAAgB,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAEL,8BAA8B,GAC/B,MAAM,yBAAyB,CAAC;AAEjC,MAAM,wBAAwB,GAAG,wBAAwB,CAAC;AAC1D,MAAM,2BAA2B,GAAG,2BAA2B,CAAC;AAyBhE,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC/C,OAAuE;IAEvE,MAAM,OAAO,GACX,MAAM,aAAa,CAAoC,OAAO,CAAC,CAAC;IAClE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAE7C,MAAM,eAAe,GACnB,MAAM,CAAC,gBAAgB,EAAE,kBAAkB,IAAI,wBAAwB,CAAC;IAE1E,GAAG,CAAC,IAAI,CAAC,iCAAiC,GAAG,QAAQ,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;IAEpE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEhE,8DAA8D;IAC9D,2DAA2D;IAC3D,oEAAoE;IACpE,yEAAyE;IACzE,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,8CAA8C,CAAC,CAAC;QAC3E,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,6BAA6B,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI;SAC1D,CAAC;IACJ,CAAC;IAED,IACE,CAAC,MAAM;QACP,CAAC,CAAC,MAAM,CAAC,IAAI;YACX,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;YAC3C,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,iBAAiB,CAAC,CAAC,EACvD,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,8CAA8C,EAAE;YACzE,MAAM;SACP,CAAC,CAAC;QACH,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;IAChD,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE;QAC/C,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,yDAAyD;IACzD,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CACjC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CACvD,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI;QACzB,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,QAAQ,IAAI,KAAK,CAAC;QACzD,CAAC,CAAC,SAAS,CAAC;IAEd,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;QAC1B,IACE,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,KAAK,CAAC;YACrC,MAAM,CAAC,IAAI,EAAE,MAAM;YACnB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,EACzB,CAAC;YACD,OAAO,
QAAQ,GAAG,QAAQ,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,EAAE;YACtD,MAAM,EAAE,oBAAoB,CAAC,IAAI;YACjC,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YAC3D,oBAAoB;YACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,iBAAiB,CAAC,CAAC;QACrE,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;YAC1B,OAAO,QAAQ,GAAG,QAAQ,CAAC;QAC7B,CAAC;IACH,CAAC,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,QAAQ,EAAE,CAAC;IAEjC,GAAG,CAAC,IAAI,CACN,gGAAgG;QAChG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAC1D,CAAC;IAEF,MAAM,GAAG,GAAG,MAAM,8BAA8B,CAC9C,MAAM,EACN,eAAe,EACf,MAAM,EACN;QACE,cAAc;QACd,OAAO;QACP,KAAK,EAAE,OAAO;KACf,CACF,CAAC;IAEF,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IAEvC,GAAG,CAAC,IAAI,CAAC,gCAAgC,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAGxE,+EAA+E;IAC/E,IAAI,YAAY,GAA6C,SAAS,CAAC;IAEvE,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,aAAa,CAAC,CAAC;IAEtE,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,GAAG,CAAC,IAAI,CAAC,wDAAwD,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,MAAM,eAAe,CACnC,OAAO,EACP,cAAc,EACd,OAAO,EACP,OAAO,CACR,CAAC;QACF,YAAY,GAAG,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;IACxD,CAAC;IAED,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,GAAG,CAAC,KAAK,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,kBAAkB,GAAG,UAAU,CAAC,aAAa,CAAC,CAAC;IACjE,CAAC;IAED,kCAAkC;IAClC,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE;QACpC,IAAI,EAAE,YAAY,CAAC,EAAE;KACtB,CAAC,CAAC;IAEH,OAAO;QACL,EAAE,EAAE,YAAY,CAAC,EAAE;QACnB,IAAI,EAAE,YAAY,CAAC,IAAI;QACvB,KAAK,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,YAAY,CAAC,IAAI,CAAC;KACxD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,OAA2D,EAC3D,cAAuC,EACvC,OAAgB,EAChB,OAAgB;IAEhB,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,
YAAY,EAAE,CAAC;IAC7C,MAAM,eAAe,GACnB,MAAM,CAAC,gBAAgB,EAAE,qBAAqB;QAC9C,2BAA2B,CAAC;IAE9B,MAAM,UAAU,GAAG,MAAM,8BAA8B,CACrD,MAAM,EACN,eAAe,EACf,MAAM,EACN;QACE,cAAc;QACd,OAAO,EAAE,OAAO;QAChB,aAAa,EAAE,OAAO,EAAE,aAAa,EAAE,aAAa,IAAI,SAAS;QACjE,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;KACrC,CACF,CAAC;IAEF,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IAE9C,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,CAAC;QAC9B,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,UAAU,CAAC,CAAC;QACpD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAChD,CAAC;IAED,GAAG,CAAC,IAAI,CAAC,2BAA2B,EAAE,UAAU,CAAC,eAAe,CAAC,CAAC;IAClE,MAAM,QAAQ,GAAmC;QAC/C,IAAI,EAAE,UAAU,CAAC,aAAa;QAC9B,WAAW,EAAE,UAAU,CAAC,yBAAyB;QACjD,aAAa,EAAE,UAAU,CAAC,eAAe;QACzC,YAAY,EAAE,UAAU,CAAC,YAAY;QACrC,YAAY,EAAE,UAAU,CAAC,YAAY;KACtC,CAAC;IAEF,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAEjD,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { projectResult } from "../dsl/projections.js";
2
+ import { setupActivity } from "../dsl/setup/ActivityContext.js";
3
/**
 * Retrieves the object targeted by the activity and returns its projection.
 *
 * The object is loaded with `client.objects.retrieve(objectId, params.select)`
 * and handed to `projectResult`; the returned value is that projection with
 * the object's `id` always set on top of it.
 *
 * @param payload - activity payload, forwarded to `setupActivity` and `projectResult`.
 * @returns the projection produced by `projectResult`, plus `id`.
 */
export async function getObjectFromStore(payload) {
    const context = await setupActivity(payload);
    const storedObject = await context.client.objects.retrieve(context.objectId, context.params.select);
    const projected = projectResult(payload, context.params, storedObject, storedObject);
    // `id` is applied last so it overrides any `id` coming from the projection.
    return Object.assign({}, projected, { id: storedObject.id });
}
17
+ //# sourceMappingURL=getObjectFromStore.js.map