@vertesia/workflow 0.52.0 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. package/package.json +5 -4
  2. package/src/activities/createDocumentFromOther.ts +1 -1
  3. package/src/activities/executeInteraction.ts +66 -39
  4. package/src/activities/extractDocumentText.ts +67 -51
  5. package/src/activities/generateEmbeddings.ts +1 -1
  6. package/src/activities/generateImageRendition.ts +35 -14
  7. package/src/activities/generateOrAssignContentType.ts +52 -26
  8. package/src/activities/getObjectFromStore.ts +1 -1
  9. package/src/activities/handleError.ts +25 -0
  10. package/src/activities/index-dsl.ts +1 -0
  11. package/src/activities/index.ts +0 -1
  12. package/src/activities/media/processPdfWithTextract.ts +4 -4
  13. package/src/activities/media/transcribeMediaWithGladia.ts +1 -1
  14. package/src/activities/setDocumentStatus.ts +1 -1
  15. package/src/conversion/TextractProcessor.ts +9 -9
  16. package/src/conversion/image.ts +8 -2
  17. package/src/conversion/markitdown.ts +41 -0
  18. package/src/conversion/mutool.ts +1 -1
  19. package/src/conversion/pandoc.test.ts +2 -2
  20. package/src/conversion/pandoc.ts +38 -42
  21. package/src/dsl/dsl-workflow.ts +80 -12
  22. package/src/dsl/validation.test.ts +2 -2
  23. package/src/dsl/vars.test.ts +1 -1
  24. package/src/dsl/vars.ts +6 -6
  25. package/src/dsl/workflow-exec-child.test.ts +14 -4
  26. package/src/dsl/workflow-fetch.test.ts +1 -1
  27. package/src/dsl/workflow-import.test.ts +1 -1
  28. package/src/dsl/workflow.test.ts +12 -2
  29. package/src/index.ts +1 -1
  30. package/src/iterative-generation/activities/extractToc.ts +1 -1
  31. package/src/iterative-generation/activities/generatePart.ts +2 -2
  32. package/src/iterative-generation/activities/generateToc.ts +1 -1
  33. package/src/iterative-generation/iterativeGenerationWorkflow.ts +1 -1
  34. package/src/iterative-generation/types.ts +4 -4
  35. package/src/iterative-generation/utils.ts +4 -4
  36. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js +0 -32
  37. package/lib/cjs/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
  38. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +0 -72
  39. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
  40. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js +0 -18
  41. package/lib/cjs/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
  42. package/lib/cjs/activities/chunkDocument.js +0 -84
  43. package/lib/cjs/activities/chunkDocument.js.map +0 -1
  44. package/lib/cjs/activities/createDocumentFromOther.js +0 -64
  45. package/lib/cjs/activities/createDocumentFromOther.js.map +0 -1
  46. package/lib/cjs/activities/executeInteraction.js +0 -140
  47. package/lib/cjs/activities/executeInteraction.js.map +0 -1
  48. package/lib/cjs/activities/extractDocumentText.js +0 -153
  49. package/lib/cjs/activities/extractDocumentText.js.map +0 -1
  50. package/lib/cjs/activities/generateDocumentProperties.js +0 -77
  51. package/lib/cjs/activities/generateDocumentProperties.js.map +0 -1
  52. package/lib/cjs/activities/generateEmbeddings.js +0 -248
  53. package/lib/cjs/activities/generateEmbeddings.js.map +0 -1
  54. package/lib/cjs/activities/generateImageRendition.js +0 -147
  55. package/lib/cjs/activities/generateImageRendition.js.map +0 -1
  56. package/lib/cjs/activities/generateOrAssignContentType.js +0 -99
  57. package/lib/cjs/activities/generateOrAssignContentType.js.map +0 -1
  58. package/lib/cjs/activities/getObjectFromStore.js +0 -20
  59. package/lib/cjs/activities/getObjectFromStore.js.map +0 -1
  60. package/lib/cjs/activities/index-dsl.js +0 -37
  61. package/lib/cjs/activities/index-dsl.js.map +0 -1
  62. package/lib/cjs/activities/index.js +0 -22
  63. package/lib/cjs/activities/index.js.map +0 -1
  64. package/lib/cjs/activities/media/processPdfWithTextract.js +0 -102
  65. package/lib/cjs/activities/media/processPdfWithTextract.js.map +0 -1
  66. package/lib/cjs/activities/media/transcribeMediaWithGladia.js +0 -51
  67. package/lib/cjs/activities/media/transcribeMediaWithGladia.js.map +0 -1
  68. package/lib/cjs/activities/notifyWebhook.js +0 -34
  69. package/lib/cjs/activities/notifyWebhook.js.map +0 -1
  70. package/lib/cjs/activities/setDocumentStatus.js +0 -15
  71. package/lib/cjs/activities/setDocumentStatus.js.map +0 -1
  72. package/lib/cjs/conversion/TextractProcessor.js +0 -417
  73. package/lib/cjs/conversion/TextractProcessor.js.map +0 -1
  74. package/lib/cjs/conversion/image.js +0 -90
  75. package/lib/cjs/conversion/image.js.map +0 -1
  76. package/lib/cjs/conversion/mutool.js +0 -147
  77. package/lib/cjs/conversion/mutool.js.map +0 -1
  78. package/lib/cjs/conversion/pandoc.js +0 -39
  79. package/lib/cjs/conversion/pandoc.js.map +0 -1
  80. package/lib/cjs/dsl/conditions.js +0 -81
  81. package/lib/cjs/dsl/conditions.js.map +0 -1
  82. package/lib/cjs/dsl/dsl-workflow.js +0 -223
  83. package/lib/cjs/dsl/dsl-workflow.js.map +0 -1
  84. package/lib/cjs/dsl/dslProxyActivities.js +0 -23
  85. package/lib/cjs/dsl/dslProxyActivities.js.map +0 -1
  86. package/lib/cjs/dsl/projections.js +0 -59
  87. package/lib/cjs/dsl/projections.js.map +0 -1
  88. package/lib/cjs/dsl/setup/ActivityContext.js +0 -120
  89. package/lib/cjs/dsl/setup/ActivityContext.js.map +0 -1
  90. package/lib/cjs/dsl/setup/fetch/DataProvider.js +0 -51
  91. package/lib/cjs/dsl/setup/fetch/DataProvider.js.map +0 -1
  92. package/lib/cjs/dsl/setup/fetch/index.js +0 -16
  93. package/lib/cjs/dsl/setup/fetch/index.js.map +0 -1
  94. package/lib/cjs/dsl/setup/fetch/providers.js +0 -67
  95. package/lib/cjs/dsl/setup/fetch/providers.js.map +0 -1
  96. package/lib/cjs/dsl/test/test-child-workflow.js +0 -10
  97. package/lib/cjs/dsl/test/test-child-workflow.js.map +0 -1
  98. package/lib/cjs/dsl/validation.js +0 -122
  99. package/lib/cjs/dsl/validation.js.map +0 -1
  100. package/lib/cjs/dsl/vars.js +0 -341
  101. package/lib/cjs/dsl/vars.js.map +0 -1
  102. package/lib/cjs/dsl/walk.js +0 -100
  103. package/lib/cjs/dsl/walk.js.map +0 -1
  104. package/lib/cjs/dsl.js +0 -20
  105. package/lib/cjs/dsl.js.map +0 -1
  106. package/lib/cjs/errors.js +0 -48
  107. package/lib/cjs/errors.js.map +0 -1
  108. package/lib/cjs/index.js +0 -50
  109. package/lib/cjs/index.js.map +0 -1
  110. package/lib/cjs/iterative-generation/activities/extractToc.js +0 -47
  111. package/lib/cjs/iterative-generation/activities/extractToc.js.map +0 -1
  112. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +0 -69
  113. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +0 -1
  114. package/lib/cjs/iterative-generation/activities/generatePart.js +0 -73
  115. package/lib/cjs/iterative-generation/activities/generatePart.js.map +0 -1
  116. package/lib/cjs/iterative-generation/activities/generateToc.js +0 -91
  117. package/lib/cjs/iterative-generation/activities/generateToc.js.map +0 -1
  118. package/lib/cjs/iterative-generation/activities/index.js +0 -12
  119. package/lib/cjs/iterative-generation/activities/index.js.map +0 -1
  120. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +0 -56
  121. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
  122. package/lib/cjs/iterative-generation/types.js +0 -5
  123. package/lib/cjs/iterative-generation/types.js.map +0 -1
  124. package/lib/cjs/iterative-generation/utils.js +0 -121
  125. package/lib/cjs/iterative-generation/utils.js.map +0 -1
  126. package/lib/cjs/package.json +0 -3
  127. package/lib/cjs/result-types.js +0 -10
  128. package/lib/cjs/result-types.js.map +0 -1
  129. package/lib/cjs/system/notifyWebhookWorkflow.js +0 -47
  130. package/lib/cjs/system/notifyWebhookWorkflow.js.map +0 -1
  131. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +0 -28
  132. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +0 -1
  133. package/lib/cjs/utils/auth.js +0 -15
  134. package/lib/cjs/utils/auth.js.map +0 -1
  135. package/lib/cjs/utils/blobs.js +0 -69
  136. package/lib/cjs/utils/blobs.js.map +0 -1
  137. package/lib/cjs/utils/chunks.js +0 -14
  138. package/lib/cjs/utils/chunks.js.map +0 -1
  139. package/lib/cjs/utils/client.js +0 -26
  140. package/lib/cjs/utils/client.js.map +0 -1
  141. package/lib/cjs/utils/expand-vars.js +0 -33
  142. package/lib/cjs/utils/expand-vars.js.map +0 -1
  143. package/lib/cjs/utils/memory.js +0 -65
  144. package/lib/cjs/utils/memory.js.map +0 -1
  145. package/lib/cjs/utils/tokens.js +0 -38
  146. package/lib/cjs/utils/tokens.js.map +0 -1
  147. package/lib/cjs/vars.js +0 -20
  148. package/lib/cjs/vars.js.map +0 -1
  149. package/lib/cjs/workflows.js +0 -15
  150. package/lib/cjs/workflows.js.map +0 -1
  151. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js +0 -29
  152. package/lib/esm/activities/advanced/createDocumentTypeFromInteractionRun.js.map +0 -1
  153. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +0 -69
  154. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +0 -1
  155. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js +0 -15
  156. package/lib/esm/activities/advanced/updateDocumentFromInteractionRun.js.map +0 -1
  157. package/lib/esm/activities/chunkDocument.js +0 -81
  158. package/lib/esm/activities/chunkDocument.js.map +0 -1
  159. package/lib/esm/activities/createDocumentFromOther.js +0 -58
  160. package/lib/esm/activities/createDocumentFromOther.js.map +0 -1
  161. package/lib/esm/activities/executeInteraction.js +0 -136
  162. package/lib/esm/activities/executeInteraction.js.map +0 -1
  163. package/lib/esm/activities/extractDocumentText.js +0 -150
  164. package/lib/esm/activities/extractDocumentText.js.map +0 -1
  165. package/lib/esm/activities/generateDocumentProperties.js +0 -74
  166. package/lib/esm/activities/generateDocumentProperties.js.map +0 -1
  167. package/lib/esm/activities/generateEmbeddings.js +0 -245
  168. package/lib/esm/activities/generateEmbeddings.js.map +0 -1
  169. package/lib/esm/activities/generateImageRendition.js +0 -141
  170. package/lib/esm/activities/generateImageRendition.js.map +0 -1
  171. package/lib/esm/activities/generateOrAssignContentType.js +0 -96
  172. package/lib/esm/activities/generateOrAssignContentType.js.map +0 -1
  173. package/lib/esm/activities/getObjectFromStore.js +0 -17
  174. package/lib/esm/activities/getObjectFromStore.js.map +0 -1
  175. package/lib/esm/activities/index-dsl.js +0 -18
  176. package/lib/esm/activities/index-dsl.js.map +0 -1
  177. package/lib/esm/activities/index.js +0 -6
  178. package/lib/esm/activities/index.js.map +0 -1
  179. package/lib/esm/activities/media/processPdfWithTextract.js +0 -98
  180. package/lib/esm/activities/media/processPdfWithTextract.js.map +0 -1
  181. package/lib/esm/activities/media/transcribeMediaWithGladia.js +0 -48
  182. package/lib/esm/activities/media/transcribeMediaWithGladia.js.map +0 -1
  183. package/lib/esm/activities/notifyWebhook.js +0 -31
  184. package/lib/esm/activities/notifyWebhook.js.map +0 -1
  185. package/lib/esm/activities/setDocumentStatus.js +0 -12
  186. package/lib/esm/activities/setDocumentStatus.js.map +0 -1
  187. package/lib/esm/conversion/TextractProcessor.js +0 -410
  188. package/lib/esm/conversion/TextractProcessor.js.map +0 -1
  189. package/lib/esm/conversion/image.js +0 -84
  190. package/lib/esm/conversion/image.js.map +0 -1
  191. package/lib/esm/conversion/mutool.js +0 -139
  192. package/lib/esm/conversion/mutool.js.map +0 -1
  193. package/lib/esm/conversion/pandoc.js +0 -36
  194. package/lib/esm/conversion/pandoc.js.map +0 -1
  195. package/lib/esm/dsl/conditions.js +0 -75
  196. package/lib/esm/dsl/conditions.js.map +0 -1
  197. package/lib/esm/dsl/dsl-workflow.js +0 -216
  198. package/lib/esm/dsl/dsl-workflow.js.map +0 -1
  199. package/lib/esm/dsl/dslProxyActivities.js +0 -20
  200. package/lib/esm/dsl/dslProxyActivities.js.map +0 -1
  201. package/lib/esm/dsl/projections.js +0 -55
  202. package/lib/esm/dsl/projections.js.map +0 -1
  203. package/lib/esm/dsl/setup/ActivityContext.js +0 -115
  204. package/lib/esm/dsl/setup/ActivityContext.js.map +0 -1
  205. package/lib/esm/dsl/setup/fetch/DataProvider.js +0 -47
  206. package/lib/esm/dsl/setup/fetch/DataProvider.js.map +0 -1
  207. package/lib/esm/dsl/setup/fetch/index.js +0 -12
  208. package/lib/esm/dsl/setup/fetch/index.js.map +0 -1
  209. package/lib/esm/dsl/setup/fetch/providers.js +0 -61
  210. package/lib/esm/dsl/setup/fetch/providers.js.map +0 -1
  211. package/lib/esm/dsl/test/test-child-workflow.js +0 -5
  212. package/lib/esm/dsl/test/test-child-workflow.js.map +0 -1
  213. package/lib/esm/dsl/validation.js +0 -118
  214. package/lib/esm/dsl/validation.js.map +0 -1
  215. package/lib/esm/dsl/vars.js +0 -335
  216. package/lib/esm/dsl/vars.js.map +0 -1
  217. package/lib/esm/dsl/walk.js +0 -96
  218. package/lib/esm/dsl/walk.js.map +0 -1
  219. package/lib/esm/dsl.js +0 -4
  220. package/lib/esm/dsl.js.map +0 -1
  221. package/lib/esm/errors.js +0 -41
  222. package/lib/esm/errors.js.map +0 -1
  223. package/lib/esm/index.js +0 -32
  224. package/lib/esm/index.js.map +0 -1
  225. package/lib/esm/iterative-generation/activities/extractToc.js +0 -44
  226. package/lib/esm/iterative-generation/activities/extractToc.js.map +0 -1
  227. package/lib/esm/iterative-generation/activities/finalizeOutput.js +0 -66
  228. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +0 -1
  229. package/lib/esm/iterative-generation/activities/generatePart.js +0 -70
  230. package/lib/esm/iterative-generation/activities/generatePart.js.map +0 -1
  231. package/lib/esm/iterative-generation/activities/generateToc.js +0 -88
  232. package/lib/esm/iterative-generation/activities/generateToc.js.map +0 -1
  233. package/lib/esm/iterative-generation/activities/index.js +0 -5
  234. package/lib/esm/iterative-generation/activities/index.js.map +0 -1
  235. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +0 -53
  236. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +0 -1
  237. package/lib/esm/iterative-generation/types.js +0 -2
  238. package/lib/esm/iterative-generation/types.js.map +0 -1
  239. package/lib/esm/iterative-generation/utils.js +0 -112
  240. package/lib/esm/iterative-generation/utils.js.map +0 -1
  241. package/lib/esm/result-types.js +0 -7
  242. package/lib/esm/result-types.js.map +0 -1
  243. package/lib/esm/system/notifyWebhookWorkflow.js +0 -44
  244. package/lib/esm/system/notifyWebhookWorkflow.js.map +0 -1
  245. package/lib/esm/system/recalculateEmbeddingsWorkflow.js +0 -25
  246. package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +0 -1
  247. package/lib/esm/utils/auth.js +0 -8
  248. package/lib/esm/utils/auth.js.map +0 -1
  249. package/lib/esm/utils/blobs.js +0 -58
  250. package/lib/esm/utils/blobs.js.map +0 -1
  251. package/lib/esm/utils/chunks.js +0 -9
  252. package/lib/esm/utils/chunks.js.map +0 -1
  253. package/lib/esm/utils/client.js +0 -23
  254. package/lib/esm/utils/client.js.map +0 -1
  255. package/lib/esm/utils/expand-vars.js +0 -30
  256. package/lib/esm/utils/expand-vars.js.map +0 -1
  257. package/lib/esm/utils/memory.js +0 -55
  258. package/lib/esm/utils/memory.js.map +0 -1
  259. package/lib/esm/utils/tokens.js +0 -34
  260. package/lib/esm/utils/tokens.js.map +0 -1
  261. package/lib/esm/vars.js +0 -4
  262. package/lib/esm/vars.js.map +0 -1
  263. package/lib/esm/workflows.js +0 -8
  264. package/lib/esm/workflows.js.map +0 -1
  265. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts +0 -17
  266. package/lib/types/activities/advanced/createDocumentTypeFromInteractionRun.d.ts.map +0 -1
  267. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +0 -39
  268. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +0 -1
  269. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts +0 -19
  270. package/lib/types/activities/advanced/updateDocumentFromInteractionRun.d.ts.map +0 -1
  271. package/lib/types/activities/chunkDocument.d.ts +0 -33
  272. package/lib/types/activities/chunkDocument.d.ts.map +0 -1
  273. package/lib/types/activities/createDocumentFromOther.d.ts +0 -21
  274. package/lib/types/activities/createDocumentFromOther.d.ts.map +0 -1
  275. package/lib/types/activities/executeInteraction.d.ts +0 -55
  276. package/lib/types/activities/executeInteraction.d.ts.map +0 -1
  277. package/lib/types/activities/extractDocumentText.d.ts +0 -10
  278. package/lib/types/activities/extractDocumentText.d.ts.map +0 -1
  279. package/lib/types/activities/generateDocumentProperties.d.ts +0 -32
  280. package/lib/types/activities/generateDocumentProperties.d.ts.map +0 -1
  281. package/lib/types/activities/generateEmbeddings.d.ts +0 -53
  282. package/lib/types/activities/generateEmbeddings.d.ts.map +0 -1
  283. package/lib/types/activities/generateImageRendition.d.ts +0 -15
  284. package/lib/types/activities/generateImageRendition.d.ts.map +0 -1
  285. package/lib/types/activities/generateOrAssignContentType.d.ts +0 -44
  286. package/lib/types/activities/generateOrAssignContentType.d.ts.map +0 -1
  287. package/lib/types/activities/getObjectFromStore.d.ts +0 -14
  288. package/lib/types/activities/getObjectFromStore.d.ts.map +0 -1
  289. package/lib/types/activities/index-dsl.d.ts +0 -17
  290. package/lib/types/activities/index-dsl.d.ts.map +0 -1
  291. package/lib/types/activities/index.d.ts +0 -6
  292. package/lib/types/activities/index.d.ts.map +0 -1
  293. package/lib/types/activities/media/processPdfWithTextract.d.ts +0 -26
  294. package/lib/types/activities/media/processPdfWithTextract.d.ts.map +0 -1
  295. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts +0 -14
  296. package/lib/types/activities/media/transcribeMediaWithGladia.d.ts.map +0 -1
  297. package/lib/types/activities/notifyWebhook.d.ts +0 -16
  298. package/lib/types/activities/notifyWebhook.d.ts.map +0 -1
  299. package/lib/types/activities/setDocumentStatus.d.ts +0 -15
  300. package/lib/types/activities/setDocumentStatus.d.ts.map +0 -1
  301. package/lib/types/conversion/TextractProcessor.d.ts +0 -45
  302. package/lib/types/conversion/TextractProcessor.d.ts.map +0 -1
  303. package/lib/types/conversion/image.d.ts +0 -11
  304. package/lib/types/conversion/image.d.ts.map +0 -1
  305. package/lib/types/conversion/mutool.d.ts +0 -19
  306. package/lib/types/conversion/mutool.d.ts.map +0 -1
  307. package/lib/types/conversion/pandoc.d.ts +0 -2
  308. package/lib/types/conversion/pandoc.d.ts.map +0 -1
  309. package/lib/types/dsl/conditions.d.ts +0 -2
  310. package/lib/types/dsl/conditions.d.ts.map +0 -1
  311. package/lib/types/dsl/dsl-workflow.d.ts +0 -5
  312. package/lib/types/dsl/dsl-workflow.d.ts.map +0 -1
  313. package/lib/types/dsl/dslProxyActivities.d.ts +0 -10
  314. package/lib/types/dsl/dslProxyActivities.d.ts.map +0 -1
  315. package/lib/types/dsl/projections.d.ts +0 -4
  316. package/lib/types/dsl/projections.d.ts.map +0 -1
  317. package/lib/types/dsl/setup/ActivityContext.d.ts +0 -17
  318. package/lib/types/dsl/setup/ActivityContext.d.ts.map +0 -1
  319. package/lib/types/dsl/setup/fetch/DataProvider.d.ts +0 -9
  320. package/lib/types/dsl/setup/fetch/DataProvider.d.ts.map +0 -1
  321. package/lib/types/dsl/setup/fetch/index.d.ts +0 -6
  322. package/lib/types/dsl/setup/fetch/index.d.ts.map +0 -1
  323. package/lib/types/dsl/setup/fetch/providers.d.ts +0 -25
  324. package/lib/types/dsl/setup/fetch/providers.d.ts.map +0 -1
  325. package/lib/types/dsl/test/test-child-workflow.d.ts +0 -4
  326. package/lib/types/dsl/test/test-child-workflow.d.ts.map +0 -1
  327. package/lib/types/dsl/validation.d.ts +0 -4
  328. package/lib/types/dsl/validation.d.ts.map +0 -1
  329. package/lib/types/dsl/vars.d.ts +0 -48
  330. package/lib/types/dsl/vars.d.ts.map +0 -1
  331. package/lib/types/dsl/walk.d.ts +0 -18
  332. package/lib/types/dsl/walk.d.ts.map +0 -1
  333. package/lib/types/dsl.d.ts +0 -4
  334. package/lib/types/dsl.d.ts.map +0 -1
  335. package/lib/types/errors.d.ts +0 -22
  336. package/lib/types/errors.d.ts.map +0 -1
  337. package/lib/types/index.d.ts +0 -31
  338. package/lib/types/index.d.ts.map +0 -1
  339. package/lib/types/iterative-generation/activities/extractToc.d.ts +0 -10
  340. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +0 -1
  341. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts +0 -3
  342. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +0 -1
  343. package/lib/types/iterative-generation/activities/generatePart.d.ts +0 -3
  344. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +0 -1
  345. package/lib/types/iterative-generation/activities/generateToc.d.ts +0 -4
  346. package/lib/types/iterative-generation/activities/generateToc.d.ts.map +0 -1
  347. package/lib/types/iterative-generation/activities/index.d.ts +0 -5
  348. package/lib/types/iterative-generation/activities/index.d.ts.map +0 -1
  349. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts +0 -3
  350. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +0 -1
  351. package/lib/types/iterative-generation/types.d.ts +0 -79
  352. package/lib/types/iterative-generation/types.d.ts.map +0 -1
  353. package/lib/types/iterative-generation/utils.d.ts +0 -27
  354. package/lib/types/iterative-generation/utils.d.ts.map +0 -1
  355. package/lib/types/result-types.d.ts +0 -22
  356. package/lib/types/result-types.d.ts.map +0 -1
  357. package/lib/types/system/notifyWebhookWorkflow.d.ts +0 -3
  358. package/lib/types/system/notifyWebhookWorkflow.d.ts.map +0 -1
  359. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +0 -25
  360. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +0 -1
  361. package/lib/types/utils/auth.d.ts +0 -4
  362. package/lib/types/utils/auth.d.ts.map +0 -1
  363. package/lib/types/utils/blobs.d.ts +0 -8
  364. package/lib/types/utils/blobs.d.ts.map +0 -1
  365. package/lib/types/utils/chunks.d.ts +0 -9
  366. package/lib/types/utils/chunks.d.ts.map +0 -1
  367. package/lib/types/utils/client.d.ts +0 -7
  368. package/lib/types/utils/client.d.ts.map +0 -1
  369. package/lib/types/utils/expand-vars.d.ts +0 -8
  370. package/lib/types/utils/expand-vars.d.ts.map +0 -1
  371. package/lib/types/utils/memory.d.ts +0 -8
  372. package/lib/types/utils/memory.d.ts.map +0 -1
  373. package/lib/types/utils/tokens.d.ts +0 -11
  374. package/lib/types/utils/tokens.d.ts.map +0 -1
  375. package/lib/types/vars.d.ts +0 -3
  376. package/lib/types/vars.d.ts.map +0 -1
  377. package/lib/types/workflows.d.ts +0 -8
  378. package/lib/types/workflows.d.ts.map +0 -1
  379. package/lib/workflows-bundle.js +0 -20689
@@ -1,140 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.executeInteraction = executeInteraction;
4
- exports.executeInteractionFromActivity = executeInteractionFromActivity;
5
- const activity_1 = require("@temporalio/activity");
6
- const common_1 = require("@vertesia/common");
7
- const projections_js_1 = require("../dsl/projections.js");
8
- const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
9
- const errors_js_1 = require("../errors.js");
10
- const tokens_js_1 = require("../utils/tokens.js");
11
- //Example:
12
- //@ts-ignore
13
- const JSON = {
14
- name: 'executeInteraction',
15
- import: ["defaultModel", "guidlineId", "docTypeId"],
16
- params: {
17
- defaultModel: "${model}",
18
- interactionName: "GenerateSummary",
19
- model: "${defaultModel ?? 'gpt4'}",
20
- environment: "13456",
21
- max_tokens: 100,
22
- temperature: 0.5,
23
- tags: ["test"],
24
- result_schema: "${docType.object_schema}",
25
- prompt_data: {
26
- documents: "${documents}",
27
- guidline: "${guidline.text}"
28
- }
29
- },
30
- fetch: {
31
- documents: {
32
- type: "document",
33
- query: {
34
- id: { $in: "${objectIds}" },
35
- },
36
- select: "+text",
37
- },
38
- guidline: {
39
- type: "document",
40
- limit: 1,
41
- query: {
42
- id: "${guidlineId}",
43
- },
44
- select: "+text",
45
- on_not_found: "throw"
46
- },
47
- docType: {
48
- type: "document_type",
49
- limit: 1,
50
- query: {
51
- id: "${docTypeId}",
52
- },
53
- select: "+object_schema",
54
- }
55
- }
56
- };
57
- async function executeInteraction(payload) {
58
- const { client, params } = await (0, ActivityContext_js_1.setupActivity)(payload);
59
- const { interactionName, prompt_data, static_prompt_data: wf_prompt_data } = params;
60
- if (wf_prompt_data) {
61
- Object.assign(prompt_data, wf_prompt_data);
62
- }
63
- if (!interactionName) {
64
- activity_1.log.error("Missing interactionName", { params });
65
- throw new errors_js_1.ActivityParamNotFound("interactionName", payload.activity);
66
- }
67
- if (params.truncate) {
68
- const truncate = params.truncate;
69
- for (const [key, value] of Object.entries(truncate)) {
70
- prompt_data[key] = (0, tokens_js_1.truncByMaxTokens)(prompt_data[key], value);
71
- }
72
- }
73
- const res = await executeInteractionFromActivity(client, interactionName, params, prompt_data, payload.debug_mode);
74
- return (0, projections_js_1.projectResult)(payload, params, res, {
75
- runId: res.id,
76
- status: res.status,
77
- result: res.result,
78
- });
79
- }
80
- async function executeInteractionFromActivity(client, interactionName, params, prompt_data, debug) {
81
- const userTags = params.tags;
82
- const info = (0, activity_1.activityInfo)();
83
- const runId = info.workflowExecution.runId;
84
- let tags = ["workflow", `tmpRunId:${runId}`]; //TODO use wf:wfName
85
- if (userTags) {
86
- tags = tags.concat(userTags);
87
- }
88
- let previousStudioExecutionRun = undefined;
89
- if (params.include_previous_error) {
90
- //retrieve last failed run if any
91
- if (info.attempt > 1) {
92
- activity_1.log.info("Retrying, searching for previous run", { tags: ["tmpRunId:" + runId] });
93
- const payload = {
94
- query: { tags: ["tmpRunId:" + info.workflowExecution.runId] },
95
- limit: 1,
96
- };
97
- const previousRun = await client.runs.search(payload).then((res) => {
98
- activity_1.log.info("Search results", { results: res });
99
- return res ? res[0] ?? undefined : undefined;
100
- });
101
- if (previousRun) {
102
- activity_1.log.info("Found previous run", { previousRun });
103
- previousStudioExecutionRun = await client.runs.retrieve(previousRun.id);
104
- }
105
- }
106
- }
107
- if (debug && previousStudioExecutionRun?.error) {
108
- activity_1.log.info(`Found previous run error`, { error: previousStudioExecutionRun?.error });
109
- }
110
- const config = {
111
- environment: params.environment,
112
- model: params.model,
113
- model_options: params.model_options,
114
- };
115
- const data = {
116
- ...prompt_data,
117
- previous_error: previousStudioExecutionRun?.error,
118
- };
119
- const result_schema = params.result_schema;
120
- activity_1.log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
121
- const res = await client.interactions.executeByName(interactionName, {
122
- config,
123
- data,
124
- result_schema,
125
- tags,
126
- stream: false,
127
- }).catch((err) => {
128
- activity_1.log.error(`Error executing interaction ${interactionName}`, { err });
129
- throw new Error(`Interaction Execution failed ${interactionName}: ${err.message}`);
130
- });
131
- if (debug) {
132
- activity_1.log.info(`Interaction executed ${interactionName}`, res);
133
- }
134
- if (res.error || res.status === common_1.ExecutionRunStatus.failed) {
135
- activity_1.log.error(`Error executing interaction ${interactionName}`, { error: res.error });
136
- throw new Error(`Interaction Execution failed ${interactionName}: ${res.error}`);
137
- }
138
- return res;
139
- }
140
- //# sourceMappingURL=executeInteraction.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"executeInteraction.js","sourceRoot":"","sources":["../../../src/activities/executeInteraction.ts"],"names":[],"mappings":";;AA+GA,gDA8BC;AAED,wEAoEC;AAlND,mDAAyD;AAEzD,6CAAuK;AACvK,0DAAsD;AACtD,wEAAgE;AAChE,4CAAqD;AACrD,kDAAoE;AAEpE,UAAU;AACV,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,oBAAoB;IAC1B,MAAM,EAAE,CAAC,cAAc,EAAE,YAAY,EAAE,WAAW,CAAC;IACnD,MAAM,EAAE;QACJ,YAAY,EAAE,UAAU;QACxB,eAAe,EAAE,iBAAiB;QAClC,KAAK,EAAE,2BAA2B;QAClC,WAAW,EAAE,OAAO;QACpB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,GAAG;QAChB,IAAI,EAAE,CAAC,MAAM,CAAC;QACd,aAAa,EAAE,0BAA0B;QACzC,WAAW,EAAE;YACT,SAAS,EAAE,cAAc;YACzB,QAAQ,EAAE,kBAAkB;SAC/B;KACJ;IACD,KAAK,EAAE;QACH,SAAS,EAAE;YACP,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE;gBACH,EAAE,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE;aAC9B;YACD,MAAM,EAAE,OAAO;SAClB;QACD,QAAQ,EAAE;YACN,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,eAAe;aACtB;YACD,MAAM,EAAE,OAAO;YACf,YAAY,EAAE,OAAO;SACxB;QACD,OAAO,EAAE;YACL,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,CAAC;YACR,KAAK,EAAE;gBACH,EAAE,EAAE,cAAc;aACrB;YACD,MAAM,EAAE,gBAAgB;SAC3B;KACJ;CACJ,CAAA;AAyDM,KAAK,UAAU,kBAAkB,CAAC,OAA8D;IACnG,MAAM,EACF,MAAM,EAAE,MAAM,EACjB,GAAG,MAAM,IAAA,kCAAa,EAA2B,OAAO,CAAC,CAAC;IAE3D,MAAM,EAAE,eAAe,EAAE,WAAW,EAAE,kBAAkB,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IACpF,IAAI,cAAc,EAAE,CAAC;QACjB,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC/C,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACnB,cAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;QACjD,MAAM,IAAI,iCAAqB,CAAC,iBAAiB,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QACjC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClD,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,4BAAgB,EAAC,WAAW,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;IACL,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,8BAA8B,CAAC,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAEnH,OAAO,IAAA,8BAAa,EAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE;QACvC,KAAK,EAAE,GAAG,CAAC,EAAE;QACb,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAA
M,EAAE,GAAG,CAAC,MAAM;KACrB,CAAC,CAAC;AAEP,CAAC;AAEM,KAAK,UAAU,8BAA8B,CAAC,MAAsB,EAAE,eAAuB,EAAE,MAAkC,EAAE,WAAgB,EAAE,KAAe;IACvK,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAA,uBAAY,GAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC;IAC3C,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,YAAY,KAAK,EAAE,CAAC,CAAC,CAAC,oBAAoB;IAClE,IAAI,QAAQ,EAAE,CAAC;QACX,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,0BAA0B,GAA6B,SAAS,CAAC;IACrE,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;QAChC,iCAAiC;QACjC,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACnB,cAAG,CAAC,IAAI,CAAC,sCAAsC,EAAE,EAAE,IAAI,EAAE,CAAC,WAAW,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;YAClF,MAAM,OAAO,GAAqB;gBAC9B,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,EAAE;gBAC7D,KAAK,EAAE,CAAC;aACX,CAAC;YACF,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC/D,cAAG,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;gBAC7C,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS,CAAA;YAChD,CAAC,CAAC,CAAC;YAEH,IAAI,WAAW,EAAE,CAAC;gBACd,cAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;gBAChD,0BAA0B,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;IACL,CAAC;IACD,IAAI,KAAK,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;QAC7C,cAAG,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,KAAK,EAAE,0BAA0B,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,MAAM,MAAM,GAAsC;QAC9C,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,aAAa,EAAE,MAAM,CAAC,aAAa;KACtC,CAAA;IACD,MAAM,IAAI,GAAG;QACT,GAAG,WAAW;QACd,cAAc,EAAE,0BAA0B,EAAE,KAAK;KACpD,CAAA;IAED,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,CAAC;IAE3C,cAAG,CAAC,KAAK,CAAC,gCAAgC,eAAe,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpG,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,aAAa,CAAC,eAAe,EAAE;QACjE,MAAM;QACN,IAAI;QACJ,aAAa;QACb,IAAI;QACJ,MAAM,EAAE,KAAK;KAChB,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACb,cAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrE
,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACvF,CAAC,CAAC,CAAC;IAEH,IAAI,KAAK,EAAE,CAAC;QACR,cAAG,CAAC,IAAI,CAAC,wBAAwB,eAAe,EAAE,EAAE,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED,IAAI,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,MAAM,KAAK,2BAAkB,CAAC,MAAM,EAAE,CAAC;QACxD,cAAG,CAAC,KAAK,CAAC,+BAA+B,eAAe,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;QAClF,MAAM,IAAI,KAAK,CAAC,gCAAgC,eAAe,KAAK,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;IACrF,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
@@ -1,153 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.extractDocumentText = extractDocumentText;
4
- const activity_1 = require("@temporalio/activity");
5
- const mutool_js_1 = require("../conversion/mutool.js");
6
- const pandoc_js_1 = require("../conversion/pandoc.js");
7
- const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
8
- const errors_js_1 = require("../errors.js");
9
- const result_types_js_1 = require("../result-types.js");
10
- const blobs_js_1 = require("../utils/blobs.js");
11
- const tokens_js_1 = require("../utils/tokens.js");
12
// Module-level descriptor that only carries the activity name. It shadows the
// global JSON object inside this module; nothing in the visible code reads it.
//@ts-ignore
const JSON = { name: 'extractDocumentText' };

/**
 * Extracts the text of a stored document and saves it (with an etag and token
 * counts) back on the object.
 *
 * PDFs go through mutool, docx/html/odt/rtf go through pandoc (markdown output),
 * known textual mime types are decoded as UTF-8, and any other type is decoded
 * as UTF-8 only when sniffIfText accepts the bytes.
 *
 * @param payload - activity payload handed to setupActivity
 * @returns the summary object built by createResponse
 * @throws NoDocumentFound when no object matches the resolved objectId
 */
async function extractDocumentText(payload) {
    const Status = result_types_js_1.TextExtractionStatus;
    const { client, objectId } = await (0, ActivityContext_js_1.setupActivity)(payload);

    const matches = await client.objects.find({
        query: { _id: objectId },
        limit: 1,
        select: "+text"
    });
    const doc = matches[0];
    if (!doc) {
        activity_1.log.error(`Document ${objectId} not found`);
        throw new errors_js_1.NoDocumentFound(`Document ${objectId} not found`, payload.objectIds);
    }
    activity_1.log.info(`Extracting text for object ${doc.id}`);

    // Without both a mime type and a source there is nothing to convert.
    if (!doc.content?.type || !doc.content?.source) {
        return doc.text
            ? createResponse(doc, doc.text, Status.skipped, "Text present and no source or type")
            : createResponse(doc, "", Status.error, "No source or type found");
    }

    // Skip when text is already present and its etag matches the content etag.
    const upToDate = doc.text && doc.text.length > 0 && doc.text_etag === doc.content.etag;
    if (upToDate) {
        return createResponse(doc, doc.text, Status.skipped, "Text already extracted");
    }

    let fileBuffer;
    try {
        fileBuffer = await (0, blobs_js_1.fetchBlobAsBuffer)(client, doc.content.source);
    }
    catch (e) {
        activity_1.log.error(`Error reading file: ${e}`);
        return createResponse(doc, "", Status.error, e.message);
    }

    // Mime types converted to markdown by pandoc, mapped to the pandoc input format.
    const pandocFormats = new Map([
        ['application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'],
        ['text/html', 'html'],
        ['application/vnd.oasis.opendocument.text', 'odt'],
        ['application/rtf', 'rtf'],
    ]);
    // Mime types whose bytes are already text and are decoded as UTF-8 directly.
    const utf8Types = new Set([
        'text/plain',
        'text/markdown',
        'text/csv',
        'application/typescript',
        'application/javascript',
        'application/json',
    ]);

    const mimeType = doc.content.type;
    let extracted;
    if (mimeType === 'application/pdf') {
        // PDFs are always converted with mutool, whatever their size.
        extracted = await (0, mutool_js_1.mutoolPdfToText)(fileBuffer);
    }
    else if (pandocFormats.has(mimeType)) {
        extracted = await (0, pandoc_js_1.manyToMarkdown)(fileBuffer, pandocFormats.get(mimeType));
    }
    else if (utf8Types.has(mimeType) || sniffIfText(fileBuffer)) {
        extracted = fileBuffer.toString('utf8'); //TODO: add charset detection
    }
    else {
        return createResponse(doc, doc.text ?? '', Status.skipped, `Unsupported mime type: ${doc.content.type}`);
    }

    const tokensData = (0, tokens_js_1.countTokens)(extracted);
    // Fall back to a hash of the extracted text when the content has no etag.
    const etag = doc.content.etag ?? (0, blobs_js_1.md5)(extracted);
    await client.objects.update(doc.id, {
        text: extracted,
        text_etag: etag,
        tokens: {
            ...tokensData,
            etag: etag,
        }
    });
    return createResponse(doc, extracted, Status.success);
}
115
/**
 * Builds the result object returned by the text extraction activity.
 *
 * @param doc - the document; its `tokens` and `id` are copied into the result
 * @param text - the text the result describes (only its length and emptiness are reported)
 * @param status - extraction status value
 * @param message - optional human readable detail
 * @returns `{ status, message, tokens, len, objectId, hasText }`
 */
function createResponse(doc, text, status, message) {
    const response = { status, message };
    response.tokens = doc.tokens;
    response.len = text.length;
    response.objectId = doc.id;
    response.hasText = Boolean(text);
    return response;
}
125
/**
 * Heuristically decides whether a buffer holds UTF-8 text.
 *
 * Buffers larger than 500 KiB are rejected outright. Otherwise the first 1000
 * bytes are scanned for control characters (tab, line feed and carriage return
 * are allowed); more than 10% of them means "binary". Finally the whole buffer
 * is decoded as UTF-8 and rejected when empty or when the decoded string
 * contains the U+FFFD replacement character.
 *
 * @param buf - the bytes to inspect
 * @returns true when the content looks like text
 */
function sniffIfText(buf) {
    const sizeLimit = 500 * 1024;
    if (buf.length > sizeLimit) {
        return false; // too large, don't even try
    }

    const sampleSize = Math.min(buf.length, 1000);
    let controlBytes = 0;
    for (let idx = 0; idx < sampleSize; idx += 1) {
        const value = buf[idx];
        // Bytes below 32 are control characters; NUL (0) is counted here as well.
        const isWhitespace = value === 9 || value === 10 || value === 13;
        if (value < 32 && !isWhitespace) {
            controlBytes += 1;
        }
    }
    // For an empty buffer the ratio is NaN, which compares false; the empty
    // string check below then rejects it.
    if (controlBytes / sampleSize > 0.1) {
        return false;
    }

    try {
        const decoded = buf.toString('utf8');
        if (decoded.length === 0) {
            return false;
        }
        return !decoded.includes('\uFFFD');
    }
    catch {
        return false;
    }
}
153
- //# sourceMappingURL=extractDocumentText.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"extractDocumentText.js","sourceRoot":"","sources":["../../../src/activities/extractDocumentText.ts"],"names":[],"mappings":";;AAsBA,kDAyHC;AA/ID,mDAA2C;AAE3C,uDAA0D;AAC1D,uDAAyD;AACzD,wEAAgE;AAChE,4CAA+C;AAC/C,wDAAgF;AAChF,gDAA2D;AAC3D,kDAAiD;AAEjD,YAAY;AACZ,MAAM,IAAI,GAAoB;IAC1B,IAAI,EAAE,qBAAqB;CAC9B,CAAA;AAG6C,CAAC;AAMxC,KAAK,UAAU,mBAAmB,CAAC,OAA+D;IACrG,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAA,kCAAa,EAAC,OAAO,CAAC,CAAC;IAE1D,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;QAChC,KAAK,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE;QACxB,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,OAAO;KAClB,CAAC,CAAA;IACF,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAkB,CAAC;IAClC,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,cAAG,CAAC,KAAK,CAAC,YAAY,QAAQ,YAAY,CAAC,CAAC;QAC5C,MAAM,IAAI,2BAAe,CAAC,YAAY,QAAQ,YAAY,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IACnF,CAAC;IAED,cAAG,CAAC,IAAI,CAAC,8BAA8B,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;IAGjD,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC7C,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,sCAAoB,CAAC,OAAO,EAAE,oCAAoC,CAAC,CAAC;QAC7G,CAAC;aAAM,CAAC;YACJ,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,sCAAoB,CAAC,KAAK,EAAE,yBAAyB,CAAC,CAAC;QAC1F,CAAC;IACL,CAAC;IAED,gDAAgD;IAChD,IAAI,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACxE,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE,sCAAoB,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;IACjG,CAAC;IAED,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACD,UAAU,GAAG,MAAM,IAAA,4BAAiB,EAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACrE,CAAC;IAAC,OAAO,CAAM,EAAE,CAAC;QACd,cAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,EAAE,CAAC,CAAC;QACtC,OAAO,cAAc,CAAC,GAAG,EAAE,EAAE,EAAE,sCAAoB,CAAC,KAAK,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;IAC1E,CAAC;IAGD,IAAI,GAAW,CAAC;IAEhB,QAAQ,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAEvB,KAAK,iBAAiB;YAClB,qCAAqC;YACrC,GAAG,GAAG,MAAM,IAAA,2BAAe,EAAC,UAAU,CAAC,CAAC;YACxC,MAAM;QAEV,KAAK,YAAY;YACb,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAA;YACjC,MAAM;QAEV,MA
AM;QACN,KAAK,yEAAyE;YAC1E,GAAG,GAAG,MAAM,IAAA,0BAAc,EAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM;QAEV,MAAM;QACN,KAAK,WAAW;YACZ,GAAG,GAAG,MAAM,IAAA,0BAAc,EAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM;QAEV,cAAc;QACd,KAAK,yCAAyC;YAC1C,GAAG,GAAG,MAAM,IAAA,0BAAc,EAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,MAAM;QAEV,KAAK;QACL,KAAK,iBAAiB;YAClB,GAAG,GAAG,MAAM,IAAA,0BAAc,EAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,MAAM;QAEV,UAAU;QACV,KAAK,eAAe;YAChB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,KAAK;QACL,KAAK,UAAU;YACX,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,YAAY;QACZ,KAAK,wBAAwB;YACzB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV,MAAM;QACN,KAAK,kBAAkB;YACnB,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM;QAEV;YACI,IAAI,WAAW,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC1B,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,6BAA6B;gBAChE,MAAM;YACV,CAAC;YACD,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,sCAAoB,CAAC,OAAO,EAAE,0BAA0B,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC/H,CAAC;IAGD,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,GAAG,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,IAAA,cAAG,EAAC,GAAG,CAAC,CAAC;IAE1C,MAAM,UAAU,GAA+B;QAC3C,IAAI,EAAE,GAAG;QACT,SAAS,EAAE,IAAI;QACf,MAAM,EAAE;YACJ,GAAG,UAAU;YACb,IAAI,EAAE,IAAI;SACb;KACJ,CAAA;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAEhD,OAAO,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,sCAAoB,CAAC,OAAO,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,cAAc,CAAC,GAAkB,EAAE,IAAY,EAAE,MAA4B,EAAE,OAAgB;IACpG,OAAO;QACH,MAAM;QACN,OAAO;QACP,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,GAAG,EAAE,IAAI,CAAC,MAAM;QAChB,QAAQ,EAAE,GAAG,CAAC,EAAE;QAChB,OAAO,EAAE,CAAC,CAAC,IAAI;KAClB,CAAA;AAEL,CAAC;AAGD,SAAS,WAAW,CAAC,GAAW;IAC5B,uCAAuC;IACvC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,GAAG,IAAI,EAAE,CAAC;QAC1B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,kCAAkC;IAClC,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,yB
AAyB;IAExE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,sDAAsD;QACtD,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;YAC3D,WAAW,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;IAED,2DAA2D;IAC3D,IAAI,WAAW,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,4CAA4C;IAC5C,IAAI,CAAC;QACD,MAAM,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,wBAAwB;IAC1E,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC"}
@@ -1,77 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.generateDocumentProperties = generateDocumentProperties;
4
- const activity_1 = require("@temporalio/activity");
5
- const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
6
- const executeInteraction_js_1 = require("./executeInteraction.js");
7
// Interaction used when the activity params do not name one.
const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";

/**
 * Runs an extraction interaction against a document and stores the result as
 * the document's properties.
 *
 * The interaction receives the document text, an optional image reference
 * (images always, PDFs only when `params.use_vision` is set) and the project's
 * `human_context`. The result is constrained by the object schema of the
 * document's type. When the document has no text, a text is derived from the
 * result's `title` and `description`.
 *
 * @param payload - activity payload handed to setupActivity
 * @returns `{ status: "failed", error }`, `{ document, status: "skipped", message }`
 *          or `{ status: "completed" }`
 */
async function generateDocumentProperties(payload) {
    const context = await (0, ActivityContext_js_1.setupActivity)(payload);
    const { params, client, objectId } = context;
    const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
    const project = await context.fetchProject();
    const doc = await client.objects.retrieve(objectId, "+text");

    let type;
    if (doc.type) {
        type = await client.types.retrieve(doc.type.id);
    }

    // Evaluated lazily so the mime type is only inspected where it is needed.
    const mimeStartsWith = (prefix) => doc.content?.type?.startsWith(prefix);

    // Nothing to work with: no text, vision not requested and not an image.
    if (!doc.text && !params.use_vision && !mimeStartsWith("image/")) {
        activity_1.log.warn(`Object ${objectId} not found or text is empty`);
        return { status: "failed", error: "no-text" };
    }
    if (!type || !type.object_schema) {
        activity_1.log.info(`Object ${objectId} has no schema`);
        return { document: objectId, status: "skipped", message: "no schema defined on type" };
    }

    const resolveImageRef = () => {
        const visionPdf = () => params.use_vision && mimeStartsWith("application/pdf");
        if (mimeStartsWith("image/") || visionPdf()) {
            return "store:" + doc.id;
        }
        activity_1.log.info(`Object ${objectId} is not an image or pdf`);
        return undefined;
    };

    const promptData = {
        content: doc.text ?? undefined,
        image: resolveImageRef() ?? undefined,
        human_context: project?.configuration?.human_context ?? undefined,
    };

    const debugDetails = payload.debug_mode ? { params } : undefined;
    activity_1.log.info(` Extracting information from object ${objectId} with type ${type.name}`, debugDetails);

    const interactionParams = {
        ...params,
        include_previous_error: true,
        result_schema: type.object_schema,
    };
    const infoRes = await (0, executeInteraction_js_1.executeInteractionFromActivity)(client, interactionName, interactionParams, promptData, payload.debug_mode ?? false);

    // Only used when the document has no text of its own.
    const deriveText = () => {
        if (doc.text) {
            return undefined;
        }
        const { title, description } = infoRes.result;
        let derived = "";
        if (title) {
            derived += title + "\n";
        }
        if (description) {
            derived += description;
        }
        return derived ? derived : undefined;
    };

    activity_1.log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });

    const properties = {
        ...infoRes.result,
        etag: doc.text_etag,
    };
    const text = deriveText();
    await client.objects.update(doc.id, {
        properties,
        text,
        generation_run_info: {
            id: infoRes.id,
            date: new Date().toISOString(),
            model: infoRes.modelId,
        },
    });
    return { status: "completed" };
}
77
- //# sourceMappingURL=generateDocumentProperties.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"generateDocumentProperties.js","sourceRoot":"","sources":["../../../src/activities/generateDocumentProperties.ts"],"names":[],"mappings":";;AAsBA,gEA2FC;AAjHD,mDAA2C;AAE3C,wEAAgE;AAEhE,mEAAqG;AAErG,MAAM,uBAAuB,GAAG,wBAAwB,CAAC;AAgBlD,KAAK,UAAU,0BAA0B,CAC5C,OAAsE;IAEtE,MAAM,OAAO,GAAG,MAAM,IAAA,kCAAa,EAAmC,OAAO,CAAC,CAAC;IAC/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAC7C,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,uBAAuB,CAAC;IAE1E,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,EAAE,CAAC;IAE7C,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE7E,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChF,cAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,6BAA6B,CAAC,CAAC;QAC1D,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;IAClD,CAAC;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/B,cAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,gBAAgB,CAAC,CAAC;QAC7C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,2BAA2B,EAAE,CAAC;IAC3F,CAAC;IAED,MAAM,WAAW,GAAG,GAAG,EAAE;QACrB,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1C,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,IAAI,MAAM,CAAC,UAAU,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACxE,OAAO,QAAQ,GAAG,GAAG,CAAC,EAAE,CAAC;QAC7B,CAAC;QAED,cAAG,CAAC,IAAI,CAAC,UAAU,QAAQ,yBAAyB,CAAC,CAAC;QACtD,OAAO,SAAS,CAAC;IACrB,CAAC,CAAC;IAEF,MAAM,UAAU,GAAG;QACf,OAAO,EAAE,GAAG,CAAC,IAAI,IAAI,SAAS;QAC9B,KAAK,EAAE,WAAW,EAAE,IAAI,SAAS;QACjC,aAAa,EAAE,OAAO,EAAE,aAAa,EAAE,aAAa,IAAI,SAAS;KACpE,CAAC;IAEF,cAAG,CAAC,IAAI,CACJ,uCAAuC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EACxE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CAC9C,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,IAAA,sDAA8B,EAChD,MAAM,EACN,eAAe,EACf;QACI,GAAG,MAAM;QACT,sBAAsB,EAAE,IAAI;QAC5B,aAAa,EAAE,IAAI,CAAC,aA
Aa;KACpC,EACD,UAAU,EACV,OAAO,CAAC,UAAU,IAAI,KAAK,CAC9B,CAAC;IAEF,MAAM,OAAO,GAAG,GAAG,EAAE;QACjB,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACvB,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC;QACxC,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC7B,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC;QACvC,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACP,OAAO,IAAI,CAAC;QAChB,CAAC;aAAM,CAAC;YACJ,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC,CAAC;IAEF,cAAG,CAAC,IAAI,CAAC,qCAAqC,QAAQ,cAAc,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;IACxG,MAAM,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE;QAChC,UAAU,EAAE;YACR,GAAG,OAAO,CAAC,MAAM;YACjB,IAAI,EAAE,GAAG,CAAC,SAAS;SACtB;QACD,IAAI,EAAE,OAAO,EAAE;QACf,mBAAmB,EAAE;YACjB,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC9B,KAAK,EAAE,OAAO,CAAC,OAAO;SACzB;KACJ,CAAC,CAAC;IAEH,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACnC,CAAC"}
@@ -1,248 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.generateEmbeddings = generateEmbeddings;
4
- const activity_1 = require("@temporalio/activity");
5
- const common_1 = require("@vertesia/common");
6
- const ActivityContext_js_1 = require("../dsl/setup/ActivityContext.js");
7
- const errors_js_1 = require("../errors.js");
8
- const blobs_js_1 = require("../utils/blobs.js");
9
- const chunks_js_1 = require("../utils/chunks.js");
10
- const tokens_js_1 = require("../utils/tokens.js");
11
/**
 * Activity entry point: generates one kind of embedding (`params.type`) for a
 * document, using the embeddings configuration of the current project.
 *
 * Returns a "skipped" result when the configuration for that type is not
 * enabled, and a "failed" result for an unsupported type.
 *
 * @param payload - activity payload handed to setupActivity
 * @returns the result object of the type-specific generator
 * @throws NoDocumentFound when the project, the embeddings configuration, the
 *         document or the document content is missing
 * @throws Error when the configuration has no environment
 */
async function generateEmbeddings(payload) {
    const { params, client, objectId, fetchProject } = await (0, ActivityContext_js_1.setupActivity)(payload);
    const { force, type } = params;
    const projectData = await fetchProject();
    const config = projectData?.configuration.embeddings[type];

    if (!projectData) {
        throw new errors_js_1.NoDocumentFound('Project not found', [payload.project_id]);
    }
    if (!config) {
        throw new errors_js_1.NoDocumentFound('Embeddings configuration not found', [objectId]);
    }
    if (!config.enabled) {
        activity_1.log.info(`Embeddings generation disabled for type ${type} on project: ${projectData.name} (${projectData.namespace})`, { config });
        return { id: objectId, status: "skipped", message: `Embeddings generation disabled for type ${type}` };
    }

    activity_1.log.info(`${type} embedding generation starting for object ${objectId}`, { force, config });
    if (!config.environment) {
        throw new Error('No environment found in project configuration. Set environment in project configuration to generate embeddings.');
    }

    const document = await client.objects.retrieve(objectId, "+text +parts +embeddings +tokens +properties");
    if (!document) {
        throw new errors_js_1.NoDocumentFound('Document not found', [objectId]);
    }
    if (!document.content) {
        throw new errors_js_1.NoDocumentFound('Document content not found', [objectId]);
    }

    const request = { client, config, document, type };
    switch (type) {
        // Text and properties embeddings share the same generator.
        case common_1.SupportedEmbeddingTypes.text:
        case common_1.SupportedEmbeddingTypes.properties:
            return await generateTextEmbeddings(request);
        case common_1.SupportedEmbeddingTypes.image:
            return await generateImageEmbeddings(request);
        default:
            return { id: objectId, status: "failed", message: `unsupported embedding type: ${type}` };
    }
}
71
/**
 * Generates and stores a text-based embedding ("text" or "properties") for a
 * document.
 *
 * The value sent to the embedding service is `JSON.stringify(document[type])`.
 * For the "text" type, when the token count exceeds `config.max_tokens`
 * (default 8000) the text is split using the supplied part definitions, every
 * part is embedded separately and the part vectors are combined with
 * computeAttentionEmbedding.
 *
 * NOTE: no etag-based skip is performed; embeddings are regenerated on every call.
 *
 * @param ctx - `{ document, client, type, config }`; `config` provides
 *        `environment`, `model` and optionally `max_tokens`
 * @param parts - optional part definitions passed to getContentParts; only
 *        used when the document is too large to embed in one request
 * @returns a result object with a `status` of "completed", "failed" or "error";
 *          "completed" results also carry `type` and the vector length `len`
 */
async function generateTextEmbeddings({ document, client, type, config }, parts) {
    if (!document) {
        return { status: "error", message: "document is null or undefined" };
    }
    const isText = type === common_1.SupportedEmbeddingTypes.text;
    const isProperties = type === common_1.SupportedEmbeddingTypes.properties;
    if (!isText && !isProperties) {
        return { id: document.id, status: "failed", message: `unsupported embedding type: ${type}` };
    }
    if (isText && !document.text) {
        return { id: document.id, status: "failed", message: "no text found" };
    }
    if (isProperties && !document.properties) {
        return { id: document.id, status: "failed", message: "no properties found" };
    }

    const { environment, model } = config;
    const partDefinitions = parts ?? [];

    // Count tokens if not already done, and mirror the stored value on the local copy.
    if (isText && !document.tokens?.count) {
        activity_1.log.debug('Updating token count for document: ' + document.id);
        const tokens = {
            ...(0, tokens_js_1.countTokens)(document.text),
            etag: document.text_etag ?? (0, blobs_js_1.md5)(document.text),
        };
        await client.objects.update(document.id, { tokens });
        document.tokens = { ...tokens };
    }

    const maxTokens = config.max_tokens ?? 8000;
    activity_1.log.info(`Generating ${type} embeddings for document ${document.id}`);

    const tooLarge = isText && Boolean(document.tokens?.count) && document.tokens.count > maxTokens;
    if (!tooLarge) {
        activity_1.log.info(`Generating ${type} embeddings for document`);
        // The configured model is forwarded here exactly as it is for parts and images.
        const res = await generateEmbeddingsFromStudio(JSON.stringify(document[type]), environment, client, model);
        if (!res || !res.values) {
            return { id: document.id, status: "failed", message: "no embeddings generated" };
        }
        activity_1.log.info(`${type} embeddings generated for document ${document.id}`, { len: res.values.length });
        await client.objects.setEmbedding(document.id, type, {
            values: res.values,
            model: res.model,
            etag: document.text_etag
        });
        return { id: document.id, type, status: "completed", len: res.values.length };
    }

    // Too large for a single request: embed the parts and combine the vectors.
    activity_1.log.info('Document too large, generating embeddings for parts');
    if (partDefinitions.length === 0) {
        activity_1.log.info('No parts found for document, skipping embeddings generation');
        return { id: document.id, status: "failed", message: "no parts found" };
    }
    activity_1.log.info('Generating embeddings for parts', { parts: partDefinitions, max_tokens: maxTokens });
    const docParts = (0, chunks_js_1.getContentParts)(document.text, partDefinitions);
    activity_1.log.info(`Retrieved ${docParts.length} parts`);
    const start = Date.now();

    // Never rejects: a failing part yields a record whose `result` is null, so
    // one bad part cannot abort the whole batch.
    const generatePartEmbeddings = async (partContent, i) => {
        const localStart = Date.now();
        if (!partContent) {
            return { id: i, number: i, result: null, status: "skipped", message: "no text found" };
        }
        try {
            activity_1.log.info(`Generating embeddings for part ${i}`, { text_len: partContent.length });
            const embedding = await generateEmbeddingsFromStudio(partContent, environment, client, model);
            if (!embedding || !embedding.values || embedding.values.length === 0) {
                return { id: i, number: i, result: null, message: "no embeddings generated" };
            }
            activity_1.log.info(`Generated embeddings for part ${i}`, { len: embedding.values.length, duration: Date.now() - localStart });
            return { id: i, number: i, result: embedding };
        }
        catch (err) {
            activity_1.log.error('Error generating embeddings for part ' + i, { text_length: partContent.length, error: err });
            return { id: i, number: i, result: null, message: "error generating embeddings", error: err?.message };
        }
    };

    const partEmbeddings = await Promise.all(docParts.map((part, i) => generatePartEmbeddings(part, i)));
    const validPartEmbeddings = partEmbeddings
        .map((record) => record.result)
        .filter((result) => result !== null);

    // Without a single usable part there is no vector and no model name to store.
    if (validPartEmbeddings.length === 0) {
        activity_1.log.error(`No part embeddings could be generated for document ${document.id}`, { parts: docParts.length });
        return { id: document.id, type, status: "failed", message: "no embeddings generated" };
    }

    const averagedEmbedding = computeAttentionEmbedding(validPartEmbeddings.map((e) => e.values));
    activity_1.log.info(`Averaged embeddings for document ${document.id} in ${(Date.now() - start) / 1000} seconds`, { len: averagedEmbedding.length, count: validPartEmbeddings.length, max_tokens: maxTokens });
    await client.objects.setEmbedding(document.id, type, {
        values: averagedEmbedding,
        model: validPartEmbeddings[0].model,
        etag: document.text_etag
    });
    activity_1.log.info(`Object ${document.id} embedding set`, { type, len: averagedEmbedding.length });
    return { id: document.id, type, status: "completed", len: averagedEmbedding.length };
}
173
/**
 * Generates and stores an image embedding for a document whose content is an
 * image or a PDF, based on a PNG rendition (max 1024 px) of the document.
 *
 * @param ctx - `{ document, client, type, config }`; `config` provides
 *        `environment` and `model`
 * @returns a "completed" result with the vector length, or a "failed" result
 *          when the content is not an image/PDF or no vector came back
 * @throws Error while the rendition is still being generated (so the caller can retry)
 * @throws NoDocumentFound when the rendition failed or has no source
 */
async function generateImageEmbeddings({ document, client, type, config }) {
    activity_1.log.info('Generating image embeddings for document ' + document.id, { content: document.content });

    const mimeType = document.content?.type;
    const isImageLike = mimeType?.startsWith('image/') || mimeType?.includes('pdf');
    if (!isImageLike) {
        return { id: document.id, type, status: "failed", message: "content is not an image" };
    }

    const { environment, model } = config;
    const renditionResponse = await client.store.objects.getRendition(document.id, {
        format: "image/png",
        max_hw: 1024,
        generate_if_missing: true
    });
    if (renditionResponse.status === 'generating') {
        throw new Error("Rendition is generating, will retry later");
    }
    if (renditionResponse.status === "failed" || !renditionResponse.rendition) {
        throw new errors_js_1.NoDocumentFound("Rendition retrieval failed", [document.id]);
    }
    const renditionSource = renditionResponse.rendition.content.source;
    if (!renditionSource) {
        throw new errors_js_1.NoDocumentFound("No source found in rendition", [document.id]);
    }

    const image = await (0, blobs_js_1.fetchBlobAsBase64)(client, renditionSource);
    const pending = client.environments.embeddings(environment, { image, model });
    let embedding;
    try {
        embedding = await pending;
    }
    catch (e) {
        activity_1.log.error('Error generating embeddings for image', { error: e });
        throw e;
    }
    if (!embedding || !embedding.values) {
        return { id: document.id, status: "failed", message: "no embeddings generated" };
    }

    await client.objects.setEmbedding(document.id, common_1.SupportedEmbeddingTypes.image, {
        values: embedding.values,
        model: embedding.model,
        etag: document.text_etag
    });
    return { id: document.id, type, status: "completed", len: embedding.values.length };
}
211
/**
 * Requests an embedding for a piece of text from the given environment.
 *
 * @param text - the text to embed
 * @param env - environment identifier passed to `client.environments.embeddings`
 * @param client - client used to reach the embeddings endpoint
 * @param model - optional model to request
 * @returns the embeddings response of the client
 * @throws whatever the client rejects with, after logging it
 */
async function generateEmbeddingsFromStudio(text, env, client, model) {
    activity_1.log.info(`Generating embeddings for text of ${text.length} chars with environment ${env}`);
    const pending = client.environments.embeddings(env, { text, model });
    try {
        return await pending;
    }
    catch (e) {
        activity_1.log.error('Error generating embeddings for text', { error: e });
        throw e;
    }
}
221
// Combines per-chunk embeddings into one document embedding.
// This is a naive implementation (a uniform mean) and should be replaced with a
// more sophisticated one, e.g. using tensorflow in a specific package.
/**
 * Averages chunk embeddings into a single vector, giving every chunk the same
 * weight (1 / number of chunks).
 *
 * The weights are deliberately deterministic: the result is stored on the
 * document, so the same chunks must always produce the same vector.
 *
 * @param chunkEmbeddings - array of numeric vectors; the dimension of the
 *        result is taken from the first vector
 * @returns the element-wise mean, or an empty array when there are no chunks
 */
function computeAttentionEmbedding(chunkEmbeddings) {
    if (chunkEmbeddings.length === 0) {
        return [];
    }
    const start = Date.now();
    const weight = 1 / chunkEmbeddings.length;
    const embeddingDim = chunkEmbeddings[0].length;
    const documentEmbedding = new Array(embeddingDim).fill(0);
    // Weighted sum of embeddings
    for (const chunk of chunkEmbeddings) {
        for (let j = 0; j < embeddingDim; j++) {
            documentEmbedding[j] += chunk[j] * weight;
        }
    }
    const duration = Date.now() - start;
    console.log(`Computed document embedding in ${duration}ms for ${chunkEmbeddings.length} chunks`);
    return documentEmbedding;
}
248
- //# sourceMappingURL=generateEmbeddings.js.map