@vertesia/workflow 0.50.1 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
  2. package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
  3. package/lib/cjs/activities/chunkDocument.js +39 -34
  4. package/lib/cjs/activities/chunkDocument.js.map +1 -1
  5. package/lib/cjs/activities/createDocumentFromOther.js +2 -2
  6. package/lib/cjs/activities/createDocumentFromOther.js.map +1 -1
  7. package/lib/cjs/activities/executeInteraction.js +12 -7
  8. package/lib/cjs/activities/executeInteraction.js.map +1 -1
  9. package/lib/cjs/activities/extractDocumentText.js +25 -13
  10. package/lib/cjs/activities/extractDocumentText.js.map +1 -1
  11. package/lib/cjs/activities/generateDocumentProperties.js +22 -4
  12. package/lib/cjs/activities/generateDocumentProperties.js.map +1 -1
  13. package/lib/cjs/activities/generateEmbeddings.js +58 -102
  14. package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
  15. package/lib/cjs/activities/generateImageRendition.js +77 -34
  16. package/lib/cjs/activities/generateImageRendition.js.map +1 -1
  17. package/lib/cjs/activities/generateOrAssignContentType.js +3 -7
  18. package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
  19. package/lib/cjs/activities/notifyWebhook.js.map +1 -1
  20. package/lib/cjs/conversion/image.js +80 -12
  21. package/lib/cjs/conversion/image.js.map +1 -1
  22. package/lib/cjs/dsl/setup/ActivityContext.js +32 -8
  23. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
  24. package/lib/cjs/dsl.js +20 -0
  25. package/lib/cjs/dsl.js.map +1 -0
  26. package/lib/cjs/errors.js +13 -1
  27. package/lib/cjs/errors.js.map +1 -1
  28. package/lib/cjs/index.js +8 -2
  29. package/lib/cjs/index.js.map +1 -1
  30. package/lib/cjs/iterative-generation/activities/extractToc.js +2 -2
  31. package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -1
  32. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +1 -1
  33. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -1
  34. package/lib/cjs/iterative-generation/activities/generatePart.js +1 -1
  35. package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -1
  36. package/lib/cjs/iterative-generation/activities/generateToc.js +1 -1
  37. package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -1
  38. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +2 -1
  39. package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
  40. package/lib/cjs/iterative-generation/utils.js +7 -4
  41. package/lib/cjs/iterative-generation/utils.js.map +1 -1
  42. package/lib/cjs/system/notifyWebhookWorkflow.js +2 -1
  43. package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -1
  44. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +1 -1
  45. package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -1
  46. package/lib/cjs/utils/blobs.js +13 -7
  47. package/lib/cjs/utils/blobs.js.map +1 -1
  48. package/lib/cjs/utils/chunks.js +14 -0
  49. package/lib/cjs/utils/chunks.js.map +1 -0
  50. package/lib/cjs/utils/client.js +6 -5
  51. package/lib/cjs/utils/client.js.map +1 -1
  52. package/lib/cjs/utils/memory.js +2 -9
  53. package/lib/cjs/utils/memory.js.map +1 -1
  54. package/lib/cjs/workflows.js +1 -3
  55. package/lib/cjs/workflows.js.map +1 -1
  56. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
  57. package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
  58. package/lib/esm/activities/chunkDocument.js +39 -34
  59. package/lib/esm/activities/chunkDocument.js.map +1 -1
  60. package/lib/esm/activities/createDocumentFromOther.js +1 -1
  61. package/lib/esm/activities/createDocumentFromOther.js.map +1 -1
  62. package/lib/esm/activities/executeInteraction.js +12 -7
  63. package/lib/esm/activities/executeInteraction.js.map +1 -1
  64. package/lib/esm/activities/extractDocumentText.js +25 -13
  65. package/lib/esm/activities/extractDocumentText.js.map +1 -1
  66. package/lib/esm/activities/generateDocumentProperties.js +22 -4
  67. package/lib/esm/activities/generateDocumentProperties.js.map +1 -1
  68. package/lib/esm/activities/generateEmbeddings.js +58 -69
  69. package/lib/esm/activities/generateEmbeddings.js.map +1 -1
  70. package/lib/esm/activities/generateImageRendition.js +78 -35
  71. package/lib/esm/activities/generateImageRendition.js.map +1 -1
  72. package/lib/esm/activities/generateOrAssignContentType.js +3 -7
  73. package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
  74. package/lib/esm/activities/notifyWebhook.js.map +1 -1
  75. package/lib/esm/conversion/image.js +80 -12
  76. package/lib/esm/conversion/image.js.map +1 -1
  77. package/lib/esm/dsl/setup/ActivityContext.js +34 -10
  78. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
  79. package/lib/esm/dsl.js +4 -0
  80. package/lib/esm/dsl.js.map +1 -0
  81. package/lib/esm/errors.js +11 -0
  82. package/lib/esm/errors.js.map +1 -1
  83. package/lib/esm/index.js +8 -2
  84. package/lib/esm/index.js.map +1 -1
  85. package/lib/esm/iterative-generation/activities/extractToc.js +3 -3
  86. package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -1
  87. package/lib/esm/iterative-generation/activities/finalizeOutput.js +2 -2
  88. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -1
  89. package/lib/esm/iterative-generation/activities/generatePart.js +2 -2
  90. package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -1
  91. package/lib/esm/iterative-generation/activities/generateToc.js +2 -2
  92. package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -1
  93. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +2 -1
  94. package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
  95. package/lib/esm/iterative-generation/utils.js +7 -4
  96. package/lib/esm/iterative-generation/utils.js.map +1 -1
  97. package/lib/esm/system/notifyWebhookWorkflow.js +2 -1
  98. package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -1
  99. package/lib/esm/system/recalculateEmbeddingsWorkflow.js +2 -2
  100. package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -1
  101. package/lib/esm/utils/blobs.js +13 -7
  102. package/lib/esm/utils/blobs.js.map +1 -1
  103. package/lib/esm/utils/chunks.js +9 -0
  104. package/lib/esm/utils/chunks.js.map +1 -0
  105. package/lib/esm/utils/client.js +5 -4
  106. package/lib/esm/utils/client.js.map +1 -1
  107. package/lib/esm/utils/memory.js +2 -7
  108. package/lib/esm/utils/memory.js.map +1 -1
  109. package/lib/esm/workflows.js +0 -1
  110. package/lib/esm/workflows.js.map +1 -1
  111. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +10 -0
  112. package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -1
  113. package/lib/types/activities/chunkDocument.d.ts +15 -0
  114. package/lib/types/activities/chunkDocument.d.ts.map +1 -1
  115. package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -1
  116. package/lib/types/activities/executeInteraction.d.ts +19 -4
  117. package/lib/types/activities/executeInteraction.d.ts.map +1 -1
  118. package/lib/types/activities/extractDocumentText.d.ts.map +1 -1
  119. package/lib/types/activities/generateDocumentProperties.d.ts +1 -1
  120. package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -1
  121. package/lib/types/activities/generateEmbeddings.d.ts +21 -17
  122. package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
  123. package/lib/types/activities/generateImageRendition.d.ts +3 -5
  124. package/lib/types/activities/generateImageRendition.d.ts.map +1 -1
  125. package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
  126. package/lib/types/activities/notifyWebhook.d.ts +1 -2
  127. package/lib/types/activities/notifyWebhook.d.ts.map +1 -1
  128. package/lib/types/conversion/image.d.ts +8 -6
  129. package/lib/types/conversion/image.d.ts.map +1 -1
  130. package/lib/types/dsl/dslProxyActivities.d.ts +2 -2
  131. package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -1
  132. package/lib/types/dsl/setup/ActivityContext.d.ts +3 -0
  133. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
  134. package/lib/types/dsl.d.ts +4 -0
  135. package/lib/types/dsl.d.ts.map +1 -0
  136. package/lib/types/errors.d.ts +6 -0
  137. package/lib/types/errors.d.ts.map +1 -1
  138. package/lib/types/index.d.ts +8 -2
  139. package/lib/types/index.d.ts.map +1 -1
  140. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -1
  141. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -1
  142. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -1
  143. package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -1
  144. package/lib/types/iterative-generation/utils.d.ts +2 -2
  145. package/lib/types/iterative-generation/utils.d.ts.map +1 -1
  146. package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -1
  147. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +2 -17
  148. package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
  149. package/lib/types/utils/blobs.d.ts.map +1 -1
  150. package/lib/types/utils/chunks.d.ts +9 -0
  151. package/lib/types/utils/chunks.d.ts.map +1 -0
  152. package/lib/types/utils/client.d.ts +2 -2
  153. package/lib/types/utils/client.d.ts.map +1 -1
  154. package/lib/types/utils/memory.d.ts +1 -5
  155. package/lib/types/utils/memory.d.ts.map +1 -1
  156. package/lib/types/workflows.d.ts +0 -1
  157. package/lib/types/workflows.d.ts.map +1 -1
  158. package/lib/workflows-bundle.js +8311 -5790
  159. package/package.json +28 -10
  160. package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +20 -1
  161. package/src/activities/chunkDocument.ts +62 -42
  162. package/src/activities/createDocumentFromOther.ts +2 -2
  163. package/src/activities/executeInteraction.ts +33 -12
  164. package/src/activities/extractDocumentText.ts +30 -14
  165. package/src/activities/generateDocumentProperties.ts +37 -16
  166. package/src/activities/generateEmbeddings.ts +91 -79
  167. package/src/activities/generateImageRendition.ts +100 -53
  168. package/src/activities/generateOrAssignContentType.ts +5 -11
  169. package/src/activities/notifyWebhook.ts +2 -2
  170. package/src/conversion/image.test.ts +110 -18
  171. package/src/conversion/image.ts +90 -15
  172. package/src/conversion/pandoc.test.ts +7 -5
  173. package/src/dsl/dslProxyActivities.ts +2 -2
  174. package/src/dsl/setup/ActivityContext.ts +60 -19
  175. package/src/dsl.ts +3 -0
  176. package/src/errors.ts +27 -6
  177. package/src/index.ts +9 -2
  178. package/src/iterative-generation/activities/extractToc.ts +3 -3
  179. package/src/iterative-generation/activities/finalizeOutput.ts +3 -3
  180. package/src/iterative-generation/activities/generatePart.ts +3 -3
  181. package/src/iterative-generation/activities/generateToc.ts +2 -2
  182. package/src/iterative-generation/iterativeGenerationWorkflow.ts +2 -1
  183. package/src/iterative-generation/utils.ts +10 -6
  184. package/src/system/notifyWebhookWorkflow.ts +3 -2
  185. package/src/system/recalculateEmbeddingsWorkflow.ts +2 -2
  186. package/src/utils/blobs.ts +12 -7
  187. package/src/utils/chunks.ts +17 -0
  188. package/src/utils/client.ts +6 -5
  189. package/src/utils/memory.ts +3 -8
  190. package/src/workflows.ts +0 -2
  191. package/lib/cjs/conversion/pdf.js +0 -13
  192. package/lib/cjs/conversion/pdf.js.map +0 -1
  193. package/lib/cjs/system/generateObjectText.js +0 -76
  194. package/lib/cjs/system/generateObjectText.js.map +0 -1
  195. package/lib/esm/conversion/pdf.js +0 -7
  196. package/lib/esm/conversion/pdf.js.map +0 -1
  197. package/lib/esm/system/generateObjectText.js +0 -73
  198. package/lib/esm/system/generateObjectText.js.map +0 -1
  199. package/lib/types/conversion/pdf.d.ts +0 -2
  200. package/lib/types/conversion/pdf.d.ts.map +0 -1
  201. package/lib/types/system/generateObjectText.d.ts +0 -4
  202. package/lib/types/system/generateObjectText.d.ts.map +0 -1
  203. package/src/conversion/pdf.test.ts +0 -35
  204. package/src/conversion/pdf.ts +0 -8
  205. package/src/system/generateObjectText.ts +0 -95
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vertesia/workflow",
3
- "version": "0.50.1",
3
+ "version": "0.52.0",
4
4
  "type": "module",
5
5
  "description": "Composable prompts workflow dsl",
6
6
  "main": "./lib/esm/index.js",
@@ -18,28 +18,28 @@
18
18
  },
19
19
  "license": "Apache-2.0",
20
20
  "devDependencies": {
21
+ "@smithy/types": "^3.7.2",
21
22
  "@temporalio/testing": "^1.11.5",
22
23
  "@temporalio/worker": "^1.11.5",
24
+ "@types/fluent-ffmpeg": "^2.1.27",
23
25
  "@types/jsonwebtoken": "^9.0.7",
24
26
  "@types/node": "^22.5.1",
25
27
  "@types/papaparse": "^5.3.15",
26
28
  "@types/tmp": "^0.2.6",
27
29
  "ts-dual-module": "^0.6.3",
28
- "vitest": "^2.1.8",
29
- "@smithy/types": "^3.7.2"
30
+ "vitest": "^3.0.9"
30
31
  },
31
32
  "dependencies": {
32
33
  "@aws-sdk/client-s3": "^3.693.0",
33
34
  "@aws-sdk/client-textract": "^3.693.0",
34
35
  "@aws-sdk/credential-providers": "^3.693.0",
35
- "@vertesia/memory": "^0.43.0",
36
- "@opendocsg/pdf2md": "0.2.0",
37
36
  "@temporalio/activity": "^1.11.5",
38
37
  "@temporalio/workflow": "^1.11.5",
39
- "@tensorflow/tfjs-node": "^4.19.0",
40
38
  "@types/json-schema": "^7.0.15",
39
+ "@vertesia/memory": "^0.43.0",
41
40
  "api-fetch-client": "^0.13.0",
42
41
  "fast-deep-equal": "^3.1.3",
42
+ "fluent-ffmpeg": "^2.1.3",
43
43
  "jsonwebtoken": "^9.0.2",
44
44
  "ms": "3.0.0-canary.1",
45
45
  "node-web-stream-adapters": "^0.2.1",
@@ -48,15 +48,17 @@
48
48
  "sharp": "^0.33.4",
49
49
  "tiktoken": "^1.0.15",
50
50
  "tmp": "^0.2.3",
51
+ "tmp-promise": "^3.0.3",
51
52
  "yaml": "^2.6.0",
52
- "@llumiverse/core": "0.15.0",
53
- "@vertesia/client": "0.50.1",
54
- "@vertesia/common": "0.50.1"
53
+ "@vertesia/common": "0.52.0",
54
+ "@vertesia/client": "0.53.0",
55
+ "@llumiverse/core": "0.17.0"
55
56
  },
56
57
  "ts_dual_module": {
57
58
  "outDir": "lib",
58
59
  "exports": {
59
- "activities": "./activities",
60
+ "activities": "./activities/index.js",
61
+ "dsl-activities": "./activities/index-dsl.js",
60
62
  "workflows": "./workflows",
61
63
  "workflows-bundle": "./workflows-bundle.js",
62
64
  "vars": "./vars"
@@ -78,11 +80,21 @@
78
80
  "import": "./lib/esm/activities/index.js",
79
81
  "require": "./lib/cjs/activities/index.js"
80
82
  },
83
+ "./dsl-activities": {
84
+ "types": "./lib/types/activities/index-dsl.d.ts",
85
+ "import": "./lib/esm/activities/index-dsl.js",
86
+ "require": "./lib/cjs/activities/index-dsl.js"
87
+ },
81
88
  "./workflows": {
82
89
  "types": "./lib/types/workflows.d.ts",
83
90
  "import": "./lib/esm/workflows.js",
84
91
  "require": "./lib/cjs/workflows.js"
85
92
  },
93
+ "./dsl": {
94
+ "types": "./lib/types/dsl.d.ts",
95
+ "import": "./lib/esm/dsl.js",
96
+ "require": "./lib/cjs/dsl.js"
97
+ },
86
98
  "./workflows-bundle": {
87
99
  "import": "./lib/workflows-bundle.js"
88
100
  }
@@ -93,9 +105,15 @@
93
105
  "activities": [
94
106
  "./lib/types/activities/index.d.ts"
95
107
  ],
108
+ "dsl-activities": [
109
+ "./lib/types/activities/index-dsl.d.ts"
110
+ ],
96
111
  "workflows": [
97
112
  "./lib/types/workflows.d.ts"
98
113
  ],
114
+ "dsl": [
115
+ "./lib/types/dsl.d.ts"
116
+ ],
99
117
  "vars": [
100
118
  "./lib/types/dsl/vars.d.ts"
101
119
  ]
@@ -1,5 +1,5 @@
1
- import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
2
1
  import { log } from "@temporalio/activity";
2
+ import { ContentObjectStatus, DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
3
3
  import { setupActivity } from "../../dsl/setup/ActivityContext.js";
4
4
  import { ActivityParamNotFound, NoDocumentFound } from "../../errors.js";
5
5
  interface CreateOrUpdateObjectFromInteractionRunParams {
@@ -19,9 +19,21 @@ interface CreateOrUpdateObjectFromInteractionRunParams {
19
19
  */
20
20
  update_existing_id?: string,
21
21
 
22
+ /**
23
+ * The name to use for the object if none was generated by the interaction result
24
+ */
22
25
  fallback_name?: string, // a name to use if no one was generated by the interaction
23
26
 
27
+ /**
28
+ * The id of the parent object to use. If not provided, the document will be created at the root level
29
+ */
24
30
  parent?: string, // the parent object id
31
+
32
+ /**
33
+ * The name of the property to use for the text. If not provided, the text will be set to the result of the interaction
34
+ */
35
+ update_text_from_property?: string,
36
+
25
37
  }
26
38
 
27
39
  export interface CreateOrUpdateObjectFromInteractionRun extends DSLActivitySpec<CreateOrUpdateObjectFromInteractionRunParams> {
@@ -80,6 +92,13 @@ export async function createOrUpdateDocumentFromInteractionRun(payload: DSLActiv
80
92
  }
81
93
  };
82
94
 
95
+ if (params.update_text_from_property) {
96
+ const text = docPayload.properties[params.update_text_from_property];
97
+ if (text) {
98
+ docPayload.text = text;
99
+ }
100
+ }
101
+
83
102
  //create or update the document
84
103
  let newDoc: boolean = false;
85
104
  let doc = undefined;
@@ -1,18 +1,12 @@
1
- import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
2
1
  import { log } from "@temporalio/activity";
2
+ import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
3
3
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
4
+ import { DocPart } from "../utils/chunks.js";
4
5
  import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
5
6
 
6
7
  const INT_CHUNK_DOCUMENT = "sys:ChunkDocument"
7
8
 
8
- interface DocPart {
9
9
 
10
- line_number_start: number
11
- line_number_end: number
12
- name: string
13
- type: string
14
-
15
- }
16
10
 
17
11
  export interface ChunkDocumentResult {
18
12
  id: string
@@ -22,9 +16,28 @@ export interface ChunkDocumentResult {
22
16
  }
23
17
 
24
18
  export interface ChunkDocumentParams extends InteractionExecutionParams {
19
+
20
+ /**
21
+ * If true, force chunking even if the document is already chunked
22
+ */
25
23
  force?: boolean;
24
+
25
+ /**
26
+ * The interaction name to use for chunking
27
+ * If not set, the default interaction will be used
28
+ */
26
29
  interactionName?: string;
30
+
31
+ /**
32
+ * The object type to use for the document parts
33
+ * If not set, the type of the document will be used
34
+ */
27
35
  docPartType?: string;
36
+
37
+ /**
38
+ * If true, create parts as document objects
39
+ */
40
+ createParts?: boolean;
28
41
  }
29
42
 
30
43
  export interface ChunkDocument extends DSLActivitySpec<ChunkDocumentParams> {
@@ -75,48 +88,55 @@ export async function chunkDocument(payload: DSLActivityExecutionPayload<ChunkDo
75
88
  return { id: objectId, status: "failed", parts: [], message: "no parts found" }
76
89
  }
77
90
 
78
- const partDocs = await Promise.all(parts.map(async (part, i) => {
79
91
 
80
- const text = lines.filter((_l, i) => i >= part.line_number_start && i <= part.line_number_end).join('\n');
92
+ /**
93
+ * Only create parts as document if the flag is set
94
+ */
95
+ if (params.createParts) {
96
+
97
+ const partDocs = await Promise.all(parts.map(async (part, i) => {
81
98
 
82
- const location = () => {
83
- let location = document.location;
84
- if (location.endsWith('/')) {
85
- location += document.name + "/" + part.type
99
+ const text = lines.filter((_l, i) => i >= part.line_number_start && i <= part.line_number_end).join('\n');
100
+
101
+ const location = () => {
102
+ let location = document.location;
103
+ if (location.endsWith('/')) {
104
+ location += document.name + "/" + part.type
105
+ }
106
+ location += '/' + document.name + "/" + part.type;
107
+ return location;
86
108
  }
87
- location += '/' + document.name + "/" + part.type;
88
- return location;
109
+
110
+ const docPart = await client.objects.create({
111
+ name: part.name,
112
+ parent: objectId,
113
+ text: text,
114
+ location: location(),
115
+ properties: {
116
+ part_number: i + 1,
117
+ etag: document.text_etag,
118
+ source_line_start: part.line_number_start,
119
+ source_line_end: part.line_number_end,
120
+ title: part.name
121
+ }
122
+ });
123
+ return docPart;
124
+ }));
125
+
126
+ //delete previous parts
127
+ if (document.parts && document.parts.length > 0) {
128
+ log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
129
+ await Promise.all(document.parts.map(async (partId) => {
130
+ await client.objects.delete(partId);
131
+ }));
89
132
  }
90
133
 
91
- const docPart = await client.objects.create({
92
- name: part.name,
93
- parent: objectId,
94
- text: text,
95
- location: location(),
96
- properties: {
97
- part_number: i + 1,
98
- etag: document.text_etag,
99
- source_line_start: part.line_number_start,
100
- source_line_end: part.line_number_end,
101
- title: part.name
102
- }
134
+ await client.objects.update(objectId, {
135
+ parts: partDocs.map(p => p.id),
136
+ parts_etag: document.text_etag
103
137
  });
104
- return docPart;
105
- }));
106
-
107
- //delete previous parts
108
- if (document.parts && document.parts.length > 0) {
109
- log.info('Deleting previous parts for object ID: ' + objectId, { parts: document.parts });
110
- await Promise.all(document.parts.map(async (partId) => {
111
- await client.objects.delete(partId);
112
- }));
113
138
  }
114
139
 
115
- await client.objects.update(objectId, {
116
- parts: partDocs.map(p => p.id),
117
- parts_etag: document.text_etag
118
- });
119
-
120
140
  log.info(`Object ${objectId} chunking completed`, { parts: document.parts });
121
141
 
122
142
  return { id: objectId, status: "completed", parts: document.parts }
@@ -1,11 +1,11 @@
1
- import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
2
1
  import { log } from "@temporalio/activity";
2
+ import { NodeStreamSource } from "@vertesia/client/node";
3
+ import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
3
4
  import fs from 'fs';
4
5
  import { pdfExtractPages } from "../conversion/mutool.js";
5
6
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
6
7
  import { NoDocumentFound } from "../errors.js";
7
8
  import { saveBlobToTempFile } from "../utils/blobs.js";
8
- import { NodeStreamSource } from "../utils/memory.js";
9
9
 
10
10
  interface CreatePdfDocumentFromSourceParams {
11
11
 
@@ -1,10 +1,11 @@
1
+ import { ModelOptions } from "@llumiverse/core";
2
+ import { activityInfo, log } from "@temporalio/activity";
1
3
  import { VertesiaClient } from "@vertesia/client";
2
4
  import { DSLActivityExecutionPayload, DSLActivitySpec, ExecutionRun, ExecutionRunStatus, InteractionExecutionConfiguration, RunSearchPayload } from "@vertesia/common";
3
- import { activityInfo, log } from "@temporalio/activity";
4
5
  import { projectResult } from "../dsl/projections.js";
5
6
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
7
+ import { ActivityParamNotFound } from "../errors.js";
6
8
  import { TruncateSpec, truncByMaxTokens } from "../utils/tokens.js";
7
- import { ModelOptions } from "@llumiverse/core";
8
9
 
9
10
  //Example:
10
11
  //@ts-ignore
@@ -52,8 +53,7 @@ const JSON: DSLActivitySpec = {
52
53
  }
53
54
  }
54
55
  }
55
-
56
- export interface InteractionExecutionParams extends ModelOptions {
56
+ export interface InteractionExecutionParams {
57
57
  /**
58
58
  * The environment to use. If not specified the project default environment will be used.
59
59
  * If the latter is not specified an exception will be thrown.
@@ -80,13 +80,29 @@ export interface InteractionExecutionParams extends ModelOptions {
80
80
  * Whether or not to include the previous error in the interaction prompt data
81
81
  */
82
82
  include_previous_error?: boolean;
83
- }
84
83
 
84
+ /**
85
+ * Options to control generation
86
+ */
87
+ model_options?: ModelOptions;
88
+ }
85
89
 
90
+ /**
91
+ * TODO: must be kept in sync with InteractionAsyncExecutionPayload from @vertesia/common
92
+ * Also see the executeInteractionAsync endpoint on the server for how the client payload is sent to the workflow.
93
+ * (interaction is translated to interactionName)
94
+ */
86
95
  export interface ExecuteInteractionParams extends InteractionExecutionParams {
96
+ //TODO rename to interaction as in InteractionAsyncExecutionPayload
87
97
  interactionName: string;
88
98
  prompt_data: Record<string, any>;
89
- truncate?: Record<string, TruncateSpec>
99
+ /**
100
+ * Additional prompt data passed by the workflow configuration. This will be merged with prompt_data if any.
101
+ * You should use `import: ["static_prompt_data"]` to import the workflow prompt data as static_prompt_data param.
102
+ * Otherwise the workflow prompt data will be ignored.
103
+ */
104
+ static_prompt_data?: Record<string, any>;
105
+ truncate?: Record<string, TruncateSpec>;
90
106
  }
91
107
 
92
108
  export interface ExecuteInteraction extends DSLActivitySpec<ExecuteInteractionParams> {
@@ -98,7 +114,15 @@ export async function executeInteraction(payload: DSLActivityExecutionPayload<Ex
98
114
  client, params
99
115
  } = await setupActivity<ExecuteInteractionParams>(payload);
100
116
 
101
- const { interactionName, prompt_data } = params;
117
+ const { interactionName, prompt_data, static_prompt_data: wf_prompt_data } = params;
118
+ if (wf_prompt_data) {
119
+ Object.assign(prompt_data, wf_prompt_data);
120
+ }
121
+
122
+ if (!interactionName) {
123
+ log.error("Missing interactionName", { params });
124
+ throw new ActivityParamNotFound("interactionName", payload.activity);
125
+ }
102
126
 
103
127
  if (params.truncate) {
104
128
  const truncate = params.truncate;
@@ -153,8 +177,7 @@ export async function executeInteractionFromActivity(client: VertesiaClient, int
153
177
  const config: InteractionExecutionConfiguration = {
154
178
  environment: params.environment,
155
179
  model: params.model,
156
- max_tokens: params.max_tokens,
157
- temperature: params.temperature
180
+ model_options: params.model_options,
158
181
  }
159
182
  const data = {
160
183
  ...prompt_data,
@@ -163,9 +186,7 @@ export async function executeInteractionFromActivity(client: VertesiaClient, int
163
186
 
164
187
  const result_schema = params.result_schema;
165
188
 
166
- if (debug) {
167
- log.info(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
168
- }
189
+ log.debug(`About to execute interaction ${interactionName}`, { config, data, result_schema, tags });
169
190
 
170
191
  const res = await client.interactions.executeByName(interactionName, {
171
192
  config,
@@ -1,8 +1,7 @@
1
- import { ContentObject, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
2
1
  import { log } from "@temporalio/activity";
2
+ import { ContentObject, CreateContentObjectPayload, DSLActivityExecutionPayload, DSLActivitySpec } from '@vertesia/common';
3
3
  import { mutoolPdfToText } from '../conversion/mutool.js';
4
4
  import { manyToMarkdown } from '../conversion/pandoc.js';
5
- import { trasformPdfToMarkdown } from '../conversion/pdf.js';
6
5
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
7
6
  import { NoDocumentFound } from '../errors.js';
8
7
  import { TextExtractionResult, TextExtractionStatus } from '../result-types.js';
@@ -66,11 +65,7 @@ export async function extractDocumentText(payload: DSLActivityExecutionPayload<E
66
65
 
67
66
  case 'application/pdf':
68
67
  //if pdf is more than 2MB, use mutool
69
- if (fileBuffer.length > 2 * 1024 * 1024) {
70
- txt = await mutoolPdfToText(fileBuffer);
71
- } else {
72
- txt = await trasformPdfToMarkdown(fileBuffer);
73
- }
68
+ txt = await mutoolPdfToText(fileBuffer);
74
69
  break;
75
70
 
76
71
  case 'text/plain':
@@ -161,13 +156,34 @@ function createResponse(doc: ContentObject, text: string, status: TextExtraction
161
156
  }
162
157
 
163
158
 
164
- //if file is less than 100KB, check if it looks like text
165
159
  function sniffIfText(buf: Buffer) {
166
- if (buf.length < 100 * 1024) {
167
- const s = buf.toString('utf8');
168
- if (s.length > 0) {
169
- return true;
160
+ // If file is too large, don't even try
161
+ if (buf.length > 500 * 1024) {
162
+ return false;
163
+ }
164
+
165
+ // Count binary/control characters
166
+ let binaryCount = 0;
167
+ const sampleSize = Math.min(buf.length, 1000); // Check first 1000 bytes
168
+
169
+ for (let i = 0; i < sampleSize; i++) {
170
+ // Count control characters (except common whitespace)
171
+ const byte = buf[i];
172
+ if ((byte < 32 && ![9, 10, 13].includes(byte)) || byte === 0) {
173
+ binaryCount++;
170
174
  }
171
175
  }
172
- return false;
173
- }
176
+
177
+ // If more than 10% binary/control chars, probably not text
178
+ if (binaryCount / sampleSize > 0.1) {
179
+ return false;
180
+ }
181
+
182
+ // Additional check for valid UTF-8 encoding
183
+ try {
184
+ const s = buf.toString('utf8');
185
+ return s.length > 0 && !s.includes('\uFFFD'); // Replacement character
186
+ } catch (e) {
187
+ return false;
188
+ }
189
+ }
@@ -1,10 +1,10 @@
1
- import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
2
1
  import { log } from "@temporalio/activity";
2
+ import { DSLActivityExecutionPayload, DSLActivitySpec } from "@vertesia/common";
3
3
  import { setupActivity } from "../dsl/setup/ActivityContext.js";
4
4
  import { TruncateSpec } from "../utils/tokens.js";
5
5
  import { InteractionExecutionParams, executeInteractionFromActivity } from "./executeInteraction.js";
6
6
 
7
- const INT_EXTRACT_INFORMATION = "sys:ExtractInformation"
7
+ const INT_EXTRACT_INFORMATION = "sys:ExtractInformation";
8
8
  export interface GenerateDocumentPropertiesParams extends InteractionExecutionParams {
9
9
  typesHint?: string[];
10
10
  /**
@@ -17,10 +17,12 @@ export interface GenerateDocumentPropertiesParams extends InteractionExecutionPa
17
17
  use_vision?: boolean;
18
18
  }
19
19
  export interface GenerateDocumentProperties extends DSLActivitySpec<GenerateDocumentPropertiesParams> {
20
- name: 'generateDocumentProperties';
20
+ name: "generateDocumentProperties";
21
21
  }
22
22
 
23
- export async function generateDocumentProperties(payload: DSLActivityExecutionPayload<GenerateDocumentPropertiesParams>) {
23
+ export async function generateDocumentProperties(
24
+ payload: DSLActivityExecutionPayload<GenerateDocumentPropertiesParams>,
25
+ ) {
24
26
  const context = await setupActivity<GenerateDocumentPropertiesParams>(payload);
25
27
  const { params, client, objectId } = context;
26
28
  const interactionName = params.interactionName ?? INT_EXTRACT_INFORMATION;
@@ -32,7 +34,7 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
32
34
 
33
35
  if (!doc?.text && !params.use_vision && !doc?.content?.type?.startsWith("image/")) {
34
36
  log.warn(`Object ${objectId} not found or text is empty`);
35
- return { status: "failed", error: "no-text" }
37
+ return { status: "failed", error: "no-text" };
36
38
  }
37
39
 
38
40
  if (!type || !type.object_schema) {
@@ -50,16 +52,19 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
50
52
  }
51
53
 
52
54
  log.info(`Object ${objectId} is not an image or pdf`);
53
- return undefined
54
- }
55
+ return undefined;
56
+ };
55
57
 
56
58
  const promptData = {
57
59
  content: doc.text ?? undefined,
58
60
  image: getImageRef() ?? undefined,
59
61
  human_context: project?.configuration?.human_context ?? undefined,
60
- }
62
+ };
61
63
 
62
- log.info(` Extracting information from object ${objectId} with type ${type.name}`, payload.debug_mode ? { params, } : undefined);
64
+ log.info(
65
+ ` Extracting information from object ${objectId} with type ${type.name}`,
66
+ payload.debug_mode ? { params } : undefined,
67
+ );
63
68
 
64
69
  const infoRes = await executeInteractionFromActivity(
65
70
  client,
@@ -70,24 +75,40 @@ export async function generateDocumentProperties(payload: DSLActivityExecutionPa
70
75
  result_schema: type.object_schema,
71
76
  },
72
77
  promptData,
73
- payload.debug_mode ?? false
78
+ payload.debug_mode ?? false,
74
79
  );
75
80
 
81
+ const getText = () => {
82
+ if (doc.text) {
83
+ return undefined;
84
+ }
85
+ let text = "";
86
+ if (infoRes.result.title) {
87
+ text += infoRes.result.title + "\n";
88
+ }
89
+ if (infoRes.result.description) {
90
+ text += infoRes.result.description;
91
+ }
92
+ if (text) {
93
+ return text;
94
+ } else {
95
+ return undefined;
96
+ }
97
+ };
98
+
76
99
  log.info(`Extracted information from object ${objectId} with type ${type.name}`, { runId: infoRes.id });
77
100
  await client.objects.update(doc.id, {
78
101
  properties: {
79
102
  ...infoRes.result,
80
- etag: doc.text_etag
103
+ etag: doc.text_etag,
81
104
  },
82
- text: infoRes.result.description ?? undefined,
105
+ text: getText(),
83
106
  generation_run_info: {
84
107
  id: infoRes.id,
85
108
  date: new Date().toISOString(),
86
109
  model: infoRes.modelId,
87
- }
110
+ },
88
111
  });
89
112
 
90
-
91
113
  return { status: "completed" };
92
-
93
- }
114
+ }