@vertesia/workflow 0.50.1 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/cjs/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/cjs/activities/chunkDocument.js +39 -34
- package/lib/cjs/activities/chunkDocument.js.map +1 -1
- package/lib/cjs/activities/createDocumentFromOther.js +2 -2
- package/lib/cjs/activities/createDocumentFromOther.js.map +1 -1
- package/lib/cjs/activities/executeInteraction.js +12 -7
- package/lib/cjs/activities/executeInteraction.js.map +1 -1
- package/lib/cjs/activities/extractDocumentText.js +25 -13
- package/lib/cjs/activities/extractDocumentText.js.map +1 -1
- package/lib/cjs/activities/generateDocumentProperties.js +22 -4
- package/lib/cjs/activities/generateDocumentProperties.js.map +1 -1
- package/lib/cjs/activities/generateEmbeddings.js +58 -102
- package/lib/cjs/activities/generateEmbeddings.js.map +1 -1
- package/lib/cjs/activities/generateImageRendition.js +77 -34
- package/lib/cjs/activities/generateImageRendition.js.map +1 -1
- package/lib/cjs/activities/generateOrAssignContentType.js +3 -7
- package/lib/cjs/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/cjs/activities/notifyWebhook.js.map +1 -1
- package/lib/cjs/conversion/image.js +80 -12
- package/lib/cjs/conversion/image.js.map +1 -1
- package/lib/cjs/dsl/setup/ActivityContext.js +32 -8
- package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/cjs/dsl.js +20 -0
- package/lib/cjs/dsl.js.map +1 -0
- package/lib/cjs/errors.js +13 -1
- package/lib/cjs/errors.js.map +1 -1
- package/lib/cjs/index.js +8 -2
- package/lib/cjs/index.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/extractToc.js +2 -2
- package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js +1 -1
- package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js +1 -1
- package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js +1 -1
- package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/cjs/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/cjs/iterative-generation/utils.js +7 -4
- package/lib/cjs/iterative-generation/utils.js.map +1 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js +2 -1
- package/lib/cjs/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js +1 -1
- package/lib/cjs/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/cjs/utils/blobs.js +13 -7
- package/lib/cjs/utils/blobs.js.map +1 -1
- package/lib/cjs/utils/chunks.js +14 -0
- package/lib/cjs/utils/chunks.js.map +1 -0
- package/lib/cjs/utils/client.js +6 -5
- package/lib/cjs/utils/client.js.map +1 -1
- package/lib/cjs/utils/memory.js +2 -9
- package/lib/cjs/utils/memory.js.map +1 -1
- package/lib/cjs/workflows.js +1 -3
- package/lib/cjs/workflows.js.map +1 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js +7 -1
- package/lib/esm/activities/advanced/createOrUpdateDocumentFromInteractionRun.js.map +1 -1
- package/lib/esm/activities/chunkDocument.js +39 -34
- package/lib/esm/activities/chunkDocument.js.map +1 -1
- package/lib/esm/activities/createDocumentFromOther.js +1 -1
- package/lib/esm/activities/createDocumentFromOther.js.map +1 -1
- package/lib/esm/activities/executeInteraction.js +12 -7
- package/lib/esm/activities/executeInteraction.js.map +1 -1
- package/lib/esm/activities/extractDocumentText.js +25 -13
- package/lib/esm/activities/extractDocumentText.js.map +1 -1
- package/lib/esm/activities/generateDocumentProperties.js +22 -4
- package/lib/esm/activities/generateDocumentProperties.js.map +1 -1
- package/lib/esm/activities/generateEmbeddings.js +58 -69
- package/lib/esm/activities/generateEmbeddings.js.map +1 -1
- package/lib/esm/activities/generateImageRendition.js +78 -35
- package/lib/esm/activities/generateImageRendition.js.map +1 -1
- package/lib/esm/activities/generateOrAssignContentType.js +3 -7
- package/lib/esm/activities/generateOrAssignContentType.js.map +1 -1
- package/lib/esm/activities/notifyWebhook.js.map +1 -1
- package/lib/esm/conversion/image.js +80 -12
- package/lib/esm/conversion/image.js.map +1 -1
- package/lib/esm/dsl/setup/ActivityContext.js +34 -10
- package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
- package/lib/esm/dsl.js +4 -0
- package/lib/esm/dsl.js.map +1 -0
- package/lib/esm/errors.js +11 -0
- package/lib/esm/errors.js.map +1 -1
- package/lib/esm/index.js +8 -2
- package/lib/esm/index.js.map +1 -1
- package/lib/esm/iterative-generation/activities/extractToc.js +3 -3
- package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -1
- package/lib/esm/iterative-generation/activities/finalizeOutput.js +2 -2
- package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -1
- package/lib/esm/iterative-generation/activities/generatePart.js +2 -2
- package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -1
- package/lib/esm/iterative-generation/activities/generateToc.js +2 -2
- package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js +2 -1
- package/lib/esm/iterative-generation/iterativeGenerationWorkflow.js.map +1 -1
- package/lib/esm/iterative-generation/utils.js +7 -4
- package/lib/esm/iterative-generation/utils.js.map +1 -1
- package/lib/esm/system/notifyWebhookWorkflow.js +2 -1
- package/lib/esm/system/notifyWebhookWorkflow.js.map +1 -1
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js +2 -2
- package/lib/esm/system/recalculateEmbeddingsWorkflow.js.map +1 -1
- package/lib/esm/utils/blobs.js +13 -7
- package/lib/esm/utils/blobs.js.map +1 -1
- package/lib/esm/utils/chunks.js +9 -0
- package/lib/esm/utils/chunks.js.map +1 -0
- package/lib/esm/utils/client.js +5 -4
- package/lib/esm/utils/client.js.map +1 -1
- package/lib/esm/utils/memory.js +2 -7
- package/lib/esm/utils/memory.js.map +1 -1
- package/lib/esm/workflows.js +0 -1
- package/lib/esm/workflows.js.map +1 -1
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts +10 -0
- package/lib/types/activities/advanced/createOrUpdateDocumentFromInteractionRun.d.ts.map +1 -1
- package/lib/types/activities/chunkDocument.d.ts +15 -0
- package/lib/types/activities/chunkDocument.d.ts.map +1 -1
- package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -1
- package/lib/types/activities/executeInteraction.d.ts +19 -4
- package/lib/types/activities/executeInteraction.d.ts.map +1 -1
- package/lib/types/activities/extractDocumentText.d.ts.map +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts +1 -1
- package/lib/types/activities/generateDocumentProperties.d.ts.map +1 -1
- package/lib/types/activities/generateEmbeddings.d.ts +21 -17
- package/lib/types/activities/generateEmbeddings.d.ts.map +1 -1
- package/lib/types/activities/generateImageRendition.d.ts +3 -5
- package/lib/types/activities/generateImageRendition.d.ts.map +1 -1
- package/lib/types/activities/generateOrAssignContentType.d.ts.map +1 -1
- package/lib/types/activities/notifyWebhook.d.ts +1 -2
- package/lib/types/activities/notifyWebhook.d.ts.map +1 -1
- package/lib/types/conversion/image.d.ts +8 -6
- package/lib/types/conversion/image.d.ts.map +1 -1
- package/lib/types/dsl/dslProxyActivities.d.ts +2 -2
- package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -1
- package/lib/types/dsl/setup/ActivityContext.d.ts +3 -0
- package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
- package/lib/types/dsl.d.ts +4 -0
- package/lib/types/dsl.d.ts.map +1 -0
- package/lib/types/errors.d.ts +6 -0
- package/lib/types/errors.d.ts.map +1 -1
- package/lib/types/index.d.ts +8 -2
- package/lib/types/index.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -1
- package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -1
- package/lib/types/iterative-generation/iterativeGenerationWorkflow.d.ts.map +1 -1
- package/lib/types/iterative-generation/utils.d.ts +2 -2
- package/lib/types/iterative-generation/utils.d.ts.map +1 -1
- package/lib/types/system/notifyWebhookWorkflow.d.ts.map +1 -1
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts +2 -17
- package/lib/types/system/recalculateEmbeddingsWorkflow.d.ts.map +1 -1
- package/lib/types/utils/blobs.d.ts.map +1 -1
- package/lib/types/utils/chunks.d.ts +9 -0
- package/lib/types/utils/chunks.d.ts.map +1 -0
- package/lib/types/utils/client.d.ts +2 -2
- package/lib/types/utils/client.d.ts.map +1 -1
- package/lib/types/utils/memory.d.ts +1 -5
- package/lib/types/utils/memory.d.ts.map +1 -1
- package/lib/types/workflows.d.ts +0 -1
- package/lib/types/workflows.d.ts.map +1 -1
- package/lib/workflows-bundle.js +8311 -5790
- package/package.json +28 -10
- package/src/activities/advanced/createOrUpdateDocumentFromInteractionRun.ts +20 -1
- package/src/activities/chunkDocument.ts +62 -42
- package/src/activities/createDocumentFromOther.ts +2 -2
- package/src/activities/executeInteraction.ts +33 -12
- package/src/activities/extractDocumentText.ts +30 -14
- package/src/activities/generateDocumentProperties.ts +37 -16
- package/src/activities/generateEmbeddings.ts +91 -79
- package/src/activities/generateImageRendition.ts +100 -53
- package/src/activities/generateOrAssignContentType.ts +5 -11
- package/src/activities/notifyWebhook.ts +2 -2
- package/src/conversion/image.test.ts +110 -18
- package/src/conversion/image.ts +90 -15
- package/src/conversion/pandoc.test.ts +7 -5
- package/src/dsl/dslProxyActivities.ts +2 -2
- package/src/dsl/setup/ActivityContext.ts +60 -19
- package/src/dsl.ts +3 -0
- package/src/errors.ts +27 -6
- package/src/index.ts +9 -2
- package/src/iterative-generation/activities/extractToc.ts +3 -3
- package/src/iterative-generation/activities/finalizeOutput.ts +3 -3
- package/src/iterative-generation/activities/generatePart.ts +3 -3
- package/src/iterative-generation/activities/generateToc.ts +2 -2
- package/src/iterative-generation/iterativeGenerationWorkflow.ts +2 -1
- package/src/iterative-generation/utils.ts +10 -6
- package/src/system/notifyWebhookWorkflow.ts +3 -2
- package/src/system/recalculateEmbeddingsWorkflow.ts +2 -2
- package/src/utils/blobs.ts +12 -7
- package/src/utils/chunks.ts +17 -0
- package/src/utils/client.ts +6 -5
- package/src/utils/memory.ts +3 -8
- package/src/workflows.ts +0 -2
- package/lib/cjs/conversion/pdf.js +0 -13
- package/lib/cjs/conversion/pdf.js.map +0 -1
- package/lib/cjs/system/generateObjectText.js +0 -76
- package/lib/cjs/system/generateObjectText.js.map +0 -1
- package/lib/esm/conversion/pdf.js +0 -7
- package/lib/esm/conversion/pdf.js.map +0 -1
- package/lib/esm/system/generateObjectText.js +0 -73
- package/lib/esm/system/generateObjectText.js.map +0 -1
- package/lib/types/conversion/pdf.d.ts +0 -2
- package/lib/types/conversion/pdf.d.ts.map +0 -1
- package/lib/types/system/generateObjectText.d.ts +0 -4
- package/lib/types/system/generateObjectText.d.ts.map +0 -1
- package/src/conversion/pdf.test.ts +0 -35
- package/src/conversion/pdf.ts +0 -8
- package/src/system/generateObjectText.ts +0 -95
@@ -1,26 +1,118 @@
|
|
1
|
-
import fs from
|
2
|
-
import path from
|
3
|
-
import
|
4
|
-
import {
|
5
|
-
import {
|
1
|
+
import fs from "fs";
|
2
|
+
import path from "path";
|
3
|
+
import { exec } from "child_process";
|
4
|
+
import { promisify } from "util";
|
5
|
+
import { expect, test, vi, describe } from "vitest";
|
6
6
|
|
7
|
+
// Mock Temporal activity context
|
8
|
+
vi.mock("@temporalio/activity", () => ({
|
9
|
+
log: {
|
10
|
+
info: vi.fn(),
|
11
|
+
warn: vi.fn(),
|
12
|
+
error: vi.fn(),
|
13
|
+
},
|
14
|
+
}));
|
7
15
|
|
8
|
-
|
9
|
-
|
10
|
-
const format: keyof sharp.FormatEnum = 'jpeg';
|
11
|
-
const imageFile = fs.readFileSync(path.join(__dirname, '../../fixtures', 'cat-picture.jpg'));
|
16
|
+
// Import after mocking
|
17
|
+
import { imageResizer } from "../conversion/image";
|
12
18
|
|
13
|
-
|
19
|
+
const execAsync = promisify(exec);
|
14
20
|
|
15
|
-
|
16
|
-
|
17
|
-
|
21
|
+
describe("ImageMagick image resizing", () => {
|
22
|
+
test("should resize an image to a maximum height or width using ImageMagick", async () => {
|
23
|
+
const max_hw = 1024;
|
24
|
+
const format = "jpeg";
|
25
|
+
const inputImagePath = path.join(__dirname, "../../fixtures", "cat-picture.jpg");
|
18
26
|
|
19
|
-
|
20
|
-
|
27
|
+
// Make sure the input file exists
|
28
|
+
expect(fs.existsSync(inputImagePath)).toBe(true);
|
21
29
|
|
22
|
-
|
23
|
-
|
24
|
-
expect(metadata.format).to.equal(format);
|
30
|
+
// Call the imageResizer function with a file path
|
31
|
+
const resizedImagePath = await imageResizer(inputImagePath, max_hw, format);
|
25
32
|
|
33
|
+
// Make sure the output file exists
|
34
|
+
expect(fs.existsSync(resizedImagePath)).toBe(true);
|
35
|
+
|
36
|
+
// Use ImageMagick identify to get metadata about the resized image
|
37
|
+
const { stdout } = await execAsync(`identify -format "%w %h %m" "${resizedImagePath}"`);
|
38
|
+
const [width, height, imageFormat] = stdout.trim().split(" ");
|
39
|
+
|
40
|
+
console.log({ width, height, imageFormat });
|
41
|
+
|
42
|
+
// Check dimensions
|
43
|
+
expect(parseInt(width)).to.be.lessThanOrEqual(max_hw);
|
44
|
+
expect(parseInt(height)).to.be.lessThanOrEqual(max_hw);
|
45
|
+
|
46
|
+
// Check format (JPEG)
|
47
|
+
expect(imageFormat.toLowerCase()).to.equal("jpeg");
|
48
|
+
});
|
49
|
+
|
50
|
+
test("should throw an error for non-existent input file", async () => {
|
51
|
+
const max_hw = 1024;
|
52
|
+
const format = "jpeg";
|
53
|
+
const nonExistentPath = path.join(__dirname, "non-existent-image.jpg");
|
54
|
+
|
55
|
+
// Verify file doesn't exist
|
56
|
+
expect(fs.existsSync(nonExistentPath)).toBe(false);
|
57
|
+
|
58
|
+
// Expect the function to throw an error
|
59
|
+
await expect(imageResizer(nonExistentPath, max_hw, format)).rejects.toThrow("Input file does not exist");
|
60
|
+
});
|
61
|
+
|
62
|
+
test("should throw error with empty format", async () => {
|
63
|
+
const max_hw = 1024;
|
64
|
+
const format = "";
|
65
|
+
const inputImagePath = path.join(__dirname, "../../fixtures", "cat-picture.jpg");
|
66
|
+
|
67
|
+
// Test for empty format validation
|
68
|
+
await expect(imageResizer(inputImagePath, max_hw, format)).rejects.toThrow("Invalid format");
|
69
|
+
});
|
70
|
+
|
71
|
+
test("should create progressive/interlaced image when enabled", async () => {
|
72
|
+
const max_hw = 800;
|
73
|
+
const format = "jpeg";
|
74
|
+
const inputImagePath = path.join(__dirname, "../../fixtures", "cat-picture.jpg");
|
75
|
+
|
76
|
+
// Make sure the input file exists
|
77
|
+
expect(fs.existsSync(inputImagePath)).toBe(true);
|
78
|
+
|
79
|
+
// Call the imageResizer function with progressive=true
|
80
|
+
const resizedImagePath = await imageResizer(inputImagePath, max_hw, format, true);
|
81
|
+
|
82
|
+
// Make sure the output file exists
|
83
|
+
expect(fs.existsSync(resizedImagePath)).toBe(true);
|
84
|
+
|
85
|
+
// Use ImageMagick identify to check if the image is interlaced
|
86
|
+
const { stdout } = await execAsync(`identify -format "%[interlace]" "${resizedImagePath}"`);
|
87
|
+
const interlaceMode = stdout.trim();
|
88
|
+
|
89
|
+
console.log({ interlaceMode });
|
90
|
+
|
91
|
+
// Check that interlace is enabled (should be 'JPEG' or 'Line' for progressive JPEG)
|
92
|
+
expect(["JPEG", "Line", "Plane"]).to.include(interlaceMode);
|
93
|
+
});
|
94
|
+
|
95
|
+
test("should create non-interlaced image when progressive is disabled", async () => {
|
96
|
+
const max_hw = 800;
|
97
|
+
const format = "jpeg";
|
98
|
+
const inputImagePath = path.join(__dirname, "../../fixtures", "cat-picture.jpg");
|
99
|
+
|
100
|
+
// Make sure the input file exists
|
101
|
+
expect(fs.existsSync(inputImagePath)).toBe(true);
|
102
|
+
|
103
|
+
// Call the imageResizer function with progressive=false
|
104
|
+
const resizedImagePath = await imageResizer(inputImagePath, max_hw, format, false);
|
105
|
+
|
106
|
+
// Make sure the output file exists
|
107
|
+
expect(fs.existsSync(resizedImagePath)).toBe(true);
|
108
|
+
|
109
|
+
// Use ImageMagick identify to check if the image is interlaced
|
110
|
+
const { stdout } = await execAsync(`identify -format "%[interlace]" "${resizedImagePath}"`);
|
111
|
+
const interlaceMode = stdout.trim().toLowerCase();
|
112
|
+
|
113
|
+
console.log({ interlaceMode });
|
114
|
+
|
115
|
+
// Check that interlace is disabled (should be 'none' or empty string)
|
116
|
+
expect(["none", ""]).to.include(interlaceMode);
|
117
|
+
});
|
26
118
|
});
|
package/src/conversion/image.ts
CHANGED
@@ -1,22 +1,97 @@
|
|
1
|
-
|
2
|
-
import
|
3
|
-
|
1
|
+
import { log } from "@temporalio/activity";
|
2
|
+
import { execFile as execFileCallback } from "child_process";
|
3
|
+
import fs from "fs";
|
4
|
+
import { file } from "tmp-promise";
|
5
|
+
import { promisify } from "util";
|
6
|
+
const execFile = promisify(execFileCallback);
|
4
7
|
|
5
8
|
/**
|
6
|
-
* Resizes an image to a maximum height or width
|
7
|
-
*
|
8
|
-
* @param
|
9
|
-
* @
|
9
|
+
* Resizes an image to a maximum height or width using ImageMagick
|
10
|
+
* with progressive loading when supported
|
11
|
+
* @param inputPath Input file path
|
12
|
+
* @param max_hw Maximum height or width
|
13
|
+
* @param format Output format
|
14
|
+
* @param progressive Enable progressive loading for supported formats (defaults to true)
|
15
|
+
* @returns Path to the resized image
|
10
16
|
*/
|
11
|
-
export function imageResizer(
|
17
|
+
export async function imageResizer(
|
18
|
+
inputPath: string,
|
19
|
+
max_hw: number,
|
20
|
+
format: string,
|
21
|
+
progressive: boolean = true,
|
22
|
+
): Promise<string> {
|
23
|
+
const allowedFormats = ["jpg", "jpeg", "png", "webp"];
|
12
24
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
fit: sharp.fit.inside,
|
17
|
-
withoutEnlargement: true,
|
25
|
+
if (!format || format.trim() === "") {
|
26
|
+
throw new Error(`Invalid format: ${format}.Supported : ${allowedFormats.join(", ")}`);
|
27
|
+
}
|
18
28
|
|
19
|
-
|
29
|
+
//check that max_hw is valid
|
30
|
+
if (!Number.isInteger(max_hw) || max_hw <= 0) {
|
31
|
+
throw new Error(`Invalid max_hw value: ${max_hw}`);
|
32
|
+
}
|
20
33
|
|
21
|
-
|
34
|
+
//check that inputPath exists
|
35
|
+
if (!fs.existsSync(inputPath)) {
|
36
|
+
throw new Error(`Input file does not exist: ${inputPath}`);
|
37
|
+
}
|
38
|
+
|
39
|
+
// Create a temporary file
|
40
|
+
const { path: outputPath, cleanup } = await file({ postfix: `.${format}` });
|
41
|
+
try {
|
42
|
+
// Check if input file exists
|
43
|
+
if (!fs.existsSync(inputPath)) {
|
44
|
+
throw new Error(`Input file does not exist: ${inputPath}`);
|
45
|
+
}
|
46
|
+
// Validate max_hw
|
47
|
+
if (!Number.isInteger(max_hw) || max_hw <= 0) {
|
48
|
+
throw new Error(`Invalid max_hw value: ${max_hw}`);
|
49
|
+
}
|
50
|
+
|
51
|
+
// Progressive loading options
|
52
|
+
let conversionOption = "";
|
53
|
+
|
54
|
+
// Only add progressive option for formats that support it
|
55
|
+
if (progressive) {
|
56
|
+
// JPEG and some other formats support progressive loading
|
57
|
+
const lowerFormat = format.toLowerCase();
|
58
|
+
if (lowerFormat === "jpg" || lowerFormat === "jpeg") {
|
59
|
+
conversionOption = "-interlace JPEG";
|
60
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
61
|
+
} else if (lowerFormat === "png") {
|
62
|
+
conversionOption = "-interlace PNG";
|
63
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
64
|
+
} else if (lowerFormat === "gif") {
|
65
|
+
conversionOption = "-interlace GIF";
|
66
|
+
log.info(`Enabling interlaced ${lowerFormat.toUpperCase()} format`);
|
67
|
+
}
|
68
|
+
}
|
22
69
|
|
70
|
+
log.info(`Resizing image using ImageMagick: ${inputPath} -> ${outputPath}`);
|
71
|
+
|
72
|
+
const { stderr } = await execFile("convert", [
|
73
|
+
inputPath,
|
74
|
+
"-resize",
|
75
|
+
`${max_hw}x${max_hw}>`,
|
76
|
+
...(conversionOption ? conversionOption.split(" ") : []),
|
77
|
+
outputPath,
|
78
|
+
]);
|
79
|
+
|
80
|
+
if (stderr) {
|
81
|
+
log.warn(`ImageMagick warning: ${stderr}`);
|
82
|
+
}
|
83
|
+
|
84
|
+
// Verify output exists and has content
|
85
|
+
if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
|
86
|
+
throw new Error(`ImageMagick conversion failed: output file not created or empty`);
|
87
|
+
}
|
88
|
+
|
89
|
+
return outputPath;
|
90
|
+
} catch (error) {
|
91
|
+
// Clean up the temporary file
|
92
|
+
await cleanup();
|
93
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
94
|
+
log.error(`Image conversion failed: ${errorMessage}`);
|
95
|
+
throw new Error(`Image conversion failed: ${errorMessage}`);
|
96
|
+
}
|
97
|
+
}
|
@@ -9,14 +9,16 @@ let testEnv: TestWorkflowEnvironment;
|
|
9
9
|
let activityContext: MockActivityEnvironment;
|
10
10
|
|
11
11
|
beforeAll(async () => {
|
12
|
-
|
13
|
-
|
12
|
+
testEnv = await TestWorkflowEnvironment.createLocal();
|
13
|
+
activityContext = new MockActivityEnvironment();
|
14
14
|
});
|
15
15
|
|
16
16
|
|
17
17
|
// Add more test cases for other file types (ODT, DOCX) if needed
|
18
18
|
test('should convert docx to markdown', async () => {
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
const filepath = path.join(__dirname, '../../fixtures', 'us-ciia.docx');
|
20
|
+
console.log("Converting file from", filepath);
|
21
|
+
const docx = fs.readFileSync(filepath);
|
22
|
+
const result = await activityContext.run(manyToMarkdown, Buffer.from(docx), 'docx');
|
23
|
+
expect(result).to.include('confidential');
|
22
24
|
});
|
@@ -1,12 +1,12 @@
|
|
1
1
|
import { ActivityOptions, proxyActivities } from "@temporalio/workflow";
|
2
|
-
import { DSLActivityExecutionPayload, WorkflowExecutionPayload } from "@vertesia/common";
|
2
|
+
import { DSLActivityExecutionPayload, WorkflowExecutionBaseParams, WorkflowExecutionPayload } from "@vertesia/common";
|
3
3
|
|
4
4
|
export interface DslActivityFunction<ParamsT extends Record<string, any> = any, ReturnT = any> {
|
5
5
|
(payload: DSLActivityExecutionPayload<ParamsT>): Promise<ReturnT>;
|
6
6
|
}
|
7
7
|
|
8
8
|
export interface DslSimplifiedActivityFunction<ParamsT = any, ReturnT = any> {
|
9
|
-
(payload:
|
9
|
+
(payload: WorkflowExecutionBaseParams, params: ParamsT): Promise<ReturnT>;
|
10
10
|
}
|
11
11
|
|
12
12
|
export function dslProxyActivities<
|
@@ -1,13 +1,17 @@
|
|
1
|
+
import { log, activityInfo } from "@temporalio/activity";
|
1
2
|
import { VertesiaClient } from "@vertesia/client";
|
2
|
-
import {
|
3
|
-
|
3
|
+
import {
|
4
|
+
DSLActivityExecutionPayload,
|
5
|
+
DSLWorkflowExecutionPayload,
|
6
|
+
Project,
|
7
|
+
WorkflowExecutionPayload,
|
8
|
+
} from "@vertesia/common";
|
4
9
|
import { NoDocumentFound, WorkflowParamNotFound } from "../../errors.js";
|
5
|
-
import {
|
10
|
+
import { getProjectFromToken } from "../../utils/auth.js";
|
11
|
+
import { getVertesiaClient } from "../../utils/client.js";
|
6
12
|
import { Vars } from "../vars.js";
|
7
13
|
import { getFetchProvider, registerFetchProviderFactory } from "./fetch/index.js";
|
8
14
|
import { DocumentProvider, DocumentTypeProvider, InteractionRunProvider } from "./fetch/providers.js";
|
9
|
-
import { getProjectFromToken } from "../../utils/auth.js";
|
10
|
-
|
11
15
|
|
12
16
|
registerFetchProviderFactory(DocumentProvider.ID, DocumentProvider.factory);
|
13
17
|
registerFetchProviderFactory(DocumentTypeProvider.ID, DocumentTypeProvider.factory);
|
@@ -17,7 +21,11 @@ export class ActivityContext<ParamsT extends Record<string, any>> {
|
|
17
21
|
client: VertesiaClient;
|
18
22
|
_project?: Promise<Project | undefined>;
|
19
23
|
|
20
|
-
constructor(
|
24
|
+
constructor(
|
25
|
+
public payload: DSLActivityExecutionPayload<ParamsT>,
|
26
|
+
client: VertesiaClient,
|
27
|
+
public params: ParamsT,
|
28
|
+
) {
|
21
29
|
this.client = client;
|
22
30
|
this.fetchProject = this.fetchProject.bind(this);
|
23
31
|
}
|
@@ -29,24 +37,54 @@ export class ActivityContext<ParamsT extends Record<string, any>> {
|
|
29
37
|
get objectId() {
|
30
38
|
const objectId = this.payload.objectIds && this.payload.objectIds[0];
|
31
39
|
if (!objectId) {
|
32
|
-
log.error(
|
33
|
-
throw new WorkflowParamNotFound(
|
40
|
+
log.error("No objectId found in payload");
|
41
|
+
throw new WorkflowParamNotFound(
|
42
|
+
"objectIds[0]",
|
43
|
+
(this.payload as WorkflowExecutionPayload as DSLWorkflowExecutionPayload).workflow,
|
44
|
+
);
|
34
45
|
}
|
35
46
|
return objectId;
|
36
47
|
}
|
37
48
|
|
49
|
+
get activityInfo() {
|
50
|
+
return activityInfo();
|
51
|
+
}
|
52
|
+
|
53
|
+
get runId() {
|
54
|
+
const runId = activityInfo().workflowExecution.runId;
|
55
|
+
if (!runId) {
|
56
|
+
log.error("No runId found in activityInfo");
|
57
|
+
throw new WorkflowParamNotFound(
|
58
|
+
"runId",
|
59
|
+
(this.payload as WorkflowExecutionPayload as DSLWorkflowExecutionPayload).workflow,
|
60
|
+
);
|
61
|
+
}
|
62
|
+
return runId;
|
63
|
+
}
|
64
|
+
|
65
|
+
get workflowId() {
|
66
|
+
const workflowId = activityInfo().workflowExecution.workflowId;
|
67
|
+
if (!workflowId) {
|
68
|
+
log.error("No workflowId found in activityInfo");
|
69
|
+
throw new WorkflowParamNotFound(
|
70
|
+
"workflowId",
|
71
|
+
(this.payload as WorkflowExecutionPayload as DSLWorkflowExecutionPayload).workflow,
|
72
|
+
);
|
73
|
+
}
|
74
|
+
return workflowId;
|
75
|
+
}
|
76
|
+
|
38
77
|
fetchProject() {
|
39
78
|
if (!this._project) {
|
40
79
|
this._project = _fetchProject(this.client, this.payload);
|
41
80
|
}
|
42
81
|
return this._project;
|
43
82
|
}
|
44
|
-
|
45
83
|
}
|
46
84
|
|
47
|
-
|
48
|
-
|
49
|
-
|
85
|
+
export async function setupActivity<ParamsT extends Record<string, any>>(
|
86
|
+
payload: DSLActivityExecutionPayload<ParamsT>,
|
87
|
+
) {
|
50
88
|
const isDebugMode = !!payload.debug_mode;
|
51
89
|
|
52
90
|
const vars = new Vars({
|
@@ -56,13 +94,17 @@ export async function setupActivity<ParamsT extends Record<string, any>>(payload
|
|
56
94
|
|
57
95
|
//}
|
58
96
|
if (isDebugMode) {
|
59
|
-
log.info(`Setting up activity ${payload.activity.name}`, {
|
97
|
+
log.info(`Setting up activity ${payload.activity.name}`, {
|
98
|
+
config: payload.config,
|
99
|
+
activity: payload.activity,
|
100
|
+
params: payload.params,
|
101
|
+
vars,
|
102
|
+
});
|
60
103
|
}
|
61
104
|
|
62
|
-
const client =
|
105
|
+
const client = getVertesiaClient(payload);
|
63
106
|
const fetchSpecs = payload.activity.fetch;
|
64
107
|
if (fetchSpecs) {
|
65
|
-
|
66
108
|
const keys = Object.keys(fetchSpecs);
|
67
109
|
if (keys.length > 0) {
|
68
110
|
// create a new Vars instance to store the fetched data
|
@@ -74,7 +116,7 @@ export async function setupActivity<ParamsT extends Record<string, any>>(payload
|
|
74
116
|
fetchSpec = { ...fetchSpec, query };
|
75
117
|
}
|
76
118
|
|
77
|
-
const provider =
|
119
|
+
const provider = getFetchProvider(client, fetchSpec);
|
78
120
|
|
79
121
|
log.info(`Fetching data for ${key} with provider ${provider.name}`, { fetchSpec });
|
80
122
|
const result = await provider.fetch(fetchSpec);
|
@@ -84,7 +126,7 @@ export async function setupActivity<ParamsT extends Record<string, any>>(payload
|
|
84
126
|
} else {
|
85
127
|
vars.setValue(key, result);
|
86
128
|
}
|
87
|
-
} else if (fetchSpec.on_not_found ===
|
129
|
+
} else if (fetchSpec.on_not_found === "throw") {
|
88
130
|
throw new NoDocumentFound("No documents found for: " + JSON.stringify(fetchSpec));
|
89
131
|
} else {
|
90
132
|
vars.setValue(key, null);
|
@@ -94,12 +136,11 @@ export async function setupActivity<ParamsT extends Record<string, any>>(payload
|
|
94
136
|
}
|
95
137
|
|
96
138
|
const params = vars.resolve() as ParamsT;
|
97
|
-
log.info(`Activity ${payload.activity.name} setup complete
|
139
|
+
log.info(`Activity ${payload.activity.name} setup complete`);
|
98
140
|
|
99
141
|
return new ActivityContext<ParamsT>(payload, client, params);
|
100
142
|
}
|
101
143
|
|
102
|
-
|
103
144
|
async function _fetchProject(client: VertesiaClient, payload: WorkflowExecutionPayload) {
|
104
145
|
const project = await getProjectFromToken(payload.auth_token);
|
105
146
|
return project ? await client.projects.retrieve(project.id) : undefined;
|
package/src/dsl.ts
ADDED
package/src/errors.ts
CHANGED
@@ -1,24 +1,45 @@
|
|
1
1
|
import { DSLActivitySpec, DSLWorkflowSpec } from "@vertesia/common";
|
2
2
|
|
3
|
-
|
4
3
|
export class NoDocumentFound extends Error {
|
5
|
-
constructor(
|
4
|
+
constructor(
|
5
|
+
message: string,
|
6
|
+
public ids?: string[],
|
7
|
+
) {
|
6
8
|
super(message);
|
7
9
|
this.name = "NoDocumentFound";
|
8
|
-
this.ids = ids
|
10
|
+
this.ids = ids;
|
9
11
|
}
|
10
12
|
}
|
11
13
|
|
12
14
|
export class ActivityParamNotFound extends Error {
|
13
|
-
constructor(
|
15
|
+
constructor(
|
16
|
+
public paramName: string,
|
17
|
+
public activity: DSLActivitySpec,
|
18
|
+
) {
|
14
19
|
super(`Required parameter ${paramName} not found in activity ${activity.name}`);
|
15
20
|
this.name = "ActivityParamNotFound";
|
16
21
|
}
|
17
22
|
}
|
18
23
|
|
24
|
+
export class ActivityParamInvalid extends Error {
|
25
|
+
constructor(
|
26
|
+
public paramName: string,
|
27
|
+
public activity: DSLActivitySpec,
|
28
|
+
reason?: string,
|
29
|
+
) {
|
30
|
+
super(`${paramName} in activity ${activity.name} is invalid${reason ? ` ${reason}` : ""}`);
|
31
|
+
this.name = "ActivityParamInvalid";
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
19
35
|
export class WorkflowParamNotFound extends Error {
|
20
|
-
constructor(
|
36
|
+
constructor(
|
37
|
+
public paramName: string,
|
38
|
+
public workflow?: DSLWorkflowSpec,
|
39
|
+
) {
|
21
40
|
super(`Required parameter ${paramName} not found in workflow ${workflow?.name}`);
|
22
41
|
this.name = "WorkflowParamNotFound";
|
23
42
|
}
|
24
|
-
}
|
43
|
+
}
|
44
|
+
|
45
|
+
export const WF_NON_RETRYABLE_ERRORS = ["NoDocumentFound", "ActivityParamNotFound", "WorkflowParamNotFound"];
|
package/src/index.ts
CHANGED
@@ -20,9 +20,16 @@ export * from "./activities/generateDocumentProperties.js";
|
|
20
20
|
export * from "./activities/generateEmbeddings.js";
|
21
21
|
export * from "./activities/generateImageRendition.js";
|
22
22
|
export * from "./activities/generateOrAssignContentType.js";
|
23
|
-
export * from "./activities/setDocumentStatus.js";
|
24
23
|
export * from "./activities/notifyWebhook.js";
|
24
|
+
export * from "./activities/setDocumentStatus.js";
|
25
25
|
export * from "./iterative-generation/activities/index.js";
|
26
26
|
|
27
|
+
export * from "./dsl/setup/ActivityContext.js";
|
28
|
+
export * from "./errors.js";
|
27
29
|
export * from "./result-types.js";
|
28
|
-
export * from "./
|
30
|
+
export * from "./utils/blobs.js";
|
31
|
+
export * from "./utils/client.js";
|
32
|
+
export * from "./utils/memory.js";
|
33
|
+
export * from "./utils/tokens.js";
|
34
|
+
|
35
|
+
export * from "./conversion/image.js";
|
@@ -1,10 +1,10 @@
|
|
1
|
+
import { log } from "@temporalio/activity";
|
1
2
|
import { WorkflowExecutionPayload } from "@vertesia/common";
|
2
3
|
import { parse as parseYaml } from "yaml";
|
3
|
-
import {
|
4
|
+
import { getVertesiaClient } from "../../utils/client.js";
|
4
5
|
import { buildAndPublishMemoryPack, loadMemoryPack } from "../../utils/memory.js";
|
5
6
|
import { IterativeGenerationPayload, OutputMemoryMeta, Toc, TocIndex } from "../types.js";
|
6
7
|
import { tocIndex } from "../utils.js";
|
7
|
-
import { log } from "@temporalio/activity";
|
8
8
|
|
9
9
|
/**
|
10
10
|
* This activity is called if the toc was provided in the payload. Otherwise
|
@@ -15,7 +15,7 @@ import { log } from "@temporalio/activity";
|
|
15
15
|
export async function it_gen_extractToc(payload: WorkflowExecutionPayload): Promise<TocIndex | null> {
|
16
16
|
const vars = payload.vars as IterativeGenerationPayload;
|
17
17
|
const memory = vars.memory;
|
18
|
-
const client =
|
18
|
+
const client = getVertesiaClient(payload);
|
19
19
|
|
20
20
|
const inMemory = await loadMemoryPack(client, `${memory}/input`);
|
21
21
|
let tocJson: string | null = null;
|
@@ -1,6 +1,6 @@
|
|
1
|
-
import { WorkflowExecutionPayload } from "@vertesia/common";
|
2
1
|
import { log } from "@temporalio/activity";
|
3
|
-
import {
|
2
|
+
import { WorkflowExecutionPayload } from "@vertesia/common";
|
3
|
+
import { getVertesiaClient } from "../../utils/client.js";
|
4
4
|
import { expandVars } from "../../utils/expand-vars.js";
|
5
5
|
import { buildAndPublishMemoryPack, loadMemoryPack } from "../../utils/memory.js";
|
6
6
|
import { IterativeGenerationPayload, Section, SECTION_ID_PLACEHOLDER, TocSection } from "../types.js";
|
@@ -9,7 +9,7 @@ export async function it_gen_finalizeOutput(payload: WorkflowExecutionPayload):
|
|
9
9
|
const vars = payload.vars as IterativeGenerationPayload;
|
10
10
|
|
11
11
|
const memory = vars.memory;
|
12
|
-
const client =
|
12
|
+
const client = getVertesiaClient(payload);
|
13
13
|
const inMemory = await loadMemoryPack(client, `${memory}/input`);
|
14
14
|
const outMemory = await loadMemoryPack(client, `${memory}/output`);
|
15
15
|
|
@@ -1,14 +1,14 @@
|
|
1
|
+
import { ApplicationFailure } from "@temporalio/workflow";
|
1
2
|
import { WorkflowExecutionPayload } from "@vertesia/common";
|
2
3
|
import { MemoryPack } from "@vertesia/memory";
|
3
|
-
import {
|
4
|
-
import { getClient } from "../../utils/client.js";
|
4
|
+
import { getVertesiaClient } from "../../utils/client.js";
|
5
5
|
import { buildAndPublishMemoryPack, loadMemoryPack } from "../../utils/memory.js";
|
6
6
|
import { IterativeGenerationPayload, OutputMemoryMeta, Section, TocPart, TocSection } from "../types.js";
|
7
7
|
import { executeWithVars, expectMemoryIsConsistent } from "../utils.js";
|
8
8
|
|
9
9
|
export async function it_gen_generatePart(payload: WorkflowExecutionPayload, path: number[]) {
|
10
10
|
const vars = payload.vars as IterativeGenerationPayload;
|
11
|
-
const client =
|
11
|
+
const client = getVertesiaClient(payload);
|
12
12
|
const memory = vars.memory;
|
13
13
|
|
14
14
|
const [sectionIndex, partIndex] = path;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import { WorkflowExecutionPayload } from "@vertesia/common";
|
2
|
-
import {
|
2
|
+
import { getVertesiaClient } from "../../utils/client.js";
|
3
3
|
import { buildAndPublishMemoryPack } from "../../utils/memory.js";
|
4
4
|
import { IterativeGenerationPayload, OutputMemoryMeta, Toc, TocIndex } from "../types.js";
|
5
5
|
import { executeWithVars, tocIndex } from "../utils.js";
|
@@ -80,7 +80,7 @@ export async function it_gen_generateToc(payload: WorkflowExecutionPayload): Pro
|
|
80
80
|
|
81
81
|
const schema = vars.toc_schema || defaultTocSchema;
|
82
82
|
|
83
|
-
const client =
|
83
|
+
const client = getVertesiaClient(payload);
|
84
84
|
|
85
85
|
const run = await executeWithVars(client, vars.interaction, vars, undefined, schema);
|
86
86
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import { WorkflowExecutionPayload } from "@vertesia/common";
|
2
2
|
|
3
3
|
import { log, proxyActivities } from "@temporalio/workflow";
|
4
|
+
import { WF_NON_RETRYABLE_ERRORS } from "../errors.js";
|
4
5
|
import * as activities from "./activities/index.js";
|
5
6
|
import { IterativeGenerationPayload, PartIndex, SECTION_ID_PLACEHOLDER } from "./types.js";
|
6
7
|
|
@@ -16,7 +17,7 @@ const {
|
|
16
17
|
backoffCoefficient: 2,
|
17
18
|
maximumAttempts: 20,
|
18
19
|
maximumInterval: 100 * 30 * 1000, //ms
|
19
|
-
nonRetryableErrorTypes:
|
20
|
+
nonRetryableErrorTypes: WF_NON_RETRYABLE_ERRORS,
|
20
21
|
},
|
21
22
|
});
|
22
23
|
|
@@ -2,15 +2,16 @@ import { VertesiaClient } from "@vertesia/client";
|
|
2
2
|
import { ExecutionRun } from "@vertesia/common";
|
3
3
|
import { ApplicationFailure } from "@temporalio/workflow";
|
4
4
|
import { OutputMemoryMeta, PartIndex, Toc, TocIndex, TocSection } from "./types.js";
|
5
|
+
import { ModelOptions, TextFallbackOptions } from "@llumiverse/core";
|
5
6
|
|
7
|
+
// TODO: Throughout this file, support model options beyond max_tokens and temperature, as well as multiple modalities.
|
6
8
|
export interface ExecuteOptions {
|
7
9
|
interaction: string;
|
8
10
|
memory: string;
|
9
11
|
memory_mapping?: Record<string, any>;
|
10
12
|
environment?: string;
|
11
13
|
model?: string;
|
12
|
-
|
13
|
-
temperature?: number;
|
14
|
+
model_options?: ModelOptions;
|
14
15
|
result_schema?: Record<string, any>;
|
15
16
|
}
|
16
17
|
|
@@ -24,8 +25,7 @@ export async function execute<T = any>(client: VertesiaClient, options: ExecuteO
|
|
24
25
|
config: {
|
25
26
|
environment: options.environment,
|
26
27
|
model: options.model,
|
27
|
-
|
28
|
-
temperature: options.temperature,
|
28
|
+
model_options: options.model_options,
|
29
29
|
}
|
30
30
|
});
|
31
31
|
}
|
@@ -36,14 +36,18 @@ export function executeWithVars<T = any>(client: VertesiaClient, interaction: st
|
|
36
36
|
} else {
|
37
37
|
mapping = vars.input_mapping;
|
38
38
|
}
|
39
|
+
const model_options: TextFallbackOptions = {
|
40
|
+
_option_id: "text-fallback",
|
41
|
+
max_tokens: vars.max_tokens,
|
42
|
+
temperature: vars.temperature
|
43
|
+
}
|
39
44
|
return execute(client, {
|
40
45
|
interaction: interaction,
|
41
46
|
memory: `${vars.memory}/input`,
|
42
47
|
memory_mapping: mapping,
|
43
48
|
environment: vars.environment,
|
44
49
|
model: vars.model,
|
45
|
-
|
46
|
-
temperature: vars.temperature,
|
50
|
+
model_options: model_options,
|
47
51
|
result_schema: result_schema
|
48
52
|
});
|
49
53
|
}
|