@vertesia/workflow 1.4.0-dev.20260615.042549Z → 1.4.0-dev.20260629.130134Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vertesia/workflow",
3
- "version": "1.4.0-dev.20260615.042549Z",
3
+ "version": "1.4.0-dev.20260629.130134Z",
4
4
  "type": "module",
5
5
  "description": "Vertesia workflow DSL",
6
6
  "main": "./lib/index.js",
@@ -15,23 +15,23 @@
15
15
  "license": "Apache-2.0",
16
16
  "devDependencies": {
17
17
  "@smithy/types": "^4.14.3",
18
- "@temporalio/proto": "^1.17.2",
19
- "@temporalio/testing": "^1.17.2",
20
- "@temporalio/worker": "^1.17.2",
18
+ "@temporalio/proto": "^1.18.1",
19
+ "@temporalio/testing": "^1.18.1",
20
+ "@temporalio/worker": "^1.18.1",
21
21
  "@types/jsonwebtoken": "^9.0.10",
22
- "@types/node": "^24.12.4",
22
+ "@types/node": "^24.13.2",
23
23
  "@types/papaparse": "^5.5.2",
24
24
  "@types/tmp": "^0.2.6",
25
25
  "typescript": "^6.0.3",
26
- "vitest": "^4.1.8",
26
+ "vitest": "^4.1.9",
27
27
  "@vertesia/tsconfig": "0.1.0"
28
28
  },
29
29
  "dependencies": {
30
- "@aws-sdk/client-s3": "^3.1061.0",
31
- "@aws-sdk/client-textract": "^3.1061.0",
32
- "@aws-sdk/credential-providers": "^3.1061.0",
33
- "@temporalio/activity": "^1.17.2",
34
- "@temporalio/workflow": "^1.17.2",
30
+ "@aws-sdk/client-s3": "^3.1068.0",
31
+ "@aws-sdk/client-textract": "^3.1068.0",
32
+ "@aws-sdk/credential-providers": "^3.1068.0",
33
+ "@temporalio/activity": "^1.18.1",
34
+ "@temporalio/workflow": "^1.18.1",
35
35
  "fast-deep-equal": "^3.1.3",
36
36
  "jsonwebtoken": "^9.0.3",
37
37
  "mime": "^4.1.0",
@@ -44,12 +44,12 @@
44
44
  "tiktoken": "^1.0.22",
45
45
  "tmp": "^0.2.7",
46
46
  "tmp-promise": "^3.0.3",
47
- "undici": "^7.27.0",
47
+ "undici": "^7.27.2",
48
48
  "yaml": "^2.9.0",
49
- "@llumiverse/common": "1.3.0",
50
- "@vertesia/client": "1.4.0-dev.20260615.042549Z",
51
- "@vertesia/common": "1.4.0-dev.20260615.042549Z",
52
- "@vertesia/api-fetch-client": "1.4.0-dev.20260615.042549Z"
49
+ "@vertesia/client": "1.4.0-dev.20260629.130134Z",
50
+ "@llumiverse/common": "1.4.0-dev.20260629.090753Z",
51
+ "@vertesia/common": "1.4.0-dev.20260629.130134Z",
52
+ "@vertesia/api-fetch-client": "1.4.0-dev.20260629.130134Z"
53
53
  },
54
54
  "exports": {
55
55
  ".": {
@@ -107,7 +107,8 @@
107
107
  "lint": "biome lint src",
108
108
  "test": "vitest run",
109
109
  "typecheck:test": "tsc -p tsconfig.test.json --noEmit",
110
- "build": "rm -rf ./lib ./tsconfig.tsbuildinfo && tsc && node ./bin/bundle-workflows.mjs lib/workflows.js lib/workflows-bundle.js",
111
- "clean": "rm -rf ./lib tsconfig.tsbuildinfo"
110
+ "clean:lib": "rimraf ./lib ./tsconfig.tsbuildinfo",
111
+ "build": "pnpm run clean:lib && tsc -p tsconfig.json && node ./bin/bundle-workflows.mjs lib/workflows.js lib/workflows-bundle.js",
112
+ "clean": "rimraf ./node_modules ./lib ./tsconfig.tsbuildinfo"
112
113
  }
113
114
  }
@@ -102,6 +102,10 @@ export async function generateEmbeddings(payload: DSLActivityExecutionPayload<Ge
102
102
  throw new DocumentNotFoundError('Document not found', [objectId]);
103
103
  }
104
104
 
105
+ if (!document.content) {
106
+ throw new DocumentNotFoundError('Document content not found', [objectId]);
107
+ }
108
+
105
109
  let res:
106
110
  | Awaited<ReturnType<typeof generateTextEmbeddings>>
107
111
  | Awaited<ReturnType<typeof generateImageEmbeddings>>
@@ -168,19 +172,14 @@ async function generateTextEmbeddings({ document, client, type, config, force }:
168
172
  };
169
173
  }
170
174
 
171
- const sourceText =
172
- type === SupportedEmbeddingTypes.text
173
- ? document.text
174
- : document.properties
175
- ? JSON.stringify(document.properties)
176
- : undefined;
177
-
178
- if (!sourceText) {
175
+ if (type === SupportedEmbeddingTypes.text && !document.text) {
176
+ return { id: document.id, status: 'failed', message: 'no text found' };
177
+ }
178
+ if (type === SupportedEmbeddingTypes.properties && !document?.properties) {
179
179
  return {
180
180
  id: document.id,
181
- type,
182
- status: 'skipped',
183
- message: type === SupportedEmbeddingTypes.text ? 'no text found' : 'no properties found',
181
+ status: 'failed',
182
+ message: 'no properties found',
184
183
  };
185
184
  }
186
185
 
@@ -191,12 +190,12 @@ async function generateTextEmbeddings({ document, client, type, config, force }:
191
190
  );
192
191
  }
193
192
 
194
- const sourceEtag =
195
- type === SupportedEmbeddingTypes.text ? (document.text_etag ?? md5(sourceText)) : md5(sourceText);
193
+ // Compute text etag for comparison
194
+ const textEtag = document.text_etag ?? (document.text ? md5(document.text) : undefined);
196
195
 
197
196
  // Skip if embeddings already exist with matching etag (unless force=true)
198
197
  const existingEmbedding = document.embeddings?.[type];
199
- if (!force && existingEmbedding?.etag && existingEmbedding.etag === sourceEtag) {
198
+ if (!force && existingEmbedding?.etag && textEtag && existingEmbedding.etag === textEtag) {
200
199
  log.debug(`Skipping ${type} embeddings for document ${document.id} - etag unchanged`);
201
200
  return {
202
201
  id: document.id,
@@ -207,7 +206,15 @@ async function generateTextEmbeddings({ document, client, type, config, force }:
207
206
  }
208
207
 
209
208
  // Count tokens if needed, do not rely on existing token count
210
- const tokenCount = countTokens(sourceText).count;
209
+ let tokenCount: number | undefined;
210
+ if (type === SupportedEmbeddingTypes.text && document.text) {
211
+ tokenCount = countTokens(document.text).count;
212
+ }
213
+
214
+ if (type === SupportedEmbeddingTypes.properties && document.properties) {
215
+ const propertiesText = JSON.stringify(document.properties);
216
+ tokenCount = countTokens(propertiesText).count;
217
+ }
211
218
 
212
219
  const maxTokens = config.max_tokens ?? 8000;
213
220
 
@@ -224,7 +231,7 @@ async function generateTextEmbeddings({ document, client, type, config, force }:
224
231
  } else {
225
232
  log.debug(`Generating ${type} embeddings for document`);
226
233
 
227
- const res = await generateEmbeddingsFromStudio(sourceText, environment, client);
234
+ const res = await generateEmbeddingsFromStudio(JSON.stringify(document[type]), environment, client);
228
235
  const values = res?.results?.[0]?.outputs?.[0]?.values;
229
236
  if (!values) {
230
237
  return {
@@ -240,7 +247,7 @@ async function generateTextEmbeddings({ document, client, type, config, force }:
240
247
  await client.objects.setEmbedding(document.id, type, {
241
248
  values,
242
249
  model: res.model,
243
- etag: sourceEtag,
250
+ etag: textEtag,
244
251
  });
245
252
 
246
253
  return {
@@ -14,16 +14,6 @@ export type DslSimplifiedActivityFunction<ParamsT extends object = Record<string
14
14
  params: ParamsT,
15
15
  ) => Promise<ReturnT>;
16
16
 
17
- export function stripWorkflowContinuationFromVars<T>(vars: T): T {
18
- if (!vars || typeof vars !== 'object' || Array.isArray(vars) || !('_continuation' in vars)) {
19
- return vars;
20
- }
21
-
22
- const rest = { ...(vars as Record<string, unknown>) };
23
- delete rest._continuation;
24
- return rest as T;
25
- }
26
-
27
17
  export function dslProxyActivities<ActivitiesT extends object>(workflowName: string, options: ActivityOptions = {}) {
28
18
  type DslActivities = {
29
19
  [K in keyof ActivitiesT]: ActivitiesT[K] extends DslActivityFunction<infer ParamsT, infer ReturnT>
@@ -44,7 +34,6 @@ export function dslProxyActivities<ActivitiesT extends object>(workflowName: str
44
34
  return (payload: WorkflowExecutionPayload, params: Record<string, unknown>) => {
45
35
  return activityFn({
46
36
  ...payload,
47
- vars: stripWorkflowContinuationFromVars(payload.vars),
48
37
  activity: {
49
38
  name: prop as string,
50
39
  },
@@ -93,7 +93,7 @@ export class ActivityContext<ParamsT extends object> {
93
93
  */
94
94
  get file(): WorkflowInputFile {
95
95
  const input = this.payload.input;
96
- if (!input || input.inputType !== 'files') {
96
+ if (input?.inputType !== 'files') {
97
97
  throw new WorkflowExecutionError('Activity expects files but received objectIds');
98
98
  }
99
99
  // TypeScript now knows input is { inputType: 'files', files: WorkflowInputFile[] }
@@ -115,7 +115,7 @@ export class ActivityContext<ParamsT extends object> {
115
115
  */
116
116
  get files(): WorkflowInputFile[] {
117
117
  const input = this.payload.input;
118
- if (!input || input.inputType !== 'files') {
118
+ if (input?.inputType !== 'files') {
119
119
  throw new WorkflowExecutionError('Activity expects files but received objectIds');
120
120
  }
121
121
  // TypeScript now knows input is { inputType: 'files', files: WorkflowInputFile[] }
package/src/errors.ts CHANGED
@@ -116,9 +116,4 @@ export const WF_NON_RETRYABLE_ERRORS = [
116
116
  'TokenExpiredError',
117
117
  'ZenoClientNotFoundError',
118
118
  'WorkflowExecutionError',
119
- // Temporal raises NotFoundError when an activity isn't registered on the worker
120
- // (e.g. a new tool whose activity barrel export was missed). Retrying can never
121
- // make it appear within a worker version, and the retried failure carries the
122
- // full activity list which blows the payload size limit — so fail fast.
123
- 'NotFoundError',
124
119
  ];
@@ -2,17 +2,19 @@
2
2
  * get a zeno client for a given token
3
3
  */
4
4
 
5
- import type { FETCH_FN } from '@vertesia/api-fetch-client';
6
5
  import { decodeJWT, VertesiaClient, type VertesiaClientProps } from '@vertesia/client';
7
6
  import type { WorkflowExecutionBaseParams } from '@vertesia/common';
8
- import { Agent } from 'undici';
9
7
  import { WorkflowParamNotFoundError } from '../errors.js';
10
8
 
11
- const DEFAULT_WORKFLOW_FETCH_TIMEOUT_MS = 30 * 60 * 1000;
9
+ // Short default timeout for ordinary workflow -> server/store calls (object GETs, status updates,
10
+ // etc.). A stale/dead pooled connection (a server pod scaled down/rolled mid-request) used to hang
11
+ // for the whole 30-minute undici headersTimeout; this bounds it to seconds so it fails fast and the
12
+ // activity is retried. The long path — synchronous interaction execution, which blocks on the model —
13
+ // sets its own long per-request timeout in @vertesia/client (executeInteraction*), overriding this.
14
+ // Override the default via VERTESIA_WORKFLOW_FETCH_TIMEOUT_MS (0/false disables it).
15
+ const DEFAULT_WORKFLOW_FETCH_TIMEOUT_MS = 60 * 1000;
12
16
  const WORKFLOW_FETCH_TIMEOUT_ENV = 'VERTESIA_WORKFLOW_FETCH_TIMEOUT_MS';
13
17
 
14
- let workflowFetch: Promise<FETCH_FN> | undefined;
15
-
16
18
  export function getVertesiaClient(payload: WorkflowExecutionBaseParams<unknown>) {
17
19
  return new VertesiaClient(getVertesiaClientOptions(payload));
18
20
  }
@@ -41,46 +43,32 @@ export function getVertesiaClientOptions(payload: WorkflowExecutionBaseParams<un
41
43
  storeUrl: payload.config.store_url,
42
44
  tokenServerUrl: token.iss,
43
45
  apikey: payload.auth_token,
44
- fetch: getWorkflowFetch(),
46
+ timeout: parseWorkflowFetchTimeoutMs(),
45
47
  };
46
48
  }
47
49
 
48
- function getWorkflowFetch(): Promise<FETCH_FN> {
49
- workflowFetch ??= createWorkflowFetch();
50
- return workflowFetch;
51
- }
52
-
53
- async function createWorkflowFetch(): Promise<FETCH_FN> {
54
- if (typeof globalThis.fetch !== 'function') {
55
- throw new Error('No Fetch implementation found');
56
- }
57
-
58
- const timeoutMs = parseWorkflowFetchTimeoutMs();
59
- if (timeoutMs === 0) {
60
- return globalThis.fetch.bind(globalThis);
61
- }
62
-
63
- const dispatcher = new Agent({
64
- headersTimeout: timeoutMs,
65
- bodyTimeout: timeoutMs,
66
- });
67
-
68
- return (input, init) =>
69
- globalThis.fetch(input, {
70
- ...init,
71
- dispatcher,
72
- } as unknown as RequestInit);
73
- }
74
-
75
- function parseWorkflowFetchTimeoutMs(): number {
76
- const raw = process.env[WORKFLOW_FETCH_TIMEOUT_ENV];
50
+ function parseWorkflowFetchTimeoutMs(): number | false {
51
+ const raw = typeof process !== 'undefined' ? process.env?.[WORKFLOW_FETCH_TIMEOUT_ENV] : undefined;
77
52
  if (!raw) {
78
53
  return DEFAULT_WORKFLOW_FETCH_TIMEOUT_MS;
79
54
  }
80
55
 
81
- const parsed = Number.parseInt(raw, 10);
82
- if (!Number.isFinite(parsed) || parsed < 0) {
56
+ // Only an explicit 0/false disables the fail-fast timeout.
57
+ const normalized = raw.trim().toLowerCase();
58
+ if (normalized === '0' || normalized === 'false') {
59
+ return false;
60
+ }
61
+
62
+ // Require a plain positive integer count of milliseconds. parseInt() is too lenient — it
63
+ // reads "30s" as 30 (i.e. 30ms, off by 1000x) and "60000ms" as 60000 — so validate the raw
64
+ // string first. Any other garbage (typo, negative, unit suffix) keeps the fail-fast default
65
+ // rather than silently disabling the timeout, which would reintroduce the multi-minute hang.
66
+ if (!/^\d+$/.test(normalized) || Number.parseInt(normalized, 10) <= 0) {
67
+ console.warn(
68
+ `[workflow] Invalid ${WORKFLOW_FETCH_TIMEOUT_ENV}="${raw}"; ` +
69
+ `expected a positive integer in milliseconds. Falling back to ${DEFAULT_WORKFLOW_FETCH_TIMEOUT_MS}ms`,
70
+ );
83
71
  return DEFAULT_WORKFLOW_FETCH_TIMEOUT_MS;
84
72
  }
85
- return parsed;
73
+ return Number.parseInt(normalized, 10);
86
74
  }
@@ -1,114 +0,0 @@
1
- import { MockActivityEnvironment } from '@temporalio/testing';
2
- import type { VertesiaClient } from '@vertesia/client';
3
- import {
4
- ContentEventName,
5
- type ContentObject,
6
- type DSLActivityExecutionPayload,
7
- SupportedEmbeddingTypes,
8
- } from '@vertesia/common';
9
- import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
10
- import type { ActivityContext } from '../dsl/setup/ActivityContext.js';
11
- import { type GenerateEmbeddingsParams, generateEmbeddings } from './generateEmbeddings.js';
12
-
13
- vi.mock('../dsl/setup/ActivityContext.js', async (importOriginal) => {
14
- const actual = await importOriginal<typeof import('../dsl/setup/ActivityContext.js')>();
15
- return { ...actual, setupActivity: vi.fn() };
16
- });
17
-
18
- let testEnv: MockActivityEnvironment;
19
-
20
- beforeAll(() => {
21
- testEnv = new MockActivityEnvironment();
22
- });
23
-
24
- beforeEach(() => {
25
- vi.clearAllMocks();
26
- });
27
-
28
- const createPayload = (params: GenerateEmbeddingsParams): DSLActivityExecutionPayload<GenerateEmbeddingsParams> => {
29
- const activityParams: GenerateEmbeddingsParams & Record<string, unknown> = { ...params };
30
- return {
31
- auth_token: 'mock-token',
32
- account_id: 'test-account',
33
- project_id: 'test-project',
34
- params,
35
- config: { studio_url: 'http://mock-studio', store_url: 'http://mock-store' },
36
- workflow_name: 'StandardDocumentIntake',
37
- event: ContentEventName.create,
38
- objectIds: ['properties-only-object'],
39
- input: { inputType: 'objectIds', objectIds: ['properties-only-object'] },
40
- vars: {},
41
- activity: { name: 'generateEmbeddings', params: activityParams },
42
- };
43
- };
44
-
45
- describe('generateEmbeddings', () => {
46
- it('should generate property embeddings for an object without content', async () => {
47
- const { setupActivity } = await import('../dsl/setup/ActivityContext.js');
48
- const document = {
49
- id: 'properties-only-object',
50
- properties: {
51
- title: 'Properties-only object',
52
- category: 'metadata',
53
- },
54
- } satisfies Partial<ContentObject>;
55
- const embeddingResponse = {
56
- model: 'embedding-model',
57
- results: [{ outputs: [{ values: [0.1, 0.2, 0.3] }] }],
58
- };
59
- const client = {
60
- objects: {
61
- retrieve: vi.fn().mockResolvedValue(document),
62
- setEmbedding: vi.fn().mockResolvedValue(undefined),
63
- },
64
- environments: {
65
- embeddings: vi.fn().mockResolvedValue(embeddingResponse),
66
- },
67
- } as unknown as VertesiaClient;
68
- const params = {
69
- type: SupportedEmbeddingTypes.properties,
70
- force: false,
71
- } satisfies GenerateEmbeddingsParams;
72
-
73
- vi.mocked(setupActivity).mockResolvedValue({
74
- client,
75
- objectId: document.id,
76
- params,
77
- fetchProject: vi.fn().mockResolvedValue({
78
- name: 'Test Project',
79
- namespace: 'test-project',
80
- configuration: {
81
- embeddings: {
82
- [SupportedEmbeddingTypes.properties]: {
83
- enabled: true,
84
- environment: 'test-environment',
85
- max_tokens: 8000,
86
- },
87
- },
88
- },
89
- }),
90
- } as unknown as ActivityContext<GenerateEmbeddingsParams>);
91
-
92
- const result = await testEnv.run(generateEmbeddings, createPayload(params));
93
-
94
- expect(result).toEqual({
95
- id: document.id,
96
- type: SupportedEmbeddingTypes.properties,
97
- status: 'completed',
98
- len: 3,
99
- });
100
- expect(client.objects.retrieve).toHaveBeenCalledWith(
101
- document.id,
102
- '+text +parts +embeddings +tokens +properties',
103
- );
104
- expect(client.environments.embeddings).toHaveBeenCalledWith('test-environment', {
105
- inputs: [{ type: 'text', text: JSON.stringify(document.properties) }],
106
- model: undefined,
107
- });
108
- expect(client.objects.setEmbedding).toHaveBeenCalledWith(document.id, SupportedEmbeddingTypes.properties, {
109
- values: embeddingResponse.results[0].outputs[0].values,
110
- model: embeddingResponse.model,
111
- etag: expect.any(String),
112
- });
113
- });
114
- });
@@ -1,23 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
- import { stripWorkflowContinuationFromVars } from './dslProxyActivities.js';
3
-
4
- describe('stripWorkflowContinuationFromVars', () => {
5
- it('removes continue-as-new state from activity vars', () => {
6
- const vars = {
7
- interaction: 'sys:AppDeveloper',
8
- _continuation: {
9
- conversationState: { large: true },
10
- },
11
- };
12
-
13
- expect(stripWorkflowContinuationFromVars(vars)).toEqual({
14
- interaction: 'sys:AppDeveloper',
15
- });
16
- expect(vars).toHaveProperty('_continuation');
17
- });
18
-
19
- it('returns primitive and array vars unchanged', () => {
20
- expect(stripWorkflowContinuationFromVars(undefined)).toBeUndefined();
21
- expect(stripWorkflowContinuationFromVars(['a'])).toEqual(['a']);
22
- });
23
- });