@workglow/task-graph 0.2.37 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -46
- package/dist/browser.js +639 -368
- package/dist/browser.js.map +19 -15
- package/dist/bun.js +639 -368
- package/dist/bun.js.map +19 -15
- package/dist/cache/CacheJanitor.d.ts +27 -0
- package/dist/cache/CacheJanitor.d.ts.map +1 -0
- package/dist/cache/CachePolicy.d.ts +16 -0
- package/dist/cache/CachePolicy.d.ts.map +1 -0
- package/dist/cache/CacheRegistry.d.ts +30 -0
- package/dist/cache/CacheRegistry.d.ts.map +1 -0
- package/dist/cache/RunPrivateCacheRepo.d.ts +56 -0
- package/dist/cache/RunPrivateCacheRepo.d.ts.map +1 -0
- package/dist/cache/index.d.ts +10 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/common.d.ts +1 -0
- package/dist/common.d.ts.map +1 -1
- package/dist/node.js +639 -368
- package/dist/node.js.map +19 -15
- package/dist/storage/TaskOutputRepository.d.ts +40 -4
- package/dist/storage/TaskOutputRepository.d.ts.map +1 -1
- package/dist/storage/TaskOutputTabularRepository.d.ts +27 -0
- package/dist/storage/TaskOutputTabularRepository.d.ts.map +1 -1
- package/dist/task/CacheCoordinator.d.ts +17 -0
- package/dist/task/CacheCoordinator.d.ts.map +1 -1
- package/dist/task/FallbackTask.d.ts +0 -1
- package/dist/task/FallbackTask.d.ts.map +1 -1
- package/dist/task/FallbackTaskRunner.d.ts +8 -0
- package/dist/task/FallbackTaskRunner.d.ts.map +1 -1
- package/dist/task/ITask.d.ts +21 -1
- package/dist/task/ITask.d.ts.map +1 -1
- package/dist/task/Task.d.ts +50 -0
- package/dist/task/Task.d.ts.map +1 -1
- package/dist/task/TaskJSON.d.ts +0 -13
- package/dist/task/TaskJSON.d.ts.map +1 -1
- package/dist/task/TaskRunner.d.ts +28 -0
- package/dist/task/TaskRunner.d.ts.map +1 -1
- package/dist/task-graph/Conversions.d.ts.map +1 -1
- package/dist/task-graph/StreamPump.d.ts +8 -0
- package/dist/task-graph/StreamPump.d.ts.map +1 -1
- package/dist/task-graph/TaskGraph.d.ts +7 -0
- package/dist/task-graph/TaskGraph.d.ts.map +1 -1
- package/dist/task-graph/TaskGraphRunner.d.ts +45 -0
- package/dist/task-graph/TaskGraphRunner.d.ts.map +1 -1
- package/package.json +7 -7
- package/src/EXECUTION_MODEL.md +91 -2
package/README.md
CHANGED
|
@@ -182,7 +182,7 @@ You can define schemas using plain JSON Schema, TypeBox, or Zod. Here are exampl
|
|
|
182
182
|
#### Using Plain JSON Schema
|
|
183
183
|
|
|
184
184
|
```typescript
|
|
185
|
-
import { Task, IExecuteContext } from "@workglow/task-graph";
|
|
185
|
+
import { Task, IExecuteContext, type CachePolicy } from "@workglow/task-graph";
|
|
186
186
|
import { DataPortSchema, FromSchema } from "@workglow/util";
|
|
187
187
|
|
|
188
188
|
const MyInputSchema = {
|
|
@@ -232,7 +232,7 @@ class TextProcessorTask extends Task<MyInput, MyOutput> {
|
|
|
232
232
|
static readonly title = "Text Processor";
|
|
233
233
|
static readonly description = "Processes text";
|
|
234
234
|
static readonly category = "Text Processing";
|
|
235
|
-
static readonly
|
|
235
|
+
static readonly cachePolicy: CachePolicy = { kind: "deterministic" };
|
|
236
236
|
|
|
237
237
|
static inputSchema() {
|
|
238
238
|
return MyInputSchema;
|
|
@@ -268,7 +268,7 @@ class TextProcessorTask extends Task<MyInput, MyOutput> {
|
|
|
268
268
|
TypeBox schemas are JSON Schema compatible and can be used directly:
|
|
269
269
|
|
|
270
270
|
```typescript
|
|
271
|
-
import { Task, IExecuteContext } from "@workglow/task-graph";
|
|
271
|
+
import { Task, IExecuteContext, type CachePolicy } from "@workglow/task-graph";
|
|
272
272
|
import { Type } from "@sinclair/typebox";
|
|
273
273
|
import { DataPortSchema, FromSchema } from "@workglow/util";
|
|
274
274
|
|
|
@@ -291,7 +291,7 @@ class TextProcessorTask extends Task<MyInput, MyOutput> {
|
|
|
291
291
|
static readonly title = "Text Processor";
|
|
292
292
|
static readonly description = "Processes text";
|
|
293
293
|
static readonly category = "Text Processing";
|
|
294
|
-
static readonly
|
|
294
|
+
static readonly cachePolicy: CachePolicy = { kind: "deterministic" };
|
|
295
295
|
|
|
296
296
|
static inputSchema() {
|
|
297
297
|
return MyInputSchema;
|
|
@@ -343,7 +343,7 @@ class TextProcessorTask extends Task<MyInput, MyOutput> {
|
|
|
343
343
|
Zod 4 has built-in JSON Schema support using the `.toJSONSchema()` method:
|
|
344
344
|
|
|
345
345
|
```typescript
|
|
346
|
-
import { Task, IExecuteContext } from "@workglow/task-graph";
|
|
346
|
+
import { Task, IExecuteContext, type CachePolicy } from "@workglow/task-graph";
|
|
347
347
|
import { z } from "zod";
|
|
348
348
|
import { DataPortSchema } from "@workglow/util";
|
|
349
349
|
|
|
@@ -371,7 +371,7 @@ class TextProcessorTask extends Task<MyInput, MyOutput> {
|
|
|
371
371
|
static readonly title = "Text Processor";
|
|
372
372
|
static readonly description = "Processes text";
|
|
373
373
|
static readonly category = "Text Processing";
|
|
374
|
-
static readonly
|
|
374
|
+
static readonly cachePolicy: CachePolicy = { kind: "deterministic" };
|
|
375
375
|
|
|
376
376
|
static inputSchema() {
|
|
377
377
|
return MyInputSchema;
|
|
@@ -711,26 +711,174 @@ const result = await workflow.run();
|
|
|
711
711
|
|
|
712
712
|
## Storage and Caching
|
|
713
713
|
|
|
714
|
-
###
|
|
714
|
+
### Cache Policy
|
|
715
715
|
|
|
716
|
-
|
|
716
|
+
Every task declares how its outputs may be cached through a `CachePolicy`:
|
|
717
|
+
|
|
718
|
+
```typescript
|
|
719
|
+
type CachePolicy =
|
|
720
|
+
| { kind: "deterministic" } // same inputs → same outputs; safe to share across runs
|
|
721
|
+
| { kind: "private" } // non-deterministic but worth caching; scoped to one run
|
|
722
|
+
| { kind: "none" }; // do not cache (side-effecting tasks)
|
|
723
|
+
```
|
|
724
|
+
|
|
725
|
+
The default is `{ kind: "deterministic" }`. Side-effecting tasks (writes to external systems, sends messages) declare `{ kind: "none" }`. Non-deterministic tasks worth caching for the lifetime of a single run (image generation without a seed, model calls without a temperature lock) declare `{ kind: "private" }` — their outputs are namespaced by `runId` and visible only to that run and its restarts.
|
|
726
|
+
|
|
727
|
+
For tasks whose policy depends on inputs (a seed turns "private" into "deterministic"), override `getCachePolicy(inputs)`:
|
|
728
|
+
|
|
729
|
+
```typescript
|
|
730
|
+
class AiImageOutputTask extends Task<ImageInput, ImageOutput> {
|
|
731
|
+
static readonly type = "AiImageOutputTask";
|
|
732
|
+
// Static default used when the instance method is not overridden.
|
|
733
|
+
static readonly cachePolicy: CachePolicy = { kind: "private" };
|
|
734
|
+
|
|
735
|
+
override getCachePolicy(inputs: ImageInput): CachePolicy {
|
|
736
|
+
return inputs.seed !== undefined
|
|
737
|
+
? { kind: "deterministic" }
|
|
738
|
+
: { kind: "private" };
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
```
|
|
742
|
+
|
|
743
|
+
### CacheRegistry: two slots
|
|
744
|
+
|
|
745
|
+
The runner picks a repository per task by reading `CACHE_REGISTRY` from the `ServiceRegistry`. The registry has exactly two slots:
|
|
746
|
+
|
|
747
|
+
```typescript
|
|
748
|
+
interface CacheRegistry {
|
|
749
|
+
deterministic?: TaskOutputRepository;
|
|
750
|
+
private?: TaskOutputRepository;
|
|
751
|
+
}
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
Both slots are optional. A missing slot is a silent no-op — the task still runs, it just runs uncached. Apps wire the slots they care about:
|
|
755
|
+
|
|
756
|
+
```typescript
|
|
757
|
+
import {
|
|
758
|
+
CACHE_REGISTRY,
|
|
759
|
+
DefaultCacheRegistry,
|
|
760
|
+
TaskOutputPrimaryKeyNames,
|
|
761
|
+
TaskOutputSchema,
|
|
762
|
+
TaskOutputTabularRepository,
|
|
763
|
+
} from "@workglow/task-graph";
|
|
764
|
+
import { ServiceRegistry } from "@workglow/util";
|
|
765
|
+
import { Sqlite, SqliteTabularStorage } from "@workglow/sqlite/storage";
|
|
766
|
+
|
|
767
|
+
await Sqlite.init();
|
|
768
|
+
|
|
769
|
+
const deterministic = new TaskOutputTabularRepository({
|
|
770
|
+
tabularRepository: new SqliteTabularStorage(
|
|
771
|
+
"./cache.sqlite",
|
|
772
|
+
"task_outputs_deterministic",
|
|
773
|
+
TaskOutputSchema,
|
|
774
|
+
TaskOutputPrimaryKeyNames,
|
|
775
|
+
["createdAt"]
|
|
776
|
+
),
|
|
777
|
+
});
|
|
778
|
+
|
|
779
|
+
const privateBacking = new TaskOutputTabularRepository({
|
|
780
|
+
tabularRepository: new SqliteTabularStorage(
|
|
781
|
+
"./cache.sqlite",
|
|
782
|
+
"task_outputs_private",
|
|
783
|
+
TaskOutputSchema,
|
|
784
|
+
TaskOutputPrimaryKeyNames,
|
|
785
|
+
["createdAt"]
|
|
786
|
+
),
|
|
787
|
+
});
|
|
788
|
+
|
|
789
|
+
const registry = new ServiceRegistry();
|
|
790
|
+
registry.registerInstance(
|
|
791
|
+
CACHE_REGISTRY,
|
|
792
|
+
new DefaultCacheRegistry({ deterministic, private: privateBacking })
|
|
793
|
+
);
|
|
794
|
+
|
|
795
|
+
// TaskGraph.run takes (input, config) — runId/registry are run config, not input.
|
|
796
|
+
await graph.run({}, { registry, runId: "run-" + crypto.randomUUID() });
|
|
797
|
+
```
|
|
798
|
+
|
|
799
|
+
The runner constructs a per-run `RunPrivateCacheRepo` wrapper over the `private` slot, namespaced by `runId`. The wrapper exists only for the duration of the run; the rows it writes survive in the backing store until they are either cleaned up explicitly (on successful completion) or swept by the TTL janitor (after a crashed run is abandoned).
|
|
800
|
+
|
|
801
|
+
### Run identity and durable execution
|
|
802
|
+
|
|
803
|
+
A run is identified by an opaque `runId` string supplied by the caller of `.run()` in the run config (the second argument; the first argument is graph input):
|
|
804
|
+
|
|
805
|
+
```typescript
|
|
806
|
+
await graph.run({}, { runId, registry });
|
|
807
|
+
```
|
|
808
|
+
|
|
809
|
+
- **First start** of a user-triggered run: generate a fresh `runId` (UUID is typical) and persist it alongside the rest of the run metadata.
|
|
810
|
+
- **Restart** after a crash: re-dispatch with the **same** `runId`. The new process constructs a fresh in-memory scheduler but the durable `private` repo still holds the outputs of every task that completed before the crash. Cache hits skip that work; the run finishes from where it effectively left off.
|
|
811
|
+
- **Concurrent runs** of the same workflow get different `runId`s, so they never see each other's private-tier outputs.
|
|
812
|
+
|
|
813
|
+
The runner does not generate `runId` for you. That is the caller's job — only the caller knows whether this `.run()` call is a fresh start or a restart.
|
|
814
|
+
|
|
815
|
+
If a `private` slot is registered and the graph contains any task whose policy may resolve to `kind: "private"` (statically or via `getCachePolicy(inputs)`), the runner rejects the run synchronously when `runId` is missing. Graphs without a private slot (or without any private-policy task) don't need a `runId`.
|
|
816
|
+
|
|
817
|
+
#### Cleanup
|
|
818
|
+
|
|
819
|
+
- On `succeeded`, the runner awaits `privateRepo.clearRun()` before resolving so that a restart with the same `runId` cannot accidentally hit stale entries from the previous attempt. The wrapper already knows its `runId`, so the method takes no arguments.
|
|
820
|
+
- On crash (no terminal status reached), nothing happens at the cache layer — the entries stay on disk so the restart can find them.
|
|
821
|
+
- For abandoned runs (crashed and never restarted), schedule the `CacheJanitor`:
|
|
822
|
+
|
|
823
|
+
```typescript
|
|
824
|
+
import { CacheJanitor } from "@workglow/task-graph";
|
|
825
|
+
|
|
826
|
+
const janitor = new CacheJanitor({ privateBacking });
|
|
827
|
+
// Sweep run-private rows older than 24 hours.
|
|
828
|
+
await janitor.sweepStaleRunPrivate(24 * 60 * 60 * 1000);
|
|
829
|
+
```
|
|
830
|
+
|
|
831
|
+
The janitor only touches rows with the `__run:` prefix that `RunPrivateCacheRepo` writes; deterministic-tier rows are never affected.
|
|
832
|
+
|
|
833
|
+
#### Durability warning
|
|
834
|
+
|
|
835
|
+
At run start the runner checks whether the registered `private` repo reports `isDurable() === true`. If a graph contains a `private`-policy task but the repo is backed by, say, in-memory storage, a one-time warning is logged: restart survival cannot work against a non-durable backend. For production, point the `private` slot at SQLite, Postgres, or another durable store.
|
|
836
|
+
|
|
837
|
+
### Cache key and `cacheVersion`
|
|
838
|
+
|
|
839
|
+
The cache key is:
|
|
840
|
+
|
|
841
|
+
```
|
|
842
|
+
sha256(taskType + getCacheVersion() + fingerprint(inputs))
|
|
843
|
+
```
|
|
844
|
+
|
|
845
|
+
`fingerprint(inputs)` normalizes inputs using the existing `PortCodec` so that ports with `format` annotations hash by their stable wire representation.
|
|
846
|
+
|
|
847
|
+
`Task.version` (a static number, default `1`) feeds `getCacheVersion()`, which walks the prototype chain and combines each ancestor's version. Bump `version` when the task's semantics change (a new prompt template, new defaults, a bug fix in the implementation) to force misses for all prior keys:
|
|
848
|
+
|
|
849
|
+
```typescript
|
|
850
|
+
class SummarizeTask extends Task<...> {
|
|
851
|
+
static readonly type = "SummarizeTask";
|
|
852
|
+
static readonly version = 3; // bump → all old cache entries become stale
|
|
853
|
+
static readonly cachePolicy: CachePolicy = { kind: "deterministic" };
|
|
854
|
+
// ...
|
|
855
|
+
}
|
|
856
|
+
```
|
|
857
|
+
|
|
858
|
+
Override `getCacheVersion()` only if you need a different versioning story (e.g., include the runtime model hash).
|
|
859
|
+
|
|
860
|
+
### End-to-end example
|
|
717
861
|
|
|
718
862
|
```typescript
|
|
719
863
|
import {
|
|
720
864
|
Task,
|
|
721
865
|
TaskGraph,
|
|
722
866
|
Workflow,
|
|
867
|
+
CACHE_REGISTRY,
|
|
868
|
+
DefaultCacheRegistry,
|
|
723
869
|
TaskOutputPrimaryKeyNames,
|
|
724
870
|
TaskOutputSchema,
|
|
725
871
|
TaskOutputTabularRepository,
|
|
872
|
+
type CachePolicy,
|
|
726
873
|
} from "@workglow/task-graph";
|
|
874
|
+
import { ServiceRegistry } from "@workglow/util";
|
|
727
875
|
import { InMemoryTabularStorage } from "@workglow/storage";
|
|
728
876
|
import { DataPortSchema } from "@workglow/util";
|
|
729
877
|
|
|
730
|
-
// A
|
|
878
|
+
// A task with deterministic cache policy that simulates expensive work
|
|
731
879
|
class ExpensiveTask extends Task<{ n: number }, { result: number }> {
|
|
732
880
|
static readonly type = "ExpensiveTask";
|
|
733
|
-
static readonly
|
|
881
|
+
static readonly cachePolicy: CachePolicy = { kind: "deterministic" };
|
|
734
882
|
|
|
735
883
|
static inputSchema() {
|
|
736
884
|
return {
|
|
@@ -761,57 +909,38 @@ class ExpensiveTask extends Task<{ n: number }, { result: number }> {
|
|
|
761
909
|
}
|
|
762
910
|
}
|
|
763
911
|
|
|
764
|
-
//
|
|
765
|
-
|
|
912
|
+
// Build a CacheRegistry with a deterministic slot. (Private slot omitted here —
|
|
913
|
+
// ExpensiveTask is deterministic, so it never needs the private tier.)
|
|
914
|
+
const deterministic = new TaskOutputTabularRepository({
|
|
766
915
|
tabularRepository: new InMemoryTabularStorage(TaskOutputSchema, TaskOutputPrimaryKeyNames, [
|
|
767
916
|
"createdAt",
|
|
768
917
|
]),
|
|
769
918
|
});
|
|
770
919
|
|
|
771
|
-
|
|
772
|
-
|
|
920
|
+
const registry = new ServiceRegistry();
|
|
921
|
+
registry.registerInstance(
|
|
922
|
+
CACHE_REGISTRY,
|
|
923
|
+
new DefaultCacheRegistry({ deterministic })
|
|
924
|
+
);
|
|
925
|
+
|
|
926
|
+
const graph = new TaskGraph();
|
|
773
927
|
graph.addTask(new ExpensiveTask({ n: 42 }, { id: "exp" }));
|
|
774
928
|
|
|
929
|
+
// TaskGraph.run takes (input, config). registry/runId live in config.
|
|
775
930
|
let t = Date.now();
|
|
776
|
-
await graph.run();
|
|
931
|
+
await graph.run({}, { registry, runId: "run-1" });
|
|
777
932
|
const firstRunMs = Date.now() - t;
|
|
778
933
|
|
|
779
934
|
t = Date.now();
|
|
780
|
-
await graph.run(); //
|
|
935
|
+
await graph.run({}, { registry, runId: "run-2" }); // different run, same inputs → cache hit
|
|
781
936
|
const secondRunMs = Date.now() - t;
|
|
782
937
|
|
|
783
938
|
console.log({ firstRunMs, secondRunMs });
|
|
784
939
|
// e.g. { firstRunMs: ~500, secondRunMs: ~1-5 }
|
|
785
|
-
|
|
786
|
-
// Example 2: Direct Task caching across instances
|
|
787
|
-
const missTask = new ExpensiveTask({ n: 43 }, { outputCache });
|
|
788
|
-
t = Date.now();
|
|
789
|
-
await missTask.run(); // cache miss -> compute and store
|
|
790
|
-
const missMs = Date.now() - t;
|
|
791
|
-
|
|
792
|
-
const hitTask = new ExpensiveTask({ n: 43 }, { outputCache });
|
|
793
|
-
t = Date.now();
|
|
794
|
-
await hitTask.run(); // cache hit -> instant
|
|
795
|
-
const hitMs = Date.now() - t;
|
|
796
|
-
|
|
797
|
-
console.log({ missMs, hitMs });
|
|
798
|
-
// e.g. { missMs: ~500, hitMs: ~1-5 }
|
|
799
|
-
|
|
800
|
-
// Example 3: Workflow with the same cache
|
|
801
|
-
const workflow = new Workflow(outputCache);
|
|
802
|
-
workflow.addTask(new ExpensiveTask({ n: 10 }));
|
|
803
|
-
|
|
804
|
-
t = Date.now();
|
|
805
|
-
await workflow.run(); // compute
|
|
806
|
-
const wfFirstMs = Date.now() - t;
|
|
807
|
-
|
|
808
|
-
t = Date.now();
|
|
809
|
-
await workflow.run(); // cached
|
|
810
|
-
const wfSecondMs = Date.now() - t;
|
|
811
|
-
|
|
812
|
-
console.log({ wfFirstMs, wfSecondMs });
|
|
813
940
|
```
|
|
814
941
|
|
|
942
|
+
The deterministic slot is shared across runs — that is the whole point. The private slot is per-run on read and per-run on cleanup, but the underlying storage handle is long-lived (one connection, many runs). Set up the registry once at app startup; bind it to every `.run()` call.
|
|
943
|
+
|
|
815
944
|
### Task Graph Persistence
|
|
816
945
|
|
|
817
946
|
```typescript
|
|
@@ -859,9 +988,8 @@ import {
|
|
|
859
988
|
FsFolderTabularStorage,
|
|
860
989
|
InMemoryTabularStorage,
|
|
861
990
|
IndexedDbTabularStorage,
|
|
862
|
-
SqliteTabularStorage,
|
|
863
991
|
} from "@workglow/storage";
|
|
864
|
-
import { Sqlite } from "@workglow/storage
|
|
992
|
+
import { Sqlite, SqliteTabularStorage } from "@workglow/sqlite/storage";
|
|
865
993
|
|
|
866
994
|
// In-memory (e.g. tests)
|
|
867
995
|
const memoryOutput = new TaskOutputTabularRepository({
|