@workglow/dataset 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +1134 -0
  3. package/dist/browser.js +1053 -0
  4. package/dist/browser.js.map +20 -0
  5. package/dist/bun.js +1054 -0
  6. package/dist/bun.js.map +20 -0
  7. package/dist/common-server.d.ts +7 -0
  8. package/dist/common-server.d.ts.map +1 -0
  9. package/dist/common.d.ts +17 -0
  10. package/dist/common.d.ts.map +1 -0
  11. package/dist/document/Document.d.ts +50 -0
  12. package/dist/document/Document.d.ts.map +1 -0
  13. package/dist/document/DocumentDataset.d.ts +79 -0
  14. package/dist/document/DocumentDataset.d.ts.map +1 -0
  15. package/dist/document/DocumentDatasetRegistry.d.ts +29 -0
  16. package/dist/document/DocumentDatasetRegistry.d.ts.map +1 -0
  17. package/dist/document/DocumentNode.d.ts +31 -0
  18. package/dist/document/DocumentNode.d.ts.map +1 -0
  19. package/dist/document/DocumentSchema.d.ts +1668 -0
  20. package/dist/document/DocumentSchema.d.ts.map +1 -0
  21. package/dist/document/DocumentStorageSchema.d.ts +43 -0
  22. package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
  23. package/dist/document/StructuralParser.d.ts +30 -0
  24. package/dist/document/StructuralParser.d.ts.map +1 -0
  25. package/dist/document-chunk/DocumentChunkDataset.d.ts +79 -0
  26. package/dist/document-chunk/DocumentChunkDataset.d.ts.map +1 -0
  27. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts +29 -0
  28. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts.map +1 -0
  29. package/dist/document-chunk/DocumentChunkSchema.d.ts +55 -0
  30. package/dist/document-chunk/DocumentChunkSchema.d.ts.map +1 -0
  31. package/dist/node.js +1053 -0
  32. package/dist/node.js.map +20 -0
  33. package/dist/types.d.ts +7 -0
  34. package/dist/types.d.ts.map +1 -0
  35. package/dist/util/DatasetSchema.d.ts +85 -0
  36. package/dist/util/DatasetSchema.d.ts.map +1 -0
  37. package/package.json +54 -0
  38. package/src/document-chunk/README.md +362 -0
package/README.md ADDED
@@ -0,0 +1,1134 @@
1
+ # @workglow/storage
2
+
3
+ Modular storage solutions for the Workglow.AI platform with multiple backend implementations. Provides consistent interfaces for key-value storage, tabular data storage, and job queue persistence.
4
+
5
+ - [Quick Start](#quick-start)
6
+ - [Installation](#installation)
7
+ - [Core Concepts](#core-concepts)
8
+ - [Type Safety](#type-safety)
9
+ - [Environment Compatibility](#environment-compatibility)
10
+ - [Import Patterns](#import-patterns)
11
+ - [Storage Types](#storage-types)
12
+ - [Key-Value Storage](#key-value-storage)
13
+ - [Basic Usage](#basic-usage)
14
+ - [Environment-Specific Examples](#environment-specific-examples)
15
+ - [Bulk Operations](#bulk-operations)
16
+ - [Event Handling](#event-handling)
17
+ - [Tabular Storage](#tabular-storage)
18
+ - [Schema Definition](#schema-definition)
19
+ - [CRUD Operations](#crud-operations)
20
+ - [Bulk Operations](#bulk-operations-1)
21
+ - [Searching and Filtering](#searching-and-filtering)
22
+ - [Environment-Specific Tabular Storage](#environment-specific-tabular-storage)
23
+ - [Queue Storage](#queue-storage)
24
+ - [Basic Job Queue Operations](#basic-job-queue-operations)
25
+ - [Job Management](#job-management)
26
+ - [Environment-Specific Usage](#environment-specific-usage)
27
+ - [Browser Environment](#browser-environment)
28
+ - [Node.js Environment](#nodejs-environment)
29
+ - [Bun Environment](#bun-environment)
30
+ - [Advanced Features](#advanced-features)
31
+ - [Repository Registry](#repository-registry)
32
+ - [Event-Driven Architecture](#event-driven-architecture)
33
+ - [Compound Primary Keys](#compound-primary-keys)
34
+ - [Custom File Layout (KV on filesystem)](#custom-file-layout-kv-on-filesystem)
35
+ - [API Reference](#api-reference)
36
+ - [IKvStorage\<Key, Value\>](#ikvstoragekey-value)
37
+ - [ITabularStorage\<Schema, PrimaryKeyNames\>](#itabularstorageschema-primarykeynames)
38
+ - [IQueueStorage\<Input, Output\>](#iqueuestorageinput-output)
39
+ - [Examples](#examples)
40
+ - [User Management System](#user-management-system)
41
+ - [Configuration Management](#configuration-management)
42
+ - [Testing](#testing)
43
+ - [Writing Tests for Your Storage Usage](#writing-tests-for-your-storage-usage)
44
+ - [License](#license)
45
+
46
+ ## Quick Start
47
+
48
+ ```typescript
49
+ // Key-Value Storage (simple data)
50
+ import { InMemoryKvStorage } from "@workglow/storage";
51
+
52
+ const kvStore = new InMemoryKvStorage<string, { name: string; age: number }>();
53
+ await kvStore.put("user:123", { name: "Alice", age: 30 });
54
+ const kvUser = await kvStore.get("user:123"); // { name: "Alice", age: 30 }
55
+ ```
56
+
57
+ ```typescript
58
+ // Tabular Storage (structured data with schemas)
59
+ import { InMemoryTabularStorage } from "@workglow/storage";
60
+ import { JsonSchema } from "@workglow/util";
61
+
62
+ const userSchema = {
63
+ type: "object",
64
+ properties: {
65
+ id: { type: "string" },
66
+ name: { type: "string" },
67
+ email: { type: "string" },
68
+ age: { type: "number" },
69
+ },
70
+ required: ["id", "name", "email", "age"],
71
+ additionalProperties: false,
72
+ } as const satisfies JsonSchema;
73
+
74
+ const userRepo = new InMemoryTabularStorage<typeof userSchema, ["id"]>(
75
+ userSchema,
76
+ ["id"], // primary key
77
+ ["email"] // additional indexes
78
+ );
79
+
80
+ await userRepo.put({ id: "123", name: "Alice", email: "alice@example.com", age: 30 });
81
+ const user = await userRepo.get({ id: "123" });
82
+ ```
83
+
84
+ ## Installation
85
+
86
+ ```bash
87
+ # Using bun (recommended)
88
+ bun install @workglow/storage
89
+
90
+ # Using npm
91
+ npm install @workglow/storage
92
+
93
+ # Using yarn
94
+ yarn add @workglow/storage
95
+ ```
96
+
97
+ ## Core Concepts
98
+
99
+ ### Type Safety
100
+
101
+ All storage implementations are fully typed with TypeScript and use JSON Schema for runtime validation:
102
+
103
+ ```typescript
104
+ import { JsonSchema, FromSchema } from "@workglow/util";
105
+
106
+ // Define your data structure
107
+ const ProductSchema = {
108
+ type: "object",
109
+ properties: {
110
+ id: { type: "string" },
111
+ name: { type: "string" },
112
+ price: { type: "number" },
113
+ category: { type: "string" },
114
+ inStock: { type: "boolean" },
115
+ },
116
+ required: ["id", "name", "price", "category", "inStock"],
117
+ additionalProperties: false,
118
+ } as const satisfies JsonSchema;
119
+
120
+ // TypeScript automatically infers:
121
+ // Entity = FromSchema<typeof ProductSchema>
122
+ // PrimaryKey = { id: string }
123
+ ```
124
+
125
+ ### Environment Compatibility
126
+
127
+ | Storage Type | Node.js | Bun | Browser | Persistence |
128
+ | ------------ | ------- | --- | ------- | ----------- |
129
+ | InMemory | ✅ | ✅ | ✅ | ❌ |
130
+ | IndexedDB | ❌ | ❌ | ✅ | ✅ |
131
+ | SQLite | ✅ | ✅ | ❌ | ✅ |
132
+ | PostgreSQL | ✅ | ✅ | ❌ | ✅ |
133
+ | Supabase | ✅ | ✅ | ✅ | ✅ |
134
+ | FileSystem | ✅ | ✅ | ❌ | ✅ |
135
+
136
+ ### Import Patterns
137
+
138
+ The package uses conditional exports, so importing from `@workglow/storage` automatically selects the right build for your runtime (browser, Node.js, or Bun).
139
+
140
+ ```typescript
141
+ // Import from the top-level package; it resolves to the correct target per environment
142
+ import { InMemoryKvStorage, SqliteTabularStorage } from "@workglow/storage";
143
+ ```
144
+
145
+ ## Storage Types
146
+
147
+ ### Key-Value Storage
148
+
149
+ Simple key-value storage for unstructured or semi-structured data.
150
+
151
+ #### Basic Usage
152
+
153
+ ```typescript
154
+ import { InMemoryKvStorage, FsFolderJsonKvRepository } from "@workglow/storage";
155
+
156
+ // In-memory (for testing/caching)
157
+ const cache = new InMemoryKvStorage<string, any>();
158
+ await cache.put("config", { theme: "dark", language: "en" });
159
+
160
+ // File-based JSON (persistent)
161
+ const settings = new FsFolderJsonKvRepository("./data/settings");
162
+ await settings.put("user:preferences", { notifications: true });
163
+ ```
164
+
165
+ #### Environment-Specific Examples
166
+
167
+ ```typescript
168
+ // Browser (using IndexedDB)
169
+ import { IndexedDbKvRepository } from "@workglow/storage";
170
+ const browserStore = new IndexedDbKvRepository("my-app-storage");
171
+
172
+ // Node.js/Bun (using SQLite)
173
+ import { SqliteKvRepository } from "@workglow/storage";
174
+ // Pass a file path or a Database instance (see @workglow/sqlite)
175
+ const sqliteStore = new SqliteKvRepository("./data.db", "config_table");
176
+
177
+ // PostgreSQL (Node.js/Bun)
178
+ import { PostgresKvRepository } from "@workglow/storage";
179
+ import { Pool } from "pg";
180
+ const pool = new Pool({ connectionString: "postgresql://..." });
181
+ const pgStore = new PostgresKvRepository(pool, "settings");
182
+
183
+ // Supabase (Node.js/Bun/Browser)
184
+ import { SupabaseKvRepository } from "@workglow/storage";
185
+ import { createClient } from "@supabase/supabase-js";
186
+ const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
187
+ const supabaseStore = new SupabaseKvRepository(supabase, "settings");
188
+ ```
189
+
190
+ #### Bulk Operations
191
+
192
+ ```typescript
193
+ const store = new InMemoryKvStorage<string, { name: string; score: number }>();
194
+
195
+ // Bulk insert
196
+ await store.putBulk([
197
+ { key: "player1", value: { name: "Alice", score: 100 } },
198
+ { key: "player2", value: { name: "Bob", score: 85 } },
199
+ ]);
200
+
201
+ // Get all data
202
+ const allPlayers = await store.getAll();
203
+ // Result: [{ key: "player1", value: { name: "Alice", score: 100 } }, ...]
204
+
205
+ // Get size
206
+ const count = await store.size(); // 2
207
+ ```
208
+
209
+ #### Event Handling
210
+
211
+ ```typescript
212
+ const store = new InMemoryKvStorage<string, any>();
213
+
214
+ // Listen to storage events
215
+ store.on("put", (key, value) => {
216
+ console.log(`Stored: ${key} = ${JSON.stringify(value)}`);
217
+ });
218
+
219
+ store.on("get", (key, value) => {
220
+ console.log(`Retrieved: ${key} = ${value ? "found" : "not found"}`);
221
+ });
222
+
223
+ await store.put("test", { data: "example" }); // Triggers 'put' event
224
+ await store.get("test"); // Triggers 'get' event
225
+ ```
226
+
227
+ ### Tabular Storage
228
+
229
+ Structured storage with schemas, primary keys, and indexing for complex data relationships.
230
+
231
+ #### Schema Definition
232
+
233
+ ```typescript
234
+ import { JsonSchema } from "@workglow/util";
235
+ import { InMemoryTabularStorage } from "@workglow/storage";
236
+
237
+ // Define your entity schema
238
+ const UserSchema = {
239
+ type: "object",
240
+ properties: {
241
+ id: { type: "string" },
242
+ email: { type: "string" },
243
+ name: { type: "string" },
244
+ age: { type: "number" },
245
+ department: { type: "string" },
246
+ createdAt: { type: "string" },
247
+ },
248
+ required: ["id", "email", "name", "age", "department", "createdAt"],
249
+ additionalProperties: false,
250
+ } as const satisfies JsonSchema;
251
+
252
+ // Create repository with primary key and indexes
253
+ const userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(
254
+ UserSchema,
255
+ ["id"], // Primary key (can be compound: ["dept", "id"])
256
+ ["email", "department", ["department", "age"]] // Indexes for fast lookups
257
+ );
258
+ ```
259
+
260
+ #### CRUD Operations
261
+
262
+ ```typescript
263
+ // Create
264
+ await userRepo.put({
265
+ id: "user_123",
266
+ email: "alice@company.com",
267
+ name: "Alice Johnson",
268
+ age: 28,
269
+ department: "Engineering",
270
+ createdAt: new Date().toISOString(),
271
+ });
272
+
273
+ // Read by primary key
274
+ const user = await userRepo.get({ id: "user_123" });
275
+
276
+ // Update (put with same primary key)
277
+ await userRepo.put({
278
+ ...user!,
279
+ age: 29, // Birthday!
280
+ });
281
+
282
+ // Delete
283
+ await userRepo.delete({ id: "user_123" });
284
+ ```
285
+
286
+ #### Bulk Operations
287
+
288
+ ```typescript
289
+ // Bulk insert
290
+ await userRepo.putBulk([
291
+ {
292
+ id: "1",
293
+ email: "alice@co.com",
294
+ name: "Alice",
295
+ age: 28,
296
+ department: "Engineering",
297
+ createdAt: "2024-01-01",
298
+ },
299
+ {
300
+ id: "2",
301
+ email: "bob@co.com",
302
+ name: "Bob",
303
+ age: 32,
304
+ department: "Sales",
305
+ createdAt: "2024-01-02",
306
+ },
307
+ {
308
+ id: "3",
309
+ email: "carol@co.com",
310
+ name: "Carol",
311
+ age: 26,
312
+ department: "Engineering",
313
+ createdAt: "2024-01-03",
314
+ },
315
+ ]);
316
+
317
+ // Get all records
318
+ const allUsers = await userRepo.getAll();
319
+
320
+ // Get repository size
321
+ const userCount = await userRepo.size();
322
+ ```
323
+
324
+ #### Searching and Filtering
325
+
326
+ ```typescript
327
+ // Search by a partial entity: equality match on the given columns (uses indexes when available)
328
+ const engineeringUsers = await userRepo.search({ department: "Engineering" });
329
+ const adultUsers = await userRepo.search({ age: 25 }); // Exact match only: returns users whose age is exactly 25, not 25 and older
330
+
331
+ // Delete by search criteria (supports multiple columns)
332
+ await userRepo.deleteSearch({ department: "Sales" }); // Equality
333
+ await userRepo.deleteSearch({ age: { value: 65, operator: ">=" } }); // Delete users 65 and older
334
+
335
+ // Multiple criteria (AND logic)
336
+ await userRepo.deleteSearch({
337
+ department: "Sales",
338
+ age: { value: 30, operator: "<" },
339
+ }); // Delete young Sales employees
340
+ ```
341
+
342
+ #### Environment-Specific Tabular Storage
343
+
344
+ ```typescript
345
+ // SQLite (Node.js/Bun)
346
+ import { SqliteTabularStorage } from "@workglow/storage";
347
+
348
+ const sqliteUsers = new SqliteTabularStorage<typeof UserSchema, ["id"]>(
349
+ "./users.db",
350
+ "users",
351
+ UserSchema,
352
+ ["id"],
353
+ ["email"]
354
+ );
355
+
356
+ // PostgreSQL (Node.js/Bun)
357
+ import { PostgresTabularStorage } from "@workglow/storage";
358
+ import { Pool } from "pg";
359
+
360
+ const pool = new Pool({ connectionString: "postgresql://..." });
361
+ const pgUsers = new PostgresTabularStorage<typeof UserSchema, ["id"]>(
362
+ pool,
363
+ "users",
364
+ UserSchema,
365
+ ["id"],
366
+ ["email"]
367
+ );
368
+
369
+ // Supabase (Node.js/Bun/Browser)
370
+ import { SupabaseTabularStorage } from "@workglow/storage";
371
+ import { createClient } from "@supabase/supabase-js";
372
+
373
+ const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
374
+ const supabaseUsers = new SupabaseTabularStorage<typeof UserSchema, ["id"]>(
375
+ supabase,
376
+ "users",
377
+ UserSchema,
378
+ ["id"],
379
+ ["email"]
380
+ );
381
+
382
+ // IndexedDB (Browser)
383
+ import { IndexedDbTabularStorage } from "@workglow/storage";
384
+ const browserUsers = new IndexedDbTabularStorage<typeof UserSchema, ["id"]>(
385
+ "users",
386
+ UserSchema,
387
+ ["id"],
388
+ ["email"]
389
+ );
390
+
391
+ // File-based (Node.js/Bun)
392
+ import { FsFolderTabularStorage } from "@workglow/storage";
393
+ const fileUsers = new FsFolderTabularStorage<typeof UserSchema, ["id"]>(
394
+ "./data/users",
395
+ UserSchema,
396
+ ["id"],
397
+ ["email"]
398
+ );
399
+ ```
400
+
401
+ ### Queue Storage
402
+
403
+ Persistent job queue storage for background processing and task management.
404
+
405
+ > **Note**: Queue storage is primarily used internally by the job queue system. Direct usage is for advanced scenarios.
406
+
407
+ #### Basic Job Queue Operations
408
+
409
+ ```typescript
410
+ import { InMemoryQueueStorage, JobStatus } from "@workglow/storage";
411
+
412
+ // Define job input/output types
413
+ type ProcessingInput = { text: string; options: any };
414
+ type ProcessingOutput = { result: string; metadata: any };
415
+
416
+ const jobQueue = new InMemoryQueueStorage<ProcessingInput, ProcessingOutput>();
417
+
418
+ // Add job to queue
419
+ const jobId = await jobQueue.add({
420
+ input: { text: "Hello world", options: { uppercase: true } },
421
+ run_after: null, // Run immediately
422
+ max_retries: 3,
423
+ });
424
+
425
+ // Get next job for processing
426
+ const job = await jobQueue.next();
427
+ if (job) {
428
+ // Process the job...
429
+ const result = { result: "HELLO WORLD", metadata: { processed: true } };
430
+
431
+ // Mark as complete
432
+ await jobQueue.complete({
433
+ ...job,
434
+ output: result,
435
+ status: JobStatus.COMPLETED,
436
+ });
437
+ }
438
+ ```
439
+
440
+ #### Job Management
441
+
442
+ ```typescript
443
+ // Check queue status
444
+ const pendingCount = await jobQueue.size(JobStatus.PENDING);
445
+ const processingCount = await jobQueue.size(JobStatus.PROCESSING);
446
+
447
+ // Peek at jobs without removing them
448
+ const nextJobs = await jobQueue.peek(JobStatus.PENDING, 5);
449
+
450
+ // Progress tracking
451
+ await jobQueue.saveProgress(jobId, 50, "Processing...", { step: 1 });
452
+
453
+ // Handle job failures
454
+ await jobQueue.abort(jobId);
455
+
456
+ // Cleanup old completed jobs
457
+ await jobQueue.deleteJobsByStatusAndAge(JobStatus.COMPLETED, 24 * 60 * 60 * 1000); // 24 hours
458
+ ```
459
+
460
+ ## Environment-Specific Usage
461
+
462
+ ### Browser Environment
463
+
464
+ ```typescript
465
+ import {
466
+ IndexedDbKvRepository,
467
+ IndexedDbTabularStorage,
468
+ IndexedDbQueueStorage,
469
+ SupabaseKvRepository,
470
+ SupabaseTabularStorage,
471
+ SupabaseQueueStorage,
472
+ } from "@workglow/storage";
473
+ import { createClient } from "@supabase/supabase-js";
474
+
475
+ // Local browser storage with IndexedDB
476
+ const settings = new IndexedDbKvRepository("app-settings");
477
+ const userData = new IndexedDbTabularStorage("users", UserSchema, ["id"]);
478
+ const jobQueue = new IndexedDbQueueStorage<any, any>("background-jobs");
479
+
480
+ // Or use Supabase for cloud storage from the browser
481
+ const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
482
+ const cloudSettings = new SupabaseKvRepository(supabase, "app-settings");
483
+ const cloudUserData = new SupabaseTabularStorage(supabase, "users", UserSchema, ["id"]);
484
+ const cloudJobQueue = new SupabaseQueueStorage(supabase, "background-jobs");
485
+ ```
486
+
487
+ ### Node.js Environment
488
+
489
+ ```typescript
490
+ import {
491
+ SqliteKvRepository,
492
+ PostgresTabularStorage,
493
+ FsFolderJsonKvRepository,
494
+ } from "@workglow/storage";
495
+
496
+ // Mix and match storage backends
497
+ const cache = new FsFolderJsonKvRepository("./cache");
498
+ const users = new PostgresTabularStorage(pool, "users", UserSchema, ["id"]);
499
+ ```
500
+
501
+ ### Bun Environment
502
+
503
+ ```typescript
504
+ // Bun has access to all server-side implementations (everything except IndexedDB)
505
+ import {
506
+ SqliteTabularStorage,
507
+ FsFolderJsonKvRepository,
508
+ PostgresQueueStorage,
509
+ SupabaseTabularStorage,
510
+ } from "@workglow/storage";
511
+
512
+ import { Database } from "bun:sqlite";
513
+ import { createClient } from "@supabase/supabase-js";
514
+
515
+ const db = new Database("./app.db");
516
+ const data = new SqliteTabularStorage(db, "items", ItemSchema, ["id"]);
517
+
518
+ // Or use Supabase for cloud storage
519
+ const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
520
+ const cloudData = new SupabaseTabularStorage(supabase, "items", ItemSchema, ["id"]);
521
+ ```
522
+
523
+ ## Advanced Features
524
+
525
+ ### Repository Registry
526
+
527
+ Repositories can be registered globally by ID, allowing tasks to reference them by that ID rather than passing direct instances. This is useful for configuring repositories once at application startup and referencing them throughout your task graphs.
528
+
529
+ #### Registering Repositories
530
+
531
+ ```typescript
532
+ import {
533
+ registerTabularStorage,
534
+ getTabularStorage,
535
+ InMemoryTabularStorage,
536
+ } from "@workglow/storage";
537
+
538
+ // Define your schema
539
+ const userSchema = {
540
+ type: "object",
541
+ properties: {
542
+ id: { type: "string" },
543
+ name: { type: "string" },
544
+ email: { type: "string" },
545
+ },
546
+ required: ["id", "name", "email"],
547
+ additionalProperties: false,
548
+ } as const;
549
+
550
+ // Create and register a repository
551
+ const userRepo = new InMemoryTabularStorage(userSchema, ["id"] as const);
552
+ registerTabularStorage("users", userRepo);
553
+
554
+ // Later, retrieve the repository by ID
555
+ const repo = getTabularStorage("users");
556
+ ```
557
+
558
+ #### Using Repositories in Tasks
559
+
560
+ When using repositories with tasks, you can pass either the repository ID or a direct instance. The TaskRunner automatically resolves string IDs using the registry.
561
+
562
+ ```typescript
563
+ import { TypeTabularStorage } from "@workglow/storage";
564
+
565
+ // In your task's input schema, use TypeTabularStorage
566
+ static inputSchema() {
567
+ return {
568
+ type: "object",
569
+ properties: {
570
+ dataSource: TypeTabularStorage({
571
+ title: "User Repository",
572
+ description: "Repository containing user records",
573
+ }),
574
+ },
575
+ required: ["dataSource"],
576
+ };
577
+ }
578
+
579
+ // Both approaches work:
580
+ await task.run({ dataSource: "users" }); // Resolved from registry
581
+ await task.run({ dataSource: userRepoInstance }); // Direct instance
582
+ ```
583
+
584
+ #### Schema Helper Functions
585
+
586
+ The package provides schema helper functions for defining repository inputs with proper format annotations:
587
+
588
+ ```typescript
589
+ import {
590
+ TypeTabularStorage,
591
+ TypeVectorRepository,
592
+ TypeDocumentRepository,
593
+ } from "@workglow/storage";
594
+
595
+ // Tabular repository (format: "storage:tabular")
596
+ const tabularSchema = TypeTabularStorage({
597
+ title: "Data Source",
598
+ description: "Tabular data storage",
599
+ });
600
+
601
+ // Vector repository (format: "dataset:document-chunk")
602
+ const vectorSchema = TypeVectorRepository({
603
+ title: "Embeddings Store",
604
+ description: "Vector embeddings dataset",
605
+ });
606
+
607
+ // Document repository (format: "dataset:document")
608
+ const docSchema = TypeDocumentRepository({
609
+ title: "Document Store",
610
+ description: "Document storage dataset",
611
+ });
612
+ ```
613
+
614
+ ### Event-Driven Architecture
615
+
616
+ All storage implementations support event emission for monitoring and reactive programming:
617
+
618
+ ```typescript
619
+ const store = new InMemoryTabularStorage(UserSchema, ["id"]);
620
+
621
+ // Monitor all operations
622
+ store.on("put", (entity) => console.log("User created/updated:", entity));
623
+ store.on("delete", (key) => console.log("User deleted:", key));
624
+ store.on("get", (key, entity) => console.log("User accessed:", entity ? "found" : "not found"));
625
+
626
+ // Wait for specific events
627
+ const [entity] = await store.waitOn("put"); // Waits for next put operation
628
+ ```
629
+
630
+ ### Compound Primary Keys
631
+
632
+ ```typescript
633
+ import { JsonSchema } from "@workglow/util";
634
+
635
+ const OrderLineSchema = {
636
+ type: "object",
637
+ properties: {
638
+ orderId: { type: "string" },
639
+ lineNumber: { type: "number" },
640
+ productId: { type: "string" },
641
+ quantity: { type: "number" },
642
+ price: { type: "number" },
643
+ },
644
+ required: ["orderId", "lineNumber", "productId", "quantity", "price"],
645
+ additionalProperties: false,
646
+ } as const satisfies JsonSchema;
647
+
648
+ const orderLines = new InMemoryTabularStorage<typeof OrderLineSchema, ["orderId", "lineNumber"]>(
649
+ OrderLineSchema,
650
+ ["orderId", "lineNumber"], // Compound primary key
651
+ ["productId"] // Additional index
652
+ );
653
+
654
+ // Use compound keys
655
+ await orderLines.put({
656
+ orderId: "ORD-123",
657
+ lineNumber: 1,
658
+ productId: "PROD-A",
659
+ quantity: 2,
660
+ price: 19.99,
661
+ });
662
+ const line = await orderLines.get({ orderId: "ORD-123", lineNumber: 1 });
663
+ ```
664
+
665
+ ### Custom File Layout (KV on filesystem)
666
+
667
+ ```typescript
668
+ import { FsFolderKvRepository } from "@workglow/storage";
669
+ import { JsonSchema } from "@workglow/util";
670
+
671
+ // Control how keys map to file paths and value encoding via schemas
672
+ const keySchema = { type: "string" } as const satisfies JsonSchema;
673
+ const valueSchema = { type: "string" } as const satisfies JsonSchema;
674
+
675
+ const files = new FsFolderKvRepository<string, string>(
676
+ "./data/files",
677
+ (key) => `${key}.txt`,
678
+ keySchema,
679
+ valueSchema
680
+ );
681
+
682
+ await files.put("note-1", "Hello world");
683
+ ```
684
+
685
+ ## API Reference
686
+
687
+ ### IKvStorage<Key, Value>
688
+
689
+ Core interface for key-value storage:
690
+
691
+ ```typescript
692
+ interface IKvStorage<Key, Value> {
693
+ // Core operations
694
+ put(key: Key, value: Value): Promise<void>;
695
+ putBulk(items: Array<{ key: Key; value: Value }>): Promise<void>;
696
+ get(key: Key): Promise<Value | undefined>;
697
+ delete(key: Key): Promise<void>;
698
+ getAll(): Promise<Array<{ key: Key; value: Value }> | undefined>;
699
+ deleteAll(): Promise<void>;
700
+ size(): Promise<number>;
701
+
702
+ // Event handling
703
+ on(event: "put" | "get" | "getAll" | "delete" | "deleteall", callback: Function): void;
704
+ off(event: string, callback: Function): void;
705
+ once(event: string, callback: Function): void;
706
+ waitOn(event: string): Promise<any[]>;
707
+ emit(event: string, ...args: any[]): void;
708
+ }
709
+ ```
710
+
711
+ ### ITabularStorage<Schema, PrimaryKeyNames>
712
+
713
+ Core interface for tabular storage:
714
+
715
+ ```typescript
716
+ interface ITabularStorage<Schema, PrimaryKeyNames, Entity, PrimaryKey, Value> {
717
+ // Core operations
718
+ put(entity: Entity): Promise<void>;
719
+ putBulk(entities: Entity[]): Promise<void>;
720
+ get(key: PrimaryKey): Promise<Entity | undefined>;
721
+ delete(key: PrimaryKey | Entity): Promise<void>;
722
+ getAll(): Promise<Entity[] | undefined>;
723
+ deleteAll(): Promise<void>;
724
+ size(): Promise<number>;
725
+
726
+ // Search operations
727
+ search(criteria: Partial<Entity>): Promise<Entity[] | undefined>;
728
+ deleteSearch(criteria: DeleteSearchCriteria<Entity>): Promise<void>;
729
+
730
+ // Event handling
731
+ on(event: "put" | "get" | "search" | "delete" | "clearall", callback: Function): void;
732
+ off(event: string, callback: Function): void;
733
+ once(event: string, callback: Function): void;
734
+ waitOn(event: string): Promise<any[]>;
735
+ emit(event: string, ...args: any[]): void;
736
+ }
737
+ ```
738
+
739
+ #### DeleteSearchCriteria<Entity>
740
+
741
+ The `deleteSearch` method accepts a criteria object that supports multiple columns with optional comparison operators:
742
+
743
+ ```typescript
744
+ // Type definitions
745
+ type SearchOperator = "=" | "<" | "<=" | ">" | ">=";
746
+
747
+ interface SearchCondition<T> {
748
+ readonly value: T;
749
+ readonly operator: SearchOperator;
750
+ }
751
+
752
+ type DeleteSearchCriteria<Entity> = {
753
+ readonly [K in keyof Entity]?: Entity[K] | SearchCondition<Entity[K]>;
754
+ };
755
+
756
+ // Usage examples
757
+ // Equality match (direct value)
758
+ await repo.deleteSearch({ category: "electronics" });
759
+
760
+ // With comparison operator
761
+ await repo.deleteSearch({ createdAt: { value: date, operator: "<" } });
762
+
763
+ // Multiple criteria (AND logic)
764
+ await repo.deleteSearch({
765
+ category: "electronics",
766
+ value: { value: 100, operator: ">=" },
767
+ });
768
+ ```
769
+
770
+ ### IQueueStorage<Input, Output>
771
+
772
+ Core interface for job queue storage:
773
+
774
+ ```typescript
775
+ interface IQueueStorage<Input, Output> {
776
+ add(job: JobStorageFormat<Input, Output>): Promise<unknown>;
777
+ get(id: unknown): Promise<JobStorageFormat<Input, Output> | undefined>;
778
+ next(): Promise<JobStorageFormat<Input, Output> | undefined>;
779
+ complete(job: JobStorageFormat<Input, Output>): Promise<void>;
780
+ peek(status?: JobStatus, num?: number): Promise<JobStorageFormat<Input, Output>[]>;
781
+ size(status?: JobStatus): Promise<number>;
782
+ abort(id: unknown): Promise<void>;
783
+ saveProgress(id: unknown, progress: number, message: string, details: any): Promise<void>;
784
+ deleteAll(): Promise<void>;
785
+ getByRunId(runId: string): Promise<Array<JobStorageFormat<Input, Output>>>;
786
+ outputForInput(input: Input): Promise<Output | null>;
787
+ delete(id: unknown): Promise<void>;
788
+ deleteJobsByStatusAndAge(status: JobStatus, olderThanMs: number): Promise<void>;
789
+ }
790
+ ```
791
+
792
+ ## Examples
793
+
794
+ ### User Management System
795
+
796
+ ```typescript
797
+ import { JsonSchema, FromSchema } from "@workglow/util";
798
+ import { InMemoryTabularStorage, InMemoryKvStorage } from "@workglow/storage";
799
+
800
+ // User profile with tabular storage
801
+ const UserSchema = {
802
+ type: "object",
803
+ properties: {
804
+ id: { type: "string" },
805
+ username: { type: "string" },
806
+ email: { type: "string" },
807
+ firstName: { type: "string" },
808
+ lastName: { type: "string" },
809
+ role: {
810
+ type: "string",
811
+ enum: ["admin", "user", "guest"],
812
+ },
813
+ createdAt: { type: "string" },
814
+ lastLoginAt: { type: "string" },
815
+ },
816
+ required: ["id", "username", "email", "firstName", "lastName", "role", "createdAt"],
817
+ additionalProperties: false,
818
+ } as const satisfies JsonSchema;
819
+
820
+ const userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(
821
+ UserSchema,
822
+ ["id"],
823
+ ["email", "username"]
824
+ );
825
+
826
+ // User sessions with KV storage
827
+ const sessionStore = new InMemoryKvStorage<string, { userId: string; expiresAt: string }>();
828
+
829
+ // User management class
830
+ class UserManager {
831
+ constructor(
832
+ private userRepo: typeof userRepo,
833
+ private sessionStore: typeof sessionStore
834
+ ) {}
835
+
836
+ async createUser(userData: Omit<FromSchema<typeof UserSchema>, "id" | "createdAt">) {
837
+ const user = {
838
+ ...userData,
839
+ id: crypto.randomUUID(),
840
+ createdAt: new Date().toISOString(),
841
+ };
842
+ await this.userRepo.put(user);
843
+ return user;
844
+ }
845
+
846
+ async loginUser(email: string): Promise<string> {
847
+ const users = await this.userRepo.search({ email });
848
+ if (!users?.length) throw new Error("User not found");
849
+
850
+ const sessionId = crypto.randomUUID();
851
+ await this.sessionStore.put(sessionId, {
852
+ userId: users[0].id,
853
+ expiresAt: new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString(),
854
+ });
855
+
856
+ // Update last login
857
+ await this.userRepo.put({
858
+ ...users[0],
859
+ lastLoginAt: new Date().toISOString(),
860
+ });
861
+
862
+ return sessionId;
863
+ }
864
+
865
+ async getSessionUser(sessionId: string) {
866
+ const session = await this.sessionStore.get(sessionId);
867
+ if (!session || new Date(session.expiresAt) < new Date()) {
868
+ return null;
869
+ }
870
+ return this.userRepo.get({ id: session.userId });
871
+ }
872
+ }
873
+ ```
874
+
875
+ ### Configuration Management
876
+
877
+ ```typescript
878
+ // Application settings with typed configuration
879
+ type AppConfig = {
880
+ database: {
881
+ host: string;
882
+ port: number;
883
+ name: string;
884
+ };
885
+ features: {
886
+ enableNewUI: boolean;
887
+ maxUploadSize: number;
888
+ };
889
+ integrations: {
890
+ stripe: { apiKey: string; webhook: string };
891
+ sendgrid: { apiKey: string };
892
+ };
893
+ };
894
+
895
+ const configStore = new FsFolderJsonKvRepository<string, AppConfig>("./config");
896
+
897
/**
 * Reads per-environment configuration through the store and keeps an
 * in-memory copy that is overwritten whenever the store emits a "put" event.
 */
class ConfigManager {
  private cache = new Map<string, AppConfig>();

  /**
   * @param store - Backing configuration store; a "put" listener is registered on it.
   */
  constructor(private store: typeof configStore) {
    // Mirror every write into the cache so subsequent reads see the new value.
    store.on("put", (environment, config) => {
      this.cache.set(environment, config);
      console.log(`Configuration updated: ${environment}`);
    });
  }

  /**
   * Returns the configuration for `environment`, loading it from the store
   * and caching it on first access.
   *
   * @throws Error when the store has no entry for `environment`.
   */
  async getConfig(environment: string): Promise<AppConfig> {
    if (!this.cache.has(environment)) {
      const loaded = await this.store.get(environment);
      if (!loaded) throw new Error(`No configuration for environment: ${environment}`);

      this.cache.set(environment, loaded);
      return loaded;
    }

    return this.cache.get(environment)!;
  }

  /**
   * Shallow-merges `updates` over the current configuration and writes the
   * result back to the store. Each top-level section present in `updates`
   * replaces the existing section as a whole.
   */
  async updateConfig(environment: string, updates: Partial<AppConfig>) {
    await this.store.put(environment, {
      ...(await this.getConfig(environment)),
      ...updates,
    });
  }
}
926
+ ```
927
+
928
+ ### Supabase Integration Example
929
+
930
+ ```typescript
931
+ import { createClient } from "@supabase/supabase-js";
932
+ import { JsonSchema } from "@workglow/util";
933
+ import {
934
+ SupabaseTabularStorage,
935
+ SupabaseKvRepository,
936
+ SupabaseQueueStorage,
937
+ } from "@workglow/storage";
938
+
939
+ // Initialize Supabase client
940
// Connection settings are read from the environment.
const { SUPABASE_URL, SUPABASE_ANON_KEY } = process.env;
const supabase = createClient(SUPABASE_URL!, SUPABASE_ANON_KEY!);

// Define schemas

/** JSON Schema for a row of the "products" table. */
const ProductSchema = {
  type: "object",
  properties: {
    id: { type: "string" },
    name: { type: "string" },
    price: { type: "number" },
    category: { type: "string" },
    stock: { type: "number", minimum: 0 },
    createdAt: { type: "string", format: "date-time" },
  },
  required: ["id", "name", "price", "category", "stock", "createdAt"],
  additionalProperties: false,
} as const satisfies JsonSchema;

/** Allowed values of an order's `status`. */
const ORDER_STATUSES = ["pending", "processing", "completed", "cancelled"] as const;

/** JSON Schema for a row of the "orders" table. */
const OrderSchema = {
  type: "object",
  properties: {
    id: { type: "string" },
    customerId: { type: "string" },
    productId: { type: "string" },
    quantity: { type: "number", minimum: 1 },
    status: {
      type: "string",
      enum: ORDER_STATUSES,
    },
    createdAt: { type: "string", format: "date-time" },
  },
  required: ["id", "customerId", "productId", "quantity", "status", "createdAt"],
  additionalProperties: false,
} as const satisfies JsonSchema;

// Create repositories

// Products keyed by `id`; `category` and `name` are indexed for fast searching.
const products = new SupabaseTabularStorage<typeof ProductSchema, ["id"]>(
  supabase,
  "products",
  ProductSchema,
  ["id"],
  ["category", "name"]
);

// Orders keyed by `id`; indexed on `customerId`, on `status`, and on the
// compound (`customerId`, `status`).
const orders = new SupabaseTabularStorage<typeof OrderSchema, ["id"]>(
  supabase,
  "orders",
  OrderSchema,
  ["id"],
  ["customerId", "status", ["customerId", "status"]]
);

// Use KV for caching
const cache = new SupabaseKvRepository(supabase, "cache");

// Use queue for background processing
type EmailJob = { to: string; subject: string; body: string };
const emailQueue = new SupabaseQueueStorage<EmailJob, void>(supabase, "emails");
997
+
998
+ // Example usage
999
/**
 * Places an order for `quantity` units of a product: checks stock, stores the
 * order as "pending", decrements the product's stock and queues a
 * confirmation email.
 *
 * NOTE(review): the stock check, the order insert and the stock update are
 * separate storage calls, so concurrent orders for the same product may
 * oversell. Confirm whether this needs a transaction or database-side function.
 *
 * @param customerId - Id of the ordering customer.
 * @param productId - Id of the product being ordered.
 * @param quantity - Number of units; must be at least 1.
 * @returns The stored order record.
 * @throws Error "Quantity must be at least 1" for a zero, negative or NaN quantity.
 * @throws Error "Insufficient stock" when the product is missing or has too little stock.
 */
async function createOrder(customerId: string, productId: string, quantity: number) {
  // OrderSchema declares `minimum: 1`. Without this guard a zero or negative
  // quantity passes the stock check below and then *increases* the stock.
  // Written as a negated comparison so NaN is rejected too.
  if (!(quantity >= 1)) {
    throw new Error("Quantity must be at least 1");
  }

  // Check product availability
  const product = await products.get({ id: productId });
  if (!product || product.stock < quantity) {
    throw new Error("Insufficient stock");
  }

  // Create order
  const order = {
    id: crypto.randomUUID(),
    customerId,
    productId,
    quantity,
    status: "pending" as const,
    createdAt: new Date().toISOString(),
  };
  await orders.put(order);

  // Update stock
  await products.put({
    ...product,
    stock: product.stock - quantity,
  });

  // Queue email notification
  // NOTE(review): `to` receives the customer id as-is; this presumably assumes
  // customer ids are email addresses. Confirm, or look up the address first.
  await emailQueue.add({
    input: {
      to: customerId,
      subject: "Order Confirmation",
      body: `Your order ${order.id} has been confirmed!`,
    },
    run_after: null,
    max_retries: 3,
  });

  return order;
}
1036
+
1037
+ // Get customer's orders
1038
/**
 * Searches the orders storage for every order whose `customerId` matches.
 *
 * @param customerId - Customer whose orders are requested.
 * @returns The result of the storage search.
 */
async function getCustomerOrders(customerId: string) {
  const customerOrders = await orders.search({ customerId });
  return customerOrders;
}
1041
+
1042
+ // Get orders by status
1043
/**
 * Searches the orders storage for every order with the given `status`.
 *
 * @param status - Status value to match.
 * @returns The result of the storage search.
 */
async function getOrdersByStatus(status: string) {
  const matchingOrders = await orders.search({ status });
  return matchingOrders;
}
1046
+ ```
1047
+
1048
+ **Important Note**
1049
+ These implementations assume that your Supabase database exposes an `exec_sql` RPC function for table creation, or that you have already created the tables through Supabase migrations. For production use, we recommend that you:
1050
+
1051
+ - Create tables using Supabase migrations rather than runtime table creation
1052
+ - Set up proper Row Level Security (RLS) policies in Supabase
1053
+ - Use service role keys for server-side operations that need elevated permissions
1054
+
1055
+ ## Testing
1056
+
1057
+ The package includes comprehensive test suites for all storage implementations:
1058
+
1059
+ ```bash
1060
+ # Run all tests
1061
+ bun test
1062
+
1063
+ # Run specific test suites
1064
+ bun test --grep "KvRepository"
1065
+ bun test --grep "TabularStorage"
1066
+ bun test --grep "QueueStorage"
1067
+
1068
+ # Test specific environments
1069
+ bun test --grep "InMemory" # Cross-platform tests
1070
+ bun test --grep "IndexedDb" # Browser tests
1071
+ bun test --grep "Sqlite" # Native tests
1072
+ ```
1073
+
1074
+ ### Writing Tests for Your Storage Usage
1075
+
1076
+ ```typescript
1077
+ import { describe, test, expect, beforeEach } from "vitest";
1078
+ import { InMemoryTabularStorage } from "@workglow/storage";
1079
+
1080
describe("UserRepository", () => {
  let userRepo: InMemoryTabularStorage<typeof UserSchema, ["id"]>;

  // Start each test from a fresh, empty in-memory repository.
  beforeEach(() => {
    userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(UserSchema, ["id"], ["email"]);
  });

  test("should create and retrieve user", async () => {
    const expected = {
      id: "test-123",
      email: "test@example.com",
      name: "Test User",
      age: 25,
      department: "Engineering",
      createdAt: new Date().toISOString(),
    };

    await userRepo.put(expected);
    const actual = await userRepo.get({ id: "test-123" });

    // A stored record is read back unchanged by primary key.
    expect(actual).toEqual(expected);
  });

  test("should find users by department", async () => {
    const alice = {
      id: "1",
      email: "alice@co.com",
      name: "Alice",
      age: 28,
      department: "Engineering",
      createdAt: "2024-01-01",
    };
    const bob = {
      id: "2",
      email: "bob@co.com",
      name: "Bob",
      age: 32,
      department: "Sales",
      createdAt: "2024-01-02",
    };

    await userRepo.putBulk([alice, bob]);
    const engineers = await userRepo.search({ department: "Engineering" });

    // Only Alice is in Engineering.
    expect(engineers).toHaveLength(1);
    const [onlyEngineer] = engineers!;
    expect(onlyEngineer.name).toBe("Alice");
  });
});
1130
+ ```
1131
+
1132
+ ## License
1133
+
1134
+ Licensed under Apache 2.0. See [LICENSE](./LICENSE) for details.