@workglow/dataset 0.0.109 → 0.0.113
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +504 -968
- package/dist/browser.js +376 -490
- package/dist/browser.js.map +13 -13
- package/dist/bun.js +376 -490
- package/dist/bun.js.map +13 -13
- package/dist/chunk/ChunkSchema.d.ts +206 -0
- package/dist/chunk/ChunkSchema.d.ts.map +1 -0
- package/dist/chunk/ChunkVectorStorageSchema.d.ts +64 -0
- package/dist/chunk/ChunkVectorStorageSchema.d.ts.map +1 -0
- package/dist/common.d.ts +5 -5
- package/dist/common.d.ts.map +1 -1
- package/dist/document/Document.d.ts +7 -6
- package/dist/document/Document.d.ts.map +1 -1
- package/dist/document/DocumentSchema.d.ts +0 -465
- package/dist/document/DocumentSchema.d.ts.map +1 -1
- package/dist/knowledge-base/KnowledgeBase.d.ts +122 -0
- package/dist/knowledge-base/KnowledgeBase.d.ts.map +1 -0
- package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts +24 -0
- package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts.map +1 -0
- package/dist/knowledge-base/createKnowledgeBase.d.ts +28 -0
- package/dist/knowledge-base/createKnowledgeBase.d.ts.map +1 -0
- package/dist/node.js +376 -490
- package/dist/node.js.map +13 -13
- package/dist/util/DatasetSchema.d.ts +9 -49
- package/dist/util/DatasetSchema.d.ts.map +1 -1
- package/package.json +7 -5
- package/dist/document/DocumentDataset.d.ts +0 -79
- package/dist/document/DocumentDataset.d.ts.map +0 -1
- package/dist/document/DocumentDatasetRegistry.d.ts +0 -29
- package/dist/document/DocumentDatasetRegistry.d.ts.map +0 -1
- package/dist/document-chunk/DocumentChunkDataset.d.ts +0 -79
- package/dist/document-chunk/DocumentChunkDataset.d.ts.map +0 -1
- package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts +0 -29
- package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts.map +0 -1
- package/dist/document-chunk/DocumentChunkSchema.d.ts +0 -56
- package/dist/document-chunk/DocumentChunkSchema.d.ts.map +0 -1
- package/src/document-chunk/README.md +0 -362
package/README.md
CHANGED
|
@@ -1,1132 +1,668 @@
|
|
|
1
|
-
# @workglow/
|
|
1
|
+
# @workglow/dataset
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Document management, hierarchical chunking, and knowledge base infrastructure for RAG pipelines.
|
|
4
4
|
|
|
5
|
-
- [
|
|
5
|
+
- [Overview](#overview)
|
|
6
6
|
- [Installation](#installation)
|
|
7
|
-
- [
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
- [
|
|
11
|
-
- [
|
|
12
|
-
- [
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
- [
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
- [
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
- [
|
|
27
|
-
- [Browser Environment](#browser-environment)
|
|
28
|
-
- [Node.js Environment](#nodejs-environment)
|
|
29
|
-
- [Bun Environment](#bun-environment)
|
|
30
|
-
- [Advanced Features](#advanced-features)
|
|
31
|
-
- [Repository Registry](#repository-registry)
|
|
32
|
-
- [Event-Driven Architecture](#event-driven-architecture)
|
|
33
|
-
- [Compound Primary Keys](#compound-primary-keys)
|
|
34
|
-
- [Custom File Layout (KV on filesystem)](#custom-file-layout-kv-on-filesystem)
|
|
7
|
+
- [Quick Start](#quick-start)
|
|
8
|
+
- [Architecture](#architecture)
|
|
9
|
+
- [Documents](#documents)
|
|
10
|
+
- [Document Tree Structure](#document-tree-structure)
|
|
11
|
+
- [Parsing](#parsing)
|
|
12
|
+
- [Node Enrichment](#node-enrichment)
|
|
13
|
+
- [Chunks](#chunks)
|
|
14
|
+
- [ChunkRecord](#chunkrecord)
|
|
15
|
+
- [Chunk Vector Storage](#chunk-vector-storage)
|
|
16
|
+
- [KnowledgeBase](#knowledgebase)
|
|
17
|
+
- [Creating a KnowledgeBase](#creating-a-knowledgebase)
|
|
18
|
+
- [Document CRUD](#document-crud)
|
|
19
|
+
- [Chunk Operations](#chunk-operations)
|
|
20
|
+
- [Search](#search)
|
|
21
|
+
- [Tree Traversal](#tree-traversal)
|
|
22
|
+
- [Lifecycle Management](#lifecycle-management)
|
|
23
|
+
- [Registry](#registry)
|
|
24
|
+
- [Data Flow](#data-flow)
|
|
25
|
+
- [Ingestion Pipeline](#ingestion-pipeline)
|
|
26
|
+
- [Retrieval Pipeline](#retrieval-pipeline)
|
|
35
27
|
- [API Reference](#api-reference)
|
|
36
|
-
- [
|
|
37
|
-
- [
|
|
38
|
-
- [
|
|
39
|
-
- [
|
|
40
|
-
- [
|
|
41
|
-
- [Configuration Management](#configuration-management)
|
|
42
|
-
- [Testing](#testing)
|
|
43
|
-
- [Writing Tests for Your Storage Usage](#writing-tests-for-your-storage-usage)
|
|
28
|
+
- [Document](#document)
|
|
29
|
+
- [KnowledgeBase](#knowledgebase-1)
|
|
30
|
+
- [createKnowledgeBase](#createknowledgebase)
|
|
31
|
+
- [StructuralParser](#structuralparser)
|
|
32
|
+
- [Type Helpers](#type-helpers)
|
|
44
33
|
- [License](#license)
|
|
45
34
|
|
|
46
|
-
##
|
|
47
|
-
|
|
48
|
-
```typescript
|
|
49
|
-
// Key-Value Storage (simple data)
|
|
50
|
-
import { InMemoryKvStorage } from "@workglow/storage";
|
|
35
|
+
## Overview
|
|
51
36
|
|
|
52
|
-
|
|
53
|
-
await kvStore.put("user:123", { name: "Alice", age: 30 });
|
|
54
|
-
const kvUser = await kvStore.get("user:123"); // { name: "Alice", age: 30 }
|
|
55
|
-
```
|
|
37
|
+
This package provides the data layer for RAG (Retrieval-Augmented Generation) workflows. It ties together three concerns:
|
|
56
38
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
import { JsonSchema } from "@workglow/util";
|
|
61
|
-
|
|
62
|
-
const userSchema = {
|
|
63
|
-
type: "object",
|
|
64
|
-
properties: {
|
|
65
|
-
id: { type: "string" },
|
|
66
|
-
name: { type: "string" },
|
|
67
|
-
email: { type: "string" },
|
|
68
|
-
age: { type: "number" },
|
|
69
|
-
},
|
|
70
|
-
required: ["id", "name", "email", "age"],
|
|
71
|
-
additionalProperties: false,
|
|
72
|
-
} as const satisfies JsonSchema;
|
|
73
|
-
|
|
74
|
-
const userRepo = new InMemoryTabularStorage<typeof userSchema, ["id"]>(
|
|
75
|
-
userSchema,
|
|
76
|
-
["id"], // primary key
|
|
77
|
-
["email"] // additional indexes
|
|
78
|
-
);
|
|
39
|
+
1. **Documents** — hierarchical tree representation of parsed text (sections, paragraphs, sentences)
|
|
40
|
+
2. **Chunks** — flat records derived from the document tree, each tracking its position via node paths
|
|
41
|
+
3. **KnowledgeBase** — unified interface that owns both document storage (tabular) and chunk storage (vector), with cascading lifecycle management
|
|
79
42
|
|
|
80
|
-
|
|
81
|
-
|
|
43
|
+
```
|
|
44
|
+
Markdown / Plain Text
|
|
45
|
+
│
|
|
46
|
+
▼
|
|
47
|
+
┌──────────────┐
|
|
48
|
+
│ Document │ Hierarchical tree (sections, paragraphs)
|
|
49
|
+
│ (tabular) │ Stored as serialized JSON
|
|
50
|
+
└──────┬───────┘
|
|
51
|
+
│ chunking
|
|
52
|
+
▼
|
|
53
|
+
┌──────────────┐
|
|
54
|
+
│ Chunks │ Flat records with tree linkage (nodePath, depth)
|
|
55
|
+
│ (vector) │ Stored with embedding vectors
|
|
56
|
+
└──────┬───────┘
|
|
57
|
+
│ search
|
|
58
|
+
▼
|
|
59
|
+
┌──────────────┐
|
|
60
|
+
│ Results │ Ranked by similarity score
|
|
61
|
+
└──────────────┘
|
|
82
62
|
```
|
|
83
63
|
|
|
84
64
|
## Installation
|
|
85
65
|
|
|
86
66
|
```bash
|
|
87
|
-
|
|
88
|
-
bun install @workglow/storage
|
|
89
|
-
|
|
90
|
-
# Using npm
|
|
91
|
-
npm install @workglow/storage
|
|
92
|
-
|
|
93
|
-
# Using yarn
|
|
94
|
-
yarn add @workglow/storage
|
|
67
|
+
bun install @workglow/dataset
|
|
95
68
|
```
|
|
96
69
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
### Type Safety
|
|
70
|
+
Peer dependencies: `@workglow/storage`, `@workglow/util`.
|
|
100
71
|
|
|
101
|
-
|
|
72
|
+
## Quick Start
|
|
102
73
|
|
|
103
74
|
```typescript
|
|
104
|
-
import {
|
|
75
|
+
import {
|
|
76
|
+
createKnowledgeBase,
|
|
77
|
+
Document,
|
|
78
|
+
StructuralParser,
|
|
79
|
+
} from "@workglow/dataset";
|
|
80
|
+
|
|
81
|
+
// 1. Create a knowledge base
|
|
82
|
+
const kb = await createKnowledgeBase({
|
|
83
|
+
name: "my-kb",
|
|
84
|
+
vectorDimensions: 384,
|
|
85
|
+
});
|
|
105
86
|
|
|
106
|
-
//
|
|
107
|
-
const
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
87
|
+
// 2. Parse a document
|
|
88
|
+
const root = await StructuralParser.parseMarkdown("doc1", markdown, "My Doc");
|
|
89
|
+
const doc = new Document(root, { title: "My Doc" });
|
|
90
|
+
|
|
91
|
+
// 3. Store the document
|
|
92
|
+
const inserted = await kb.upsertDocument(doc);
|
|
93
|
+
|
|
94
|
+
// 4. Store chunk embeddings
|
|
95
|
+
await kb.upsertChunk({
|
|
96
|
+
doc_id: inserted.doc_id!,
|
|
97
|
+
vector: new Float32Array([0.1, 0.2 /* ...remaining values, 384 total */]),
|
|
98
|
+
metadata: {
|
|
99
|
+
chunkId: "chunk_1",
|
|
100
|
+
doc_id: inserted.doc_id!,
|
|
101
|
+
text: "The chunk text...",
|
|
102
|
+
nodePath: [root.nodeId, sectionNodeId],
|
|
103
|
+
depth: 2,
|
|
115
104
|
},
|
|
116
|
-
|
|
117
|
-
additionalProperties: false,
|
|
118
|
-
} as const satisfies JsonSchema;
|
|
119
|
-
|
|
120
|
-
// TypeScript automatically infers:
|
|
121
|
-
// Entity = FromSchema<typeof ProductSchema>
|
|
122
|
-
// PrimaryKey = { id: string }
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### Environment Compatibility
|
|
126
|
-
|
|
127
|
-
| Storage Type | Node.js | Bun | Browser | Persistence |
|
|
128
|
-
| ------------ | ------- | --- | ------- | ----------- |
|
|
129
|
-
| InMemory | ✅ | ✅ | ✅ | ❌ |
|
|
130
|
-
| IndexedDB | ❌ | ❌ | ✅ | ✅ |
|
|
131
|
-
| SQLite | ✅ | ✅ | ❌ | ✅ |
|
|
132
|
-
| PostgreSQL | ✅ | ✅ | ❌ | ✅ |
|
|
133
|
-
| Supabase | ✅ | ✅ | ✅ | ✅ |
|
|
134
|
-
| FileSystem | ✅ | ✅ | ❌ | ✅ |
|
|
135
|
-
|
|
136
|
-
### Import Patterns
|
|
105
|
+
});
|
|
137
106
|
|
|
138
|
-
|
|
107
|
+
// 5. Search (via task pipeline)
|
|
108
|
+
import { Workflow } from "@workglow/task-graph";
|
|
139
109
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
110
|
+
const result = await new Workflow()
|
|
111
|
+
.chunkRetrieval({
|
|
112
|
+
knowledgeBase: "my-kb",
|
|
113
|
+
query: "your search query",
|
|
114
|
+
model: "your-embedding-model",
|
|
115
|
+
topK: 5,
|
|
116
|
+
})
|
|
117
|
+
.run();
|
|
143
118
|
```
|
|
144
119
|
|
|
145
|
-
##
|
|
146
|
-
|
|
147
|
-
### Key-Value Storage
|
|
148
|
-
|
|
149
|
-
Simple key-value storage for unstructured or semi-structured data.
|
|
150
|
-
|
|
151
|
-
#### Basic Usage
|
|
152
|
-
|
|
153
|
-
```typescript
|
|
154
|
-
import { InMemoryKvStorage, FsFolderJsonKvRepository } from "@workglow/storage";
|
|
120
|
+
## Architecture
|
|
155
121
|
|
|
156
|
-
|
|
157
|
-
const cache = new InMemoryKvStorage<string, any>();
|
|
158
|
-
await cache.put("config", { theme: "dark", language: "en" });
|
|
122
|
+
The package is organized around three layers:
|
|
159
123
|
|
|
160
|
-
// File-based JSON (persistent)
|
|
161
|
-
const settings = new FsFolderJsonKvRepository("./data/settings");
|
|
162
|
-
await settings.put("user:preferences", { notifications: true });
|
|
163
124
|
```
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
const sqliteStore = new SqliteKvRepository("./data.db", "config_table");
|
|
176
|
-
|
|
177
|
-
// PostgreSQL (Node.js/Bun)
|
|
178
|
-
import { PostgresKvRepository } from "@workglow/storage";
|
|
179
|
-
import { Pool } from "pg";
|
|
180
|
-
const pool = new Pool({ connectionString: "postgresql://..." });
|
|
181
|
-
const pgStore = new PostgresKvRepository(pool, "settings");
|
|
182
|
-
|
|
183
|
-
// Supabase (Node.js/Bun)
|
|
184
|
-
import { SupabaseKvRepository } from "@workglow/storage";
|
|
185
|
-
import { createClient } from "@supabase/supabase-js";
|
|
186
|
-
const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
|
|
187
|
-
const supabaseStore = new SupabaseKvRepository(supabase, "settings");
|
|
125
|
+
┌───────────────────────────────────────────────────┐
|
|
126
|
+
│ KnowledgeBase │
|
|
127
|
+
│ Unified API for documents + chunks + search │
|
|
128
|
+
├────────────────────┬──────────────────────────────┤
|
|
129
|
+
│ DocumentTabular │ ChunkVector │
|
|
130
|
+
│ Storage │ Storage │
|
|
131
|
+
│ (ITabularStorage) │ (IVectorStorage) │
|
|
132
|
+
│ │ │
|
|
133
|
+
│ Stores serialized │ Stores embeddings + │
|
|
134
|
+
│ document trees │ ChunkRecord metadata │
|
|
135
|
+
└────────────────────┴──────────────────────────────┘
|
|
188
136
|
```
|
|
189
137
|
|
|
190
|
-
|
|
138
|
+
**Storage backends** are pluggable via the `@workglow/storage` interfaces. The `createKnowledgeBase` factory defaults to in-memory storage; production deployments can construct a `KnowledgeBase` directly with SQLite, PostgreSQL, or any other `ITabularStorage` / `IVectorStorage` implementation (see [Creating a KnowledgeBase](#creating-a-knowledgebase)).
|
|
191
139
|
|
|
192
|
-
|
|
193
|
-
const store = new InMemoryKvStorage<string, { name: string; score: number }>();
|
|
140
|
+
## Documents
|
|
194
141
|
|
|
195
|
-
|
|
196
|
-
await store.putBulk([
|
|
197
|
-
{ key: "player1", value: { name: "Alice", score: 100 } },
|
|
198
|
-
{ key: "player2", value: { name: "Bob", score: 85 } },
|
|
199
|
-
]);
|
|
142
|
+
### Document Tree Structure
|
|
200
143
|
|
|
201
|
-
|
|
202
|
-
const allPlayers = await store.getAll();
|
|
203
|
-
// Result: [{ key: "player1", value: { name: "Alice", score: 100 } }, ...]
|
|
144
|
+
Documents are represented as a hierarchical tree using a **discriminated union** of node types:
|
|
204
145
|
|
|
205
|
-
// Get size
|
|
206
|
-
const count = await store.size(); // 2
|
|
207
146
|
```
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
});
|
|
218
|
-
|
|
219
|
-
store.on("get", (key, value) => {
|
|
220
|
-
console.log(`Retrieved: ${key} = ${value ? "found" : "not found"}`);
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
await store.put("test", { data: "example" }); // Triggers 'put' event
|
|
224
|
-
await store.get("test"); // Triggers 'get' event
|
|
147
|
+
DocumentRootNode (kind: "document")
|
|
148
|
+
├── SectionNode (kind: "section", level: 1)
|
|
149
|
+
│ ├── ParagraphNode (kind: "paragraph")
|
|
150
|
+
│ ├── ParagraphNode (kind: "paragraph")
|
|
151
|
+
│ └── SectionNode (kind: "section", level: 2)
|
|
152
|
+
│ └── ParagraphNode (kind: "paragraph")
|
|
153
|
+
├── SectionNode (kind: "section", level: 1)
|
|
154
|
+
│ └── ParagraphNode (kind: "paragraph")
|
|
155
|
+
└── ParagraphNode (kind: "paragraph")
|
|
225
156
|
```
|
|
226
157
|
|
|
227
|
-
|
|
158
|
+
**Node types:**
|
|
228
159
|
|
|
229
|
-
|
|
160
|
+
| Type | Kind | Has Children | Description |
|
|
161
|
+
| ------------------ | ------------- | ------------ | ------------------------------------------------- |
|
|
162
|
+
| `DocumentRootNode` | `"document"` | yes | Root of the tree, has `title` |
|
|
163
|
+
| `SectionNode` | `"section"` | yes | From headers (level 1-6), has `title` and `level` |
|
|
164
|
+
| `ParagraphNode` | `"paragraph"` | no | Prose content |
|
|
165
|
+
| `SentenceNode` | `"sentence"` | no | Fine-grained segmentation |
|
|
166
|
+
| `TopicNode` | `"topic"` | yes | From topic segmentation algorithms |
|
|
230
167
|
|
|
231
|
-
|
|
168
|
+
All nodes share a base set of fields:
|
|
232
169
|
|
|
233
170
|
```typescript
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
//
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
id: { type: "string" },
|
|
242
|
-
email: { type: "string" },
|
|
243
|
-
name: { type: "string" },
|
|
244
|
-
age: { type: "number" },
|
|
245
|
-
department: { type: "string" },
|
|
246
|
-
createdAt: { type: "string" },
|
|
247
|
-
},
|
|
248
|
-
required: ["id", "email", "name", "age", "department", "createdAt"],
|
|
249
|
-
additionalProperties: false,
|
|
250
|
-
} as const satisfies JsonSchema;
|
|
251
|
-
|
|
252
|
-
// Create repository with primary key and indexes
|
|
253
|
-
const userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(
|
|
254
|
-
UserSchema,
|
|
255
|
-
["id"], // Primary key (can be compound: ["dept", "id"])
|
|
256
|
-
["email", "department", ["department", "age"]] // Indexes for fast lookups
|
|
257
|
-
);
|
|
171
|
+
interface DocumentNodeBase {
|
|
172
|
+
readonly nodeId: string; // Unique identifier (UUID)
|
|
173
|
+
readonly kind: NodeKind; // Discriminator
|
|
174
|
+
readonly range: NodeRange; // { startOffset, endOffset } in source text
|
|
175
|
+
readonly text: string; // Text content
|
|
176
|
+
readonly enrichment?: NodeEnrichment; // Optional summary, entities, keywords
|
|
177
|
+
}
|
|
258
178
|
```
|
|
259
179
|
|
|
260
|
-
|
|
180
|
+
Use the `NodeKind` constants for comparisons:
|
|
261
181
|
|
|
262
182
|
```typescript
|
|
263
|
-
|
|
264
|
-
await userRepo.put({
|
|
265
|
-
id: "user_123",
|
|
266
|
-
email: "alice@company.com",
|
|
267
|
-
name: "Alice Johnson",
|
|
268
|
-
age: 28,
|
|
269
|
-
department: "Engineering",
|
|
270
|
-
createdAt: new Date().toISOString(),
|
|
271
|
-
});
|
|
272
|
-
|
|
273
|
-
// Read by primary key
|
|
274
|
-
const user = await userRepo.get({ id: "user_123" });
|
|
275
|
-
|
|
276
|
-
// Update (put with same primary key)
|
|
277
|
-
await userRepo.put({
|
|
278
|
-
...user!,
|
|
279
|
-
age: 29, // Birthday!
|
|
280
|
-
});
|
|
183
|
+
import { NodeKind } from "@workglow/dataset";
|
|
281
184
|
|
|
282
|
-
|
|
283
|
-
|
|
185
|
+
if (node.kind === NodeKind.SECTION) {
|
|
186
|
+
console.log(node.title, node.level, node.children.length);
|
|
187
|
+
}
|
|
284
188
|
```
|
|
285
189
|
|
|
286
|
-
|
|
190
|
+
### Parsing
|
|
191
|
+
|
|
192
|
+
`StructuralParser` converts raw text into a `DocumentRootNode`:
|
|
287
193
|
|
|
288
194
|
```typescript
|
|
289
|
-
|
|
290
|
-
await userRepo.putBulk([
|
|
291
|
-
{
|
|
292
|
-
id: "1",
|
|
293
|
-
email: "alice@co.com",
|
|
294
|
-
name: "Alice",
|
|
295
|
-
age: 28,
|
|
296
|
-
department: "Engineering",
|
|
297
|
-
createdAt: "2024-01-01",
|
|
298
|
-
},
|
|
299
|
-
{
|
|
300
|
-
id: "2",
|
|
301
|
-
email: "bob@co.com",
|
|
302
|
-
name: "Bob",
|
|
303
|
-
age: 32,
|
|
304
|
-
department: "Sales",
|
|
305
|
-
createdAt: "2024-01-02",
|
|
306
|
-
},
|
|
307
|
-
{
|
|
308
|
-
id: "3",
|
|
309
|
-
email: "carol@co.com",
|
|
310
|
-
name: "Carol",
|
|
311
|
-
age: 26,
|
|
312
|
-
department: "Engineering",
|
|
313
|
-
createdAt: "2024-01-03",
|
|
314
|
-
},
|
|
315
|
-
]);
|
|
195
|
+
import { StructuralParser } from "@workglow/dataset";
|
|
316
196
|
|
|
317
|
-
//
|
|
318
|
-
const
|
|
197
|
+
// Markdown — detects headers, creates nested sections
|
|
198
|
+
const root = await StructuralParser.parseMarkdown(docId, markdownText, "Title");
|
|
319
199
|
|
|
320
|
-
//
|
|
321
|
-
const
|
|
322
|
-
```
|
|
200
|
+
// Plain text — splits on blank lines into paragraphs
|
|
201
|
+
const root = await StructuralParser.parsePlainText(docId, plainText, "Title");
|
|
323
202
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
```typescript
|
|
327
|
-
// Search by partial match (uses indexes when available)
|
|
328
|
-
const engineeringUsers = await userRepo.search({ department: "Engineering" });
|
|
329
|
-
const adultUsers = await userRepo.search({ age: 25 }); // Exact match
|
|
330
|
-
|
|
331
|
-
// Delete by search criteria (supports multiple columns)
|
|
332
|
-
await userRepo.deleteSearch({ department: "Sales" }); // Equality
|
|
333
|
-
await userRepo.deleteSearch({ age: { value: 65, operator: ">=" } }); // Delete users 65 and older
|
|
334
|
-
|
|
335
|
-
// Multiple criteria (AND logic)
|
|
336
|
-
await userRepo.deleteSearch({
|
|
337
|
-
department: "Sales",
|
|
338
|
-
age: { value: 30, operator: "<" },
|
|
339
|
-
}); // Delete young Sales employees
|
|
203
|
+
// Auto-detect format
|
|
204
|
+
const root = await StructuralParser.parse(docId, text, "Title");
|
|
340
205
|
```
|
|
341
206
|
|
|
342
|
-
|
|
207
|
+
The parser:
|
|
343
208
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
const sqliteUsers = new SqliteTabularStorage<typeof UserSchema, ["id"]>(
|
|
349
|
-
"./users.db",
|
|
350
|
-
"users",
|
|
351
|
-
UserSchema,
|
|
352
|
-
["id"],
|
|
353
|
-
["email"]
|
|
354
|
-
);
|
|
209
|
+
- Converts markdown headers (`#` through `######`) into nested `SectionNode`s
|
|
210
|
+
- Groups text between headers as `ParagraphNode` children
|
|
211
|
+
- Tracks character offsets (`startOffset`, `endOffset`) for every node
|
|
212
|
+
- Assigns a unique `nodeId` to each node
|
|
355
213
|
|
|
356
|
-
|
|
357
|
-
import { PostgresTabularStorage } from "@workglow/storage";
|
|
358
|
-
import { Pool } from "pg";
|
|
359
|
-
|
|
360
|
-
const pool = new Pool({ connectionString: "postgresql://..." });
|
|
361
|
-
const pgUsers = new PostgresTabularStorage<typeof UserSchema, ["id"]>(
|
|
362
|
-
pool,
|
|
363
|
-
"users",
|
|
364
|
-
UserSchema,
|
|
365
|
-
["id"],
|
|
366
|
-
["email"]
|
|
367
|
-
);
|
|
214
|
+
### Node Enrichment
|
|
368
215
|
|
|
369
|
-
|
|
370
|
-
import { SupabaseTabularStorage } from "@workglow/storage";
|
|
371
|
-
import { createClient } from "@supabase/supabase-js";
|
|
372
|
-
|
|
373
|
-
const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
|
|
374
|
-
const supabaseUsers = new SupabaseTabularStorage<typeof UserSchema, ["id"]>(
|
|
375
|
-
supabase,
|
|
376
|
-
"users",
|
|
377
|
-
UserSchema,
|
|
378
|
-
["id"],
|
|
379
|
-
["email"]
|
|
380
|
-
);
|
|
381
|
-
|
|
382
|
-
// IndexedDB (Browser)
|
|
383
|
-
import { IndexedDbTabularStorage } from "@workglow/storage";
|
|
384
|
-
const browserUsers = new IndexedDbTabularStorage<typeof UserSchema, ["id"]>(
|
|
385
|
-
"users",
|
|
386
|
-
UserSchema,
|
|
387
|
-
["id"],
|
|
388
|
-
["email"]
|
|
389
|
-
);
|
|
216
|
+
Nodes can carry optional enrichment data populated by AI tasks:
|
|
390
217
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
["email"]
|
|
398
|
-
);
|
|
218
|
+
```typescript
|
|
219
|
+
interface NodeEnrichment {
|
|
220
|
+
summary?: string; // AI-generated summary
|
|
221
|
+
entities?: Entity[]; // Named entities (text, type, confidence score)
|
|
222
|
+
keywords?: string[]; // Extracted keywords
|
|
223
|
+
}
|
|
399
224
|
```
|
|
400
225
|
|
|
401
|
-
|
|
226
|
+
Enrichment is set on nodes during the ingestion pipeline (e.g., by `DocumentEnricherTask`) and is propagated to chunks during the hierarchy join step.
|
|
402
227
|
|
|
403
|
-
|
|
228
|
+
## Chunks
|
|
404
229
|
|
|
405
|
-
|
|
230
|
+
### ChunkRecord
|
|
406
231
|
|
|
407
|
-
|
|
232
|
+
A `ChunkRecord` is a flat, self-contained unit of text with full context about its position in the document tree:
|
|
408
233
|
|
|
409
234
|
```typescript
|
|
410
|
-
|
|
235
|
+
interface ChunkRecord {
|
|
236
|
+
// Identity
|
|
237
|
+
chunkId: string; // Unique chunk identifier
|
|
238
|
+
doc_id: string; // Parent document ID
|
|
411
239
|
|
|
412
|
-
//
|
|
413
|
-
|
|
414
|
-
type ProcessingOutput = { result: string; metadata: any };
|
|
240
|
+
// Content
|
|
241
|
+
text: string; // The text to embed and search
|
|
415
242
|
|
|
416
|
-
|
|
243
|
+
// Tree linkage
|
|
244
|
+
nodePath: string[]; // Node IDs from root to leaf
|
|
245
|
+
depth: number; // Depth in the document tree
|
|
417
246
|
|
|
418
|
-
//
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
// Get next job for processing
|
|
426
|
-
const job = await jobQueue.next();
|
|
427
|
-
if (job) {
|
|
428
|
-
// Process the job...
|
|
429
|
-
const result = { result: "HELLO WORLD", metadata: { processed: true } };
|
|
430
|
-
|
|
431
|
-
// Mark as complete
|
|
432
|
-
await jobQueue.complete({
|
|
433
|
-
...job,
|
|
434
|
-
output: result,
|
|
435
|
-
status: JobStatus.COMPLETED,
|
|
436
|
-
});
|
|
247
|
+
// Optional fields
|
|
248
|
+
leafNodeId?: string; // Leaf node this chunk belongs to
|
|
249
|
+
summary?: string; // Chunk-level summary
|
|
250
|
+
entities?: Entity[]; // Named entities
|
|
251
|
+
parentSummaries?: string[]; // Summaries from ancestor nodes
|
|
252
|
+
sectionTitles?: string[]; // Titles of ancestor sections
|
|
253
|
+
doc_title?: string; // Document title
|
|
437
254
|
}
|
|
438
255
|
```
|
|
439
256
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
```typescript
|
|
443
|
-
// Check queue status
|
|
444
|
-
const pendingCount = await jobQueue.size(JobStatus.PENDING);
|
|
445
|
-
const processingCount = await jobQueue.size(JobStatus.PROCESSING);
|
|
446
|
-
|
|
447
|
-
// Peek at jobs without removing them
|
|
448
|
-
const nextJobs = await jobQueue.peek(JobStatus.PENDING, 5);
|
|
257
|
+
The `nodePath` and `depth` fields enable **hierarchy-aware retrieval**: given a search result, you can walk back up the document tree to get section titles, parent summaries, or sibling chunks for additional context.
|
|
449
258
|
|
|
450
|
-
|
|
451
|
-
await jobQueue.saveProgress(jobId, 50, "Processing...", { step: 1 });
|
|
259
|
+
### Chunk Vector Storage
|
|
452
260
|
|
|
453
|
-
|
|
454
|
-
await jobQueue.abort(jobId);
|
|
455
|
-
|
|
456
|
-
// Cleanup old completed jobs
|
|
457
|
-
await jobQueue.deleteJobsByStatusAndAge(JobStatus.COMPLETED, 24 * 60 * 60 * 1000); // 24 hours
|
|
458
|
-
```
|
|
459
|
-
|
|
460
|
-
## Environment-Specific Usage
|
|
461
|
-
|
|
462
|
-
### Browser Environment
|
|
261
|
+
Chunks are stored in vector storage as `ChunkVectorEntity`:
|
|
463
262
|
|
|
464
263
|
```typescript
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
SupabaseQueueStorage,
|
|
472
|
-
} from "@workglow/storage";
|
|
473
|
-
import { createClient } from "@supabase/supabase-js";
|
|
474
|
-
|
|
475
|
-
// Local browser storage with IndexedDB
|
|
476
|
-
const settings = new IndexedDbKvRepository("app-settings");
|
|
477
|
-
const userData = new IndexedDbTabularStorage("users", UserSchema, ["id"]);
|
|
478
|
-
const jobQueue = new IndexedDbQueueStorage<any, any>("background-jobs");
|
|
479
|
-
|
|
480
|
-
// Or use Supabase for cloud storage from the browser
|
|
481
|
-
const supabase = createClient("https://your-project.supabase.co", "your-anon-key");
|
|
482
|
-
const cloudSettings = new SupabaseKvRepository(supabase, "app-settings");
|
|
483
|
-
const cloudUserData = new SupabaseTabularStorage(supabase, "users", UserSchema, ["id"]);
|
|
484
|
-
const cloudJobQueue = new SupabaseQueueStorage(supabase, "background-jobs");
|
|
264
|
+
interface ChunkVectorEntity {
|
|
265
|
+
chunk_id: string; // Primary key (auto-generated UUID)
|
|
266
|
+
doc_id: string; // For filtering by document
|
|
267
|
+
vector: TypedArray; // Embedding (Float32Array, etc.)
|
|
268
|
+
metadata: ChunkRecord; // Full chunk record
|
|
269
|
+
}
|
|
485
270
|
```
|
|
486
271
|
|
|
487
|
-
|
|
272
|
+
The `metadata` field holds the complete `ChunkRecord`, so search results carry all the context needed for hierarchy-aware retrieval without additional lookups.
|
|
488
273
|
|
|
489
|
-
|
|
490
|
-
import {
|
|
491
|
-
SqliteKvRepository,
|
|
492
|
-
PostgresTabularStorage,
|
|
493
|
-
FsFolderJsonKvRepository,
|
|
494
|
-
} from "@workglow/storage";
|
|
495
|
-
|
|
496
|
-
// Mix and match storage backends
|
|
497
|
-
const cache = new FsFolderJsonKvRepository("./cache");
|
|
498
|
-
const users = new PostgresTabularStorage(pool, "users", UserSchema, ["id"]);
|
|
499
|
-
```
|
|
274
|
+
## KnowledgeBase
|
|
500
275
|
|
|
501
|
-
|
|
276
|
+
`KnowledgeBase` is the central class that ties document storage and vector storage together.
|
|
502
277
|
|
|
503
|
-
|
|
504
|
-
// Bun has access to all implementations
|
|
505
|
-
import {
|
|
506
|
-
SqliteTabularStorage,
|
|
507
|
-
FsFolderJsonKvRepository,
|
|
508
|
-
PostgresQueueStorage,
|
|
509
|
-
SupabaseTabularStorage,
|
|
510
|
-
} from "@workglow/storage";
|
|
278
|
+
### Creating a KnowledgeBase
|
|
511
279
|
|
|
512
|
-
|
|
513
|
-
import { createClient } from "@supabase/supabase-js";
|
|
280
|
+
**Factory function (recommended):**
|
|
514
281
|
|
|
515
|
-
|
|
516
|
-
|
|
282
|
+
```typescript
|
|
283
|
+
import { createKnowledgeBase } from "@workglow/dataset";
|
|
517
284
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
285
|
+
const kb = await createKnowledgeBase({
|
|
286
|
+
name: "my-kb", // Identifier
|
|
287
|
+
vectorDimensions: 384, // Must match your embedding model
|
|
288
|
+
backend: "in-memory", // Currently only "in-memory"
|
|
289
|
+
vectorType: Float32Array, // Default: Float32Array
|
|
290
|
+
register: true, // Register globally (default: true)
|
|
291
|
+
});
|
|
521
292
|
```
|
|
522
293
|
|
|
523
|
-
|
|
294
|
+
**Direct construction (custom storage backends):**
|
|
524
295
|
|
|
525
|
-
|
|
296
|
+
```typescript
|
|
297
|
+
import { KnowledgeBase } from "@workglow/dataset";
|
|
526
298
|
|
|
527
|
-
|
|
299
|
+
const kb = new KnowledgeBase(
|
|
300
|
+
"my-kb",
|
|
301
|
+
myDocumentTabularStorage, // ITabularStorage implementation
|
|
302
|
+
myChunkVectorStorage // IVectorStorage implementation
|
|
303
|
+
);
|
|
304
|
+
```
|
|
528
305
|
|
|
529
|
-
|
|
306
|
+
### Document CRUD
|
|
530
307
|
|
|
531
308
|
```typescript
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
} from "@workglow/storage";
|
|
309
|
+
// Upsert — auto-generates doc_id if not set
|
|
310
|
+
const doc = new Document(root, { title: "My Document" });
|
|
311
|
+
const inserted = await kb.upsertDocument(doc);
|
|
312
|
+
console.log(inserted.doc_id); // auto-generated UUID
|
|
537
313
|
|
|
538
|
-
//
|
|
539
|
-
const
|
|
540
|
-
type: "object",
|
|
541
|
-
properties: {
|
|
542
|
-
id: { type: "string" },
|
|
543
|
-
name: { type: "string" },
|
|
544
|
-
email: { type: "string" },
|
|
545
|
-
},
|
|
546
|
-
required: ["id", "name", "email"],
|
|
547
|
-
additionalProperties: false,
|
|
548
|
-
} as const;
|
|
314
|
+
// Get by ID
|
|
315
|
+
const retrieved = await kb.getDocument(inserted.doc_id!);
|
|
549
316
|
|
|
550
|
-
//
|
|
551
|
-
const
|
|
552
|
-
registerTabularStorage("users", userRepo);
|
|
317
|
+
// List all document IDs
|
|
318
|
+
const docIds = await kb.listDocuments();
|
|
553
319
|
|
|
554
|
-
//
|
|
555
|
-
|
|
320
|
+
// Delete — cascades to all chunks in vector storage
|
|
321
|
+
await kb.deleteDocument(inserted.doc_id!);
|
|
556
322
|
```
|
|
557
323
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
When using repositories with tasks, you can pass either the repository ID or a direct instance. The TaskRunner automatically resolves string IDs using the registry.
|
|
324
|
+
### Chunk Operations
|
|
561
325
|
|
|
562
326
|
```typescript
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
required: ["dataSource"],
|
|
576
|
-
};
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
// Both approaches work:
|
|
580
|
-
await task.run({ dataSource: "users" }); // Resolved from registry
|
|
581
|
-
await task.run({ dataSource: userRepoInstance }); // Direct instance
|
|
582
|
-
```
|
|
583
|
-
|
|
584
|
-
#### Schema Helper Functions
|
|
327
|
+
// Upsert a single chunk
|
|
328
|
+
const entity = await kb.upsertChunk({
|
|
329
|
+
doc_id: "doc1",
|
|
330
|
+
vector: new Float32Array([0.1, 0.2, 0.3]),
|
|
331
|
+
metadata: {
|
|
332
|
+
chunkId: "chunk_1",
|
|
333
|
+
doc_id: "doc1",
|
|
334
|
+
text: "Some text...",
|
|
335
|
+
nodePath: ["root", "section1"],
|
|
336
|
+
depth: 2,
|
|
337
|
+
},
|
|
338
|
+
});
|
|
585
339
|
|
|
586
|
-
|
|
340
|
+
// Upsert in bulk
|
|
341
|
+
const entities = await kb.upsertChunksBulk(chunkArray);
|
|
587
342
|
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
TypeTabularStorage,
|
|
591
|
-
TypeVectorRepository,
|
|
592
|
-
TypeDocumentRepository,
|
|
593
|
-
} from "@workglow/storage";
|
|
594
|
-
|
|
595
|
-
// Tabular repository (format: "storage:tabular")
|
|
596
|
-
const tabularSchema = TypeTabularStorage({
|
|
597
|
-
title: "Data Source",
|
|
598
|
-
description: "Tabular data storage",
|
|
599
|
-
});
|
|
343
|
+
// Get a specific chunk
|
|
344
|
+
const chunk = await kb.getChunk("chunk_id_here");
|
|
600
345
|
|
|
601
|
-
//
|
|
602
|
-
const
|
|
603
|
-
title: "Embeddings Store",
|
|
604
|
-
description: "Vector embeddings dataset",
|
|
605
|
-
});
|
|
346
|
+
// Get all chunks for a document
|
|
347
|
+
const docChunks = await kb.getChunksForDocument("doc1");
|
|
606
348
|
|
|
607
|
-
//
|
|
608
|
-
|
|
609
|
-
title: "Document Store",
|
|
610
|
-
description: "Document storage dataset",
|
|
611
|
-
});
|
|
349
|
+
// Delete all chunks for a document (without deleting the document)
|
|
350
|
+
await kb.deleteChunksForDocument("doc1");
|
|
612
351
|
```
|
|
613
352
|
|
|
614
|
-
###
|
|
353
|
+
### Search
|
|
615
354
|
|
|
616
|
-
|
|
355
|
+
**Similarity search** — vector-only:
|
|
617
356
|
|
|
618
357
|
```typescript
|
|
619
|
-
const
|
|
620
|
-
|
|
621
|
-
//
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
store.on("get", (key, entity) => console.log("User accessed:", entity ? "found" : "not found"));
|
|
358
|
+
const results = await kb.similaritySearch(queryVector, {
|
|
359
|
+
topK: 10, // Max results (default varies by backend)
|
|
360
|
+
scoreThreshold: 0.7, // Minimum similarity score
|
|
361
|
+
filter: { doc_id: "doc1" }, // Metadata filter
|
|
362
|
+
});
|
|
625
363
|
|
|
626
|
-
//
|
|
627
|
-
const
|
|
364
|
+
// Each result: ChunkVectorEntity & { score: number }
|
|
365
|
+
for (const result of results) {
|
|
366
|
+
console.log(result.chunk_id, result.score, result.metadata.text);
|
|
367
|
+
}
|
|
628
368
|
```
|
|
629
369
|
|
|
630
|
-
|
|
370
|
+
**Hybrid search** — combines vector similarity with full-text search. This requires a storage backend that supports hybrid search (e.g., PostgreSQL with pgvector); on a backend without that support, `hybridSearch` returns an empty array.
|
|
631
371
|
|
|
632
372
|
```typescript
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
lineNumber: { type: "number" },
|
|
640
|
-
productId: { type: "string" },
|
|
641
|
-
quantity: { type: "number" },
|
|
642
|
-
price: { type: "number" },
|
|
643
|
-
},
|
|
644
|
-
required: ["orderId", "lineNumber", "productId", "quantity", "price"],
|
|
645
|
-
additionalProperties: false,
|
|
646
|
-
} as const satisfies JsonSchema;
|
|
647
|
-
|
|
648
|
-
const orderLines = new InMemoryTabularStorage<typeof OrderLineSchema, ["orderId", "lineNumber"]>(
|
|
649
|
-
OrderLineSchema,
|
|
650
|
-
["orderId", "lineNumber"], // Compound primary key
|
|
651
|
-
["productId"] // Additional index
|
|
652
|
-
);
|
|
653
|
-
|
|
654
|
-
// Use compound keys
|
|
655
|
-
await orderLines.put({
|
|
656
|
-
orderId: "ORD-123",
|
|
657
|
-
lineNumber: 1,
|
|
658
|
-
productId: "PROD-A",
|
|
659
|
-
quantity: 2,
|
|
660
|
-
price: 19.99,
|
|
373
|
+
const results = await kb.hybridSearch(queryVector, {
|
|
374
|
+
textQuery: "machine learning",
|
|
375
|
+
topK: 10,
|
|
376
|
+
vectorWeight: 0.7, // 0-1, balance between vector and text
|
|
377
|
+
scoreThreshold: 0.5,
|
|
378
|
+
filter: { doc_id: "doc1" },
|
|
661
379
|
});
|
|
662
|
-
const line = await orderLines.get({ orderId: "ORD-123", lineNumber: 1 });
|
|
663
380
|
```
|
|
664
381
|
|
|
665
|
-
###
|
|
382
|
+
### Tree Traversal
|
|
383
|
+
|
|
384
|
+
Navigate the document tree stored in the knowledge base:
|
|
666
385
|
|
|
667
386
|
```typescript
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
// Control how keys map to file paths and value encoding via schemas
|
|
672
|
-
const keySchema = { type: "string" } as const satisfies JsonSchema;
|
|
673
|
-
const valueSchema = { type: "string" } as const satisfies JsonSchema;
|
|
674
|
-
|
|
675
|
-
const files = new FsFolderKvRepository<string, string>(
|
|
676
|
-
"./data/files",
|
|
677
|
-
(key) => `${key}.txt`,
|
|
678
|
-
keySchema,
|
|
679
|
-
valueSchema
|
|
680
|
-
);
|
|
387
|
+
// Get a specific node by ID
|
|
388
|
+
const node = await kb.getNode("doc1", nodeId);
|
|
681
389
|
|
|
682
|
-
|
|
683
|
-
|
|
390
|
+
// Get ancestors from root to target (useful for building context)
|
|
391
|
+
const ancestors = await kb.getAncestors("doc1", leafNodeId);
|
|
392
|
+
// Returns: [rootNode, sectionNode, subsectionNode, targetNode]
|
|
684
393
|
|
|
685
|
-
|
|
394
|
+
// Get chunks stored in the document JSON (not vector storage)
|
|
395
|
+
const chunks = await kb.getDocumentChunks("doc1");
|
|
686
396
|
|
|
687
|
-
|
|
397
|
+
// Find chunks whose nodePath contains a given node ID
|
|
398
|
+
const related = await kb.findChunksByNodeId("doc1", sectionNodeId);
|
|
399
|
+
```
|
|
688
400
|
|
|
689
|
-
|
|
401
|
+
### Lifecycle Management
|
|
690
402
|
|
|
691
403
|
```typescript
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
putBulk(items: Array<{ key: Key; value: Value }>): Promise<void>;
|
|
696
|
-
get(key: Key): Promise<Value | undefined>;
|
|
697
|
-
delete(key: Key): Promise<void>;
|
|
698
|
-
getAll(): Promise<Array<{ key: Key; value: Value }> | undefined>;
|
|
699
|
-
deleteAll(): Promise<void>;
|
|
700
|
-
size(): Promise<number>;
|
|
701
|
-
|
|
702
|
-
// Event handling
|
|
703
|
-
on(event: "put" | "get" | "getAll" | "delete" | "deleteall", callback: Function): void;
|
|
704
|
-
off(event: string, callback: Function): void;
|
|
705
|
-
once(event: string, callback: Function): void;
|
|
706
|
-
waitOn(event: string): Promise<any[]>;
|
|
707
|
-
emit(event: string, ...args: any[]): void;
|
|
708
|
-
}
|
|
709
|
-
```
|
|
404
|
+
// Prepare for re-indexing: delete chunks but keep the document
|
|
405
|
+
const doc = await kb.prepareReindex("doc1");
|
|
406
|
+
// doc is returned so you can re-chunk and re-embed
|
|
710
407
|
|
|
711
|
-
|
|
408
|
+
// Initialize storage backends
|
|
409
|
+
await kb.setupDatabase();
|
|
712
410
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
```typescript
|
|
716
|
-
interface ITabularStorage<Schema, PrimaryKeyNames, Entity, PrimaryKey, Value> {
|
|
717
|
-
// Core operations
|
|
718
|
-
put(entity: Entity): Promise<void>;
|
|
719
|
-
putBulk(entities: Entity[]): Promise<void>;
|
|
720
|
-
get(key: PrimaryKey): Promise<Entity | undefined>;
|
|
721
|
-
delete(key: PrimaryKey | Entity): Promise<void>;
|
|
722
|
-
getAll(): Promise<Entity[] | undefined>;
|
|
723
|
-
deleteAll(): Promise<void>;
|
|
724
|
-
size(): Promise<number>;
|
|
725
|
-
|
|
726
|
-
// Search operations
|
|
727
|
-
search(criteria: Partial<Entity>): Promise<Entity[] | undefined>;
|
|
728
|
-
deleteSearch(criteria: DeleteSearchCriteria<Entity>): Promise<void>;
|
|
729
|
-
|
|
730
|
-
// Event handling
|
|
731
|
-
on(event: "put" | "get" | "search" | "delete" | "clearall", callback: Function): void;
|
|
732
|
-
off(event: string, callback: Function): void;
|
|
733
|
-
once(event: string, callback: Function): void;
|
|
734
|
-
waitOn(event: string): Promise<any[]>;
|
|
735
|
-
emit(event: string, ...args: any[]): void;
|
|
736
|
-
}
|
|
411
|
+
// Tear down
|
|
412
|
+
kb.destroy();
|
|
737
413
|
```
|
|
738
414
|
|
|
739
|
-
|
|
415
|
+
### Registry
|
|
740
416
|
|
|
741
|
-
|
|
417
|
+
Knowledge bases can be registered globally by name, allowing tasks to reference them by string ID:
|
|
742
418
|
|
|
743
419
|
```typescript
|
|
744
|
-
|
|
745
|
-
type SearchOperator = "=" | "<" | "<=" | ">" | ">=";
|
|
746
|
-
|
|
747
|
-
interface SearchCondition<T> {
|
|
748
|
-
readonly value: T;
|
|
749
|
-
readonly operator: SearchOperator;
|
|
750
|
-
}
|
|
420
|
+
import { registerKnowledgeBase, getKnowledgeBase, TypeKnowledgeBase } from "@workglow/dataset";
|
|
751
421
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
};
|
|
422
|
+
// Register
|
|
423
|
+
registerKnowledgeBase("my-kb", kb);
|
|
755
424
|
|
|
756
|
-
//
|
|
757
|
-
|
|
758
|
-
await repo.deleteSearch({ category: "electronics" });
|
|
425
|
+
// Retrieve
|
|
426
|
+
const retrieved = getKnowledgeBase("my-kb");
|
|
759
427
|
|
|
760
|
-
//
|
|
761
|
-
|
|
428
|
+
// In task schemas — accepts either a string ID or a KnowledgeBase instance
|
|
429
|
+
const inputSchema = {
|
|
430
|
+
type: "object",
|
|
431
|
+
properties: {
|
|
432
|
+
knowledgeBase: TypeKnowledgeBase({
|
|
433
|
+
title: "Knowledge Base",
|
|
434
|
+
description: "The KB to search",
|
|
435
|
+
}),
|
|
436
|
+
},
|
|
437
|
+
required: ["knowledgeBase"],
|
|
438
|
+
} as const;
|
|
762
439
|
|
|
763
|
-
//
|
|
764
|
-
await
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
440
|
+
// Both work:
|
|
441
|
+
await task.run({ knowledgeBase: kb }); // Direct instance
|
|
442
|
+
await task.run({ knowledgeBase: "my-kb" }); // Resolved from registry
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
## Data Flow
|
|
446
|
+
|
|
447
|
+
### Ingestion Pipeline
|
|
448
|
+
|
|
449
|
+
All ingestion steps are composable Tasks that auto-connect in a Workflow:
|
|
450
|
+
|
|
451
|
+
```
|
|
452
|
+
Raw Text / Markdown
|
|
453
|
+
│
|
|
454
|
+
▼ StructuralParserTask
|
|
455
|
+
DocumentRootNode (tree with character offsets)
|
|
456
|
+
│
|
|
457
|
+
▼ DocumentEnricherTask (optional AI enrichment)
|
|
458
|
+
DocumentRootNode (+ summaries, entities on nodes)
|
|
459
|
+
│
|
|
460
|
+
▼ HierarchicalChunkerTask
|
|
461
|
+
ChunkRecord[] (flat chunks with nodePath linkage)
|
|
462
|
+
│
|
|
463
|
+
▼ TextEmbeddingTask (AI model)
|
|
464
|
+
ChunkRecord[] + Float32Array[] (text → vectors)
|
|
465
|
+
│
|
|
466
|
+
▼ ChunkToVectorTask
|
|
467
|
+
Vectors + metadata in vector store format
|
|
468
|
+
│
|
|
469
|
+
▼ ChunkVectorUpsertTask → vector + tabular storage
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
Example using the Workflow API:
|
|
473
|
+
|
|
474
|
+
```typescript
|
|
475
|
+
import { Workflow } from "@workglow/task-graph";
|
|
476
|
+
|
|
477
|
+
const result = await new Workflow()
|
|
478
|
+
.fileLoader({ url: `file://${filePath}`, format: "markdown" })
|
|
479
|
+
.structuralParser({ title: "My Document" })
|
|
480
|
+
.documentEnricher({ generateSummaries: true, extractEntities: true })
|
|
481
|
+
.hierarchicalChunker({ maxTokens: 512, overlap: 50 })
|
|
482
|
+
.textEmbedding({ model: "your-embedding-model" })
|
|
483
|
+
.chunkToVector()
|
|
484
|
+
.chunkVectorUpsert({ knowledgeBase: "my-kb" })
|
|
485
|
+
.run();
|
|
486
|
+
|
|
487
|
+
console.log(result.count); // Number of vectors stored
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
### Retrieval Pipeline
|
|
491
|
+
|
|
492
|
+
All retrieval steps are composable Tasks that auto-connect in a Workflow:
|
|
493
|
+
|
|
494
|
+
```
|
|
495
|
+
User Query
|
|
496
|
+
│
|
|
497
|
+
▼ QueryExpanderTask (optional — generates query variations)
|
|
498
|
+
Expanded queries
|
|
499
|
+
│
|
|
500
|
+
▼ ChunkRetrievalTask (embeds query + vector search)
|
|
501
|
+
or ChunkVectorHybridSearchTask (vector + full-text search)
|
|
502
|
+
ChunkSearchResult[] (chunks, chunk_ids, scores, query)
|
|
503
|
+
│
|
|
504
|
+
▼ HierarchyJoinTask (optional — enriches with ancestor context)
|
|
505
|
+
Enriched chunks (+ parentSummaries, sectionTitles, entities)
|
|
506
|
+
│
|
|
507
|
+
▼ RerankerTask (optional — cross-encoder reranking)
|
|
508
|
+
Re-scored chunks
|
|
509
|
+
│
|
|
510
|
+
▼ ContextBuilderTask
|
|
511
|
+
Formatted context string for LLM prompt
|
|
512
|
+
│
|
|
513
|
+
▼ LLM
|
|
514
|
+
Answer
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
Example using the Workflow API:
|
|
518
|
+
|
|
519
|
+
```typescript
|
|
520
|
+
import { Workflow } from "@workglow/task-graph";
|
|
521
|
+
|
|
522
|
+
const result = await new Workflow()
|
|
523
|
+
.chunkRetrieval({
|
|
524
|
+
knowledgeBase: "my-kb",
|
|
525
|
+
query: "What caused the Civil War?",
|
|
526
|
+
model: "your-embedding-model",
|
|
527
|
+
topK: 10,
|
|
528
|
+
})
|
|
529
|
+
.hierarchyJoin({
|
|
530
|
+
knowledgeBase: "my-kb",
|
|
531
|
+
includeParentSummaries: true,
|
|
532
|
+
})
|
|
533
|
+
.reranker({
|
|
534
|
+
method: "cross-encoder",
|
|
535
|
+
model: "your-reranker-model",
|
|
536
|
+
topK: 5,
|
|
537
|
+
})
|
|
538
|
+
.contextBuilder({
|
|
539
|
+
format: "numbered",
|
|
540
|
+
includeMetadata: false,
|
|
541
|
+
})
|
|
542
|
+
.run();
|
|
543
|
+
|
|
544
|
+
console.log(result.context); // Formatted context ready for LLM
|
|
768
545
|
```
|
|
769
546
|
|
|
770
|
-
|
|
547
|
+
## API Reference
|
|
771
548
|
|
|
772
|
-
|
|
549
|
+
### Document
|
|
773
550
|
|
|
774
551
|
```typescript
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
552
|
+
class Document {
|
|
553
|
+
readonly doc_id?: string;
|
|
554
|
+
readonly root: DocumentRootNode;
|
|
555
|
+
readonly metadata: DocumentMetadata;
|
|
556
|
+
|
|
557
|
+
constructor(
|
|
558
|
+
root: DocumentRootNode,
|
|
559
|
+
metadata: DocumentMetadata,
|
|
560
|
+
chunks?: ChunkRecord[],
|
|
561
|
+
doc_id?: string
|
|
562
|
+
);
|
|
563
|
+
|
|
564
|
+
setDocId(id: string): void;
|
|
565
|
+
setChunks(chunks: ChunkRecord[]): void;
|
|
566
|
+
getChunks(): ChunkRecord[];
|
|
567
|
+
findChunksByNodeId(nodeId: string): ChunkRecord[];
|
|
568
|
+
toJSON(): object;
|
|
569
|
+
static fromJSON(json: string, doc_id?: string): Document;
|
|
789
570
|
}
|
|
790
571
|
```
|
|
791
572
|
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
### User Management System
|
|
573
|
+
### KnowledgeBase
|
|
795
574
|
|
|
796
575
|
```typescript
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
// User profile with tabular storage
|
|
801
|
-
const UserSchema = {
|
|
802
|
-
type: "object",
|
|
803
|
-
properties: {
|
|
804
|
-
id: { type: "string" },
|
|
805
|
-
username: { type: "string" },
|
|
806
|
-
email: { type: "string" },
|
|
807
|
-
firstName: { type: "string" },
|
|
808
|
-
lastName: { type: "string" },
|
|
809
|
-
role: {
|
|
810
|
-
type: "string",
|
|
811
|
-
enum: ["admin", "user", "guest"],
|
|
812
|
-
},
|
|
813
|
-
createdAt: { type: "string" },
|
|
814
|
-
lastLoginAt: { type: "string" },
|
|
815
|
-
},
|
|
816
|
-
required: ["id", "username", "email", "firstName", "lastName", "role", "createdAt"],
|
|
817
|
-
additionalProperties: false,
|
|
818
|
-
} as const satisfies JsonSchema;
|
|
819
|
-
|
|
820
|
-
const userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(
|
|
821
|
-
UserSchema,
|
|
822
|
-
["id"],
|
|
823
|
-
["email", "username"]
|
|
824
|
-
);
|
|
576
|
+
class KnowledgeBase {
|
|
577
|
+
readonly name: string;
|
|
825
578
|
|
|
826
|
-
// User sessions with KV storage
|
|
827
|
-
const sessionStore = new InMemoryKvStorage<string, { userId: string; expiresAt: string }>();
|
|
828
|
-
|
|
829
|
-
// User management class
|
|
830
|
-
class UserManager {
|
|
831
579
|
constructor(
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
580
|
+
name: string,
|
|
581
|
+
documentStorage: DocumentTabularStorage,
|
|
582
|
+
chunkStorage: ChunkVectorStorage
|
|
583
|
+
);
|
|
584
|
+
|
|
585
|
+
// Documents
|
|
586
|
+
upsertDocument(document: Document): Promise<Document>;
|
|
587
|
+
getDocument(doc_id: string): Promise<Document | undefined>;
|
|
588
|
+
deleteDocument(doc_id: string): Promise<void>;
|
|
589
|
+
listDocuments(): Promise<string[]>;
|
|
590
|
+
|
|
591
|
+
// Tree traversal
|
|
592
|
+
getNode(doc_id: string, nodeId: string): Promise<DocumentNode | undefined>;
|
|
593
|
+
getAncestors(doc_id: string, nodeId: string): Promise<DocumentNode[]>;
|
|
594
|
+
|
|
595
|
+
// Chunks
|
|
596
|
+
upsertChunk(chunk: InsertChunkVectorEntity): Promise<ChunkVectorEntity>;
|
|
597
|
+
upsertChunksBulk(chunks: InsertChunkVectorEntity[]): Promise<ChunkVectorEntity[]>;
|
|
598
|
+
getChunk(chunk_id: string): Promise<ChunkVectorEntity | undefined>;
|
|
599
|
+
getChunksForDocument(doc_id: string): Promise<ChunkVectorEntity[]>;
|
|
600
|
+
deleteChunksForDocument(doc_id: string): Promise<void>;
|
|
601
|
+
|
|
602
|
+
// Search
|
|
603
|
+
similaritySearch(
|
|
604
|
+
query: TypedArray,
|
|
605
|
+
options?: VectorSearchOptions<ChunkRecord>
|
|
606
|
+
): Promise<ChunkSearchResult[]>;
|
|
607
|
+
hybridSearch(
|
|
608
|
+
query: TypedArray,
|
|
609
|
+
options: HybridSearchOptions<ChunkRecord>
|
|
610
|
+
): Promise<ChunkSearchResult[]>;
|
|
611
|
+
|
|
612
|
+
// Lifecycle
|
|
613
|
+
prepareReindex(doc_id: string): Promise<Document | undefined>;
|
|
614
|
+
setupDatabase(): Promise<void>;
|
|
615
|
+
destroy(): void;
|
|
616
|
+
|
|
617
|
+
// Additional chunk storage and document-chunk helpers
|
|
618
|
+
put(chunk: InsertChunkVectorEntity): Promise<ChunkVectorEntity>;
|
|
619
|
+
putBulk(chunks: InsertChunkVectorEntity[]): Promise<ChunkVectorEntity[]>;
|
|
620
|
+
getAllChunks(): Promise<ChunkVectorEntity[] | undefined>;
|
|
621
|
+
chunkCount(): Promise<number>;
|
|
622
|
+
clearChunks(): Promise<void>;
|
|
623
|
+
getVectorDimensions(): number;
|
|
624
|
+
getDocumentChunks(doc_id: string): Promise<ChunkRecord[]>;
|
|
625
|
+
findChunksByNodeId(doc_id: string, nodeId: string): Promise<ChunkRecord[]>;
|
|
872
626
|
}
|
|
873
627
|
```
|
|
874
628
|
|
|
875
|
-
###
|
|
629
|
+
### createKnowledgeBase
|
|
876
630
|
|
|
877
631
|
```typescript
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
};
|
|
885
|
-
|
|
886
|
-
enableNewUI: boolean;
|
|
887
|
-
maxUploadSize: number;
|
|
888
|
-
};
|
|
889
|
-
integrations: {
|
|
890
|
-
stripe: { apiKey: string; webhook: string };
|
|
891
|
-
sendgrid: { apiKey: string };
|
|
892
|
-
};
|
|
893
|
-
};
|
|
894
|
-
|
|
895
|
-
const configStore = new FsFolderJsonKvRepository<string, AppConfig>("./config");
|
|
896
|
-
|
|
897
|
-
class ConfigManager {
|
|
898
|
-
private cache = new Map<string, AppConfig>();
|
|
899
|
-
|
|
900
|
-
constructor(private store: typeof configStore) {
|
|
901
|
-
// Listen for config changes
|
|
902
|
-
store.on("put", (key, value) => {
|
|
903
|
-
this.cache.set(key, value);
|
|
904
|
-
console.log(`Configuration updated: ${key}`);
|
|
905
|
-
});
|
|
906
|
-
}
|
|
907
|
-
|
|
908
|
-
async getConfig(environment: string): Promise<AppConfig> {
|
|
909
|
-
if (this.cache.has(environment)) {
|
|
910
|
-
return this.cache.get(environment)!;
|
|
911
|
-
}
|
|
912
|
-
|
|
913
|
-
const config = await this.store.get(environment);
|
|
914
|
-
if (!config) throw new Error(`No configuration for environment: ${environment}`);
|
|
915
|
-
|
|
916
|
-
this.cache.set(environment, config);
|
|
917
|
-
return config;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
|
-
async updateConfig(environment: string, updates: Partial<AppConfig>) {
|
|
921
|
-
const current = await this.getConfig(environment);
|
|
922
|
-
const updated = { ...current, ...updates };
|
|
923
|
-
await this.store.put(environment, updated);
|
|
924
|
-
}
|
|
632
|
+
function createKnowledgeBase(options: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
|
|
633
|
+
|
|
634
|
+
interface CreateKnowledgeBaseOptions {
|
|
635
|
+
readonly name: string;
|
|
636
|
+
readonly vectorDimensions: number;
|
|
637
|
+
readonly backend?: "in-memory";
|
|
638
|
+
readonly vectorType?: { new (array: number[]): TypedArray };
|
|
639
|
+
readonly register?: boolean; // Default: true
|
|
925
640
|
}
|
|
926
641
|
```
|
|
927
642
|
|
|
928
|
-
###
|
|
643
|
+
### StructuralParser
|
|
929
644
|
|
|
930
645
|
```typescript
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
const supabase = createClient(process.env.SUPABASE_URL!, process.env.SUPABASE_ANON_KEY!);
|
|
941
|
-
|
|
942
|
-
// Define schemas
|
|
943
|
-
const ProductSchema = {
|
|
944
|
-
type: "object",
|
|
945
|
-
properties: {
|
|
946
|
-
id: { type: "string" },
|
|
947
|
-
name: { type: "string" },
|
|
948
|
-
price: { type: "number" },
|
|
949
|
-
category: { type: "string" },
|
|
950
|
-
stock: { type: "number", minimum: 0 },
|
|
951
|
-
createdAt: { type: "string", format: "date-time" },
|
|
952
|
-
},
|
|
953
|
-
required: ["id", "name", "price", "category", "stock", "createdAt"],
|
|
954
|
-
additionalProperties: false,
|
|
955
|
-
} as const satisfies JsonSchema;
|
|
956
|
-
|
|
957
|
-
const OrderSchema = {
|
|
958
|
-
type: "object",
|
|
959
|
-
properties: {
|
|
960
|
-
id: { type: "string" },
|
|
961
|
-
customerId: { type: "string" },
|
|
962
|
-
productId: { type: "string" },
|
|
963
|
-
quantity: { type: "number", minimum: 1 },
|
|
964
|
-
status: {
|
|
965
|
-
type: "string",
|
|
966
|
-
enum: ["pending", "processing", "completed", "cancelled"],
|
|
967
|
-
},
|
|
968
|
-
createdAt: { type: "string", format: "date-time" },
|
|
969
|
-
},
|
|
970
|
-
required: ["id", "customerId", "productId", "quantity", "status", "createdAt"],
|
|
971
|
-
additionalProperties: false,
|
|
972
|
-
} as const satisfies JsonSchema;
|
|
973
|
-
|
|
974
|
-
// Create repositories
|
|
975
|
-
const products = new SupabaseTabularStorage<typeof ProductSchema, ["id"]>(
|
|
976
|
-
supabase,
|
|
977
|
-
"products",
|
|
978
|
-
ProductSchema,
|
|
979
|
-
["id"],
|
|
980
|
-
["category", "name"] // Indexed columns for fast searching
|
|
981
|
-
);
|
|
982
|
-
|
|
983
|
-
const orders = new SupabaseTabularStorage<typeof OrderSchema, ["id"]>(
|
|
984
|
-
supabase,
|
|
985
|
-
"orders",
|
|
986
|
-
OrderSchema,
|
|
987
|
-
["id"],
|
|
988
|
-
["customerId", "status", ["customerId", "status"]] // Compound index
|
|
989
|
-
);
|
|
990
|
-
|
|
991
|
-
// Use KV for caching
|
|
992
|
-
const cache = new SupabaseKvRepository(supabase, "cache");
|
|
993
|
-
|
|
994
|
-
// Use queue for background processing
|
|
995
|
-
type EmailJob = { to: string; subject: string; body: string };
|
|
996
|
-
const emailQueue = new SupabaseQueueStorage<EmailJob, void>(supabase, "emails");
|
|
997
|
-
|
|
998
|
-
// Example usage
|
|
999
|
-
async function createOrder(customerId: string, productId: string, quantity: number) {
|
|
1000
|
-
// Check product availability
|
|
1001
|
-
const product = await products.get({ id: productId });
|
|
1002
|
-
if (!product || product.stock < quantity) {
|
|
1003
|
-
throw new Error("Insufficient stock");
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
// Create order
|
|
1007
|
-
const order = {
|
|
1008
|
-
id: crypto.randomUUID(),
|
|
1009
|
-
customerId,
|
|
1010
|
-
productId,
|
|
1011
|
-
quantity,
|
|
1012
|
-
status: "pending" as const,
|
|
1013
|
-
createdAt: new Date().toISOString(),
|
|
1014
|
-
};
|
|
1015
|
-
await orders.put(order);
|
|
1016
|
-
|
|
1017
|
-
// Update stock
|
|
1018
|
-
await products.put({
|
|
1019
|
-
...product,
|
|
1020
|
-
stock: product.stock - quantity,
|
|
1021
|
-
});
|
|
1022
|
-
|
|
1023
|
-
// Queue email notification
|
|
1024
|
-
await emailQueue.add({
|
|
1025
|
-
input: {
|
|
1026
|
-
to: customerId,
|
|
1027
|
-
subject: "Order Confirmation",
|
|
1028
|
-
body: `Your order ${order.id} has been confirmed!`,
|
|
1029
|
-
},
|
|
1030
|
-
run_after: null,
|
|
1031
|
-
max_retries: 3,
|
|
1032
|
-
});
|
|
1033
|
-
|
|
1034
|
-
return order;
|
|
646
|
+
class StructuralParser {
|
|
647
|
+
static parseMarkdown(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
|
|
648
|
+
static parsePlainText(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
|
|
649
|
+
static parse(
|
|
650
|
+
doc_id: string,
|
|
651
|
+
text: string,
|
|
652
|
+
title: string,
|
|
653
|
+
format?: string
|
|
654
|
+
): Promise<DocumentRootNode>;
|
|
1035
655
|
}
|
|
1036
|
-
|
|
1037
|
-
// Get customer's orders
|
|
1038
|
-
async function getCustomerOrders(customerId: string) {
|
|
1039
|
-
return await orders.search({ customerId });
|
|
1040
|
-
}
|
|
1041
|
-
|
|
1042
|
-
// Get orders by status
|
|
1043
|
-
async function getOrdersByStatus(status: string) {
|
|
1044
|
-
return await orders.search({ status });
|
|
1045
|
-
}
|
|
1046
|
-
```
|
|
1047
|
-
|
|
1048
|
-
**Important Note**
|
|
1049
|
-
The implementations assume you have an exec_sql RPC function in your Supabase database for table creation, or that you've created the tables through Supabase migrations. For production use, it's recommended to:
|
|
1050
|
-
|
|
1051
|
-
- Create tables using Supabase migrations rather than runtime table creation
|
|
1052
|
-
- Set up proper Row Level Security (RLS) policies in Supabase
|
|
1053
|
-
- Use service role keys for server-side operations that need elevated permissions
|
|
1054
|
-
|
|
1055
|
-
## Testing
|
|
1056
|
-
|
|
1057
|
-
The package includes comprehensive test suites for all storage implementations:
|
|
1058
|
-
|
|
1059
|
-
```bash
|
|
1060
|
-
# Run all tests
|
|
1061
|
-
bun test
|
|
1062
|
-
|
|
1063
|
-
# Run specific test suites
|
|
1064
|
-
bun test --grep "KvRepository"
|
|
1065
|
-
bun test --grep "TabularStorage"
|
|
1066
|
-
bun test --grep "QueueStorage"
|
|
1067
|
-
|
|
1068
|
-
# Test specific environments
|
|
1069
|
-
bun test --grep "InMemory" # Cross-platform tests
|
|
1070
|
-
bun test --grep "IndexedDb" # Browser tests
|
|
1071
|
-
bun test --grep "Sqlite" # Native tests
|
|
1072
656
|
```
|
|
1073
657
|
|
|
1074
|
-
###
|
|
658
|
+
### Type Helpers
|
|
1075
659
|
|
|
1076
660
|
```typescript
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
beforeEach(() => {
|
|
1084
|
-
userRepo = new InMemoryTabularStorage<typeof UserSchema, ["id"]>(UserSchema, ["id"], ["email"]);
|
|
1085
|
-
});
|
|
1086
|
-
|
|
1087
|
-
test("should create and retrieve user", async () => {
|
|
1088
|
-
const user = {
|
|
1089
|
-
id: "test-123",
|
|
1090
|
-
email: "test@example.com",
|
|
1091
|
-
name: "Test User",
|
|
1092
|
-
age: 25,
|
|
1093
|
-
department: "Engineering",
|
|
1094
|
-
createdAt: new Date().toISOString(),
|
|
1095
|
-
};
|
|
1096
|
-
|
|
1097
|
-
await userRepo.put(user);
|
|
1098
|
-
const retrieved = await userRepo.get({ id: "test-123" });
|
|
1099
|
-
|
|
1100
|
-
expect(retrieved).toEqual(user);
|
|
1101
|
-
});
|
|
1102
|
-
|
|
1103
|
-
test("should find users by department", async () => {
|
|
1104
|
-
const users = [
|
|
1105
|
-
{
|
|
1106
|
-
id: "1",
|
|
1107
|
-
email: "alice@co.com",
|
|
1108
|
-
name: "Alice",
|
|
1109
|
-
age: 28,
|
|
1110
|
-
department: "Engineering",
|
|
1111
|
-
createdAt: "2024-01-01",
|
|
1112
|
-
},
|
|
1113
|
-
{
|
|
1114
|
-
id: "2",
|
|
1115
|
-
email: "bob@co.com",
|
|
1116
|
-
name: "Bob",
|
|
1117
|
-
age: 32,
|
|
1118
|
-
department: "Sales",
|
|
1119
|
-
createdAt: "2024-01-02",
|
|
1120
|
-
},
|
|
1121
|
-
];
|
|
1122
|
-
|
|
1123
|
-
await userRepo.putBulk(users);
|
|
1124
|
-
const engineers = await userRepo.search({ department: "Engineering" });
|
|
1125
|
-
|
|
1126
|
-
expect(engineers).toHaveLength(1);
|
|
1127
|
-
expect(engineers![0].name).toBe("Alice");
|
|
1128
|
-
});
|
|
1129
|
-
});
|
|
661
|
+
// Schema helper for task inputs that accept a KnowledgeBase ID or instance
|
|
662
|
+
function TypeKnowledgeBase<O>(options?: O): JsonSchema;
|
|
663
|
+
|
|
664
|
+
// Schema helper for tabular storage inputs
|
|
665
|
+
function TypeTabularStorage<O>(options?: O): JsonSchema;
|
|
1130
666
|
```
|
|
1131
667
|
|
|
1132
668
|
## License
|