@workglow/ai 0.0.84 → 0.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +166 -35
- package/dist/browser.js +3947 -1449
- package/dist/browser.js.map +49 -38
- package/dist/bun.js +3947 -1449
- package/dist/bun.js.map +49 -38
- package/dist/common.d.ts +0 -3
- package/dist/common.d.ts.map +1 -1
- package/dist/model/ModelRegistry.d.ts +2 -2
- package/dist/model/ModelRegistry.d.ts.map +1 -1
- package/dist/model/ModelRepository.d.ts +3 -3
- package/dist/model/ModelRepository.d.ts.map +1 -1
- package/dist/node.js +3947 -1449
- package/dist/node.js.map +49 -38
- package/dist/task/BackgroundRemovalTask.d.ts +121 -289
- package/dist/task/BackgroundRemovalTask.d.ts.map +1 -1
- package/dist/task/ChunkRetrievalTask.d.ts +243 -0
- package/dist/task/ChunkRetrievalTask.d.ts.map +1 -0
- package/dist/task/ChunkToVectorTask.d.ts +183 -0
- package/dist/task/ChunkToVectorTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorHybridSearchTask.d.ts +160 -0
- package/dist/task/ChunkVectorHybridSearchTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorSearchTask.d.ts +137 -0
- package/dist/task/ChunkVectorSearchTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorUpsertTask.d.ts +120 -0
- package/dist/task/ChunkVectorUpsertTask.d.ts.map +1 -0
- package/dist/task/ContextBuilderTask.d.ts +131 -0
- package/dist/task/ContextBuilderTask.d.ts.map +1 -0
- package/dist/task/DocumentEnricherTask.d.ts +232 -0
- package/dist/task/DocumentEnricherTask.d.ts.map +1 -0
- package/dist/task/DownloadModelTask.d.ts +79 -208
- package/dist/task/DownloadModelTask.d.ts.map +1 -1
- package/dist/task/FaceDetectorTask.d.ts +117 -272
- package/dist/task/FaceDetectorTask.d.ts.map +1 -1
- package/dist/task/FaceLandmarkerTask.d.ts +117 -272
- package/dist/task/FaceLandmarkerTask.d.ts.map +1 -1
- package/dist/task/GestureRecognizerTask.d.ts +129 -284
- package/dist/task/GestureRecognizerTask.d.ts.map +1 -1
- package/dist/task/HandLandmarkerTask.d.ts +125 -280
- package/dist/task/HandLandmarkerTask.d.ts.map +1 -1
- package/dist/task/HierarchicalChunkerTask.d.ts +212 -0
- package/dist/task/HierarchicalChunkerTask.d.ts.map +1 -0
- package/dist/task/HierarchyJoinTask.d.ts +318 -0
- package/dist/task/HierarchyJoinTask.d.ts.map +1 -0
- package/dist/task/ImageClassificationTask.d.ts +117 -272
- package/dist/task/ImageClassificationTask.d.ts.map +1 -1
- package/dist/task/ImageEmbeddingTask.d.ts +125 -446
- package/dist/task/ImageEmbeddingTask.d.ts.map +1 -1
- package/dist/task/ImageSegmentationTask.d.ts +117 -272
- package/dist/task/ImageSegmentationTask.d.ts.map +1 -1
- package/dist/task/ImageToTextTask.d.ts +117 -272
- package/dist/task/ImageToTextTask.d.ts.map +1 -1
- package/dist/task/ObjectDetectionTask.d.ts +119 -274
- package/dist/task/ObjectDetectionTask.d.ts.map +1 -1
- package/dist/task/PoseLandmarkerTask.d.ts +117 -272
- package/dist/task/PoseLandmarkerTask.d.ts.map +1 -1
- package/dist/task/QueryExpanderTask.d.ts +129 -0
- package/dist/task/QueryExpanderTask.d.ts.map +1 -0
- package/dist/task/RerankerTask.d.ts +209 -0
- package/dist/task/RerankerTask.d.ts.map +1 -0
- package/dist/task/StructuralParserTask.d.ts +91 -0
- package/dist/task/StructuralParserTask.d.ts.map +1 -0
- package/dist/task/TextChunkerTask.d.ts +129 -0
- package/dist/task/TextChunkerTask.d.ts.map +1 -0
- package/dist/task/TextClassificationTask.d.ts +42 -115
- package/dist/task/TextClassificationTask.d.ts.map +1 -1
- package/dist/task/TextEmbeddingTask.d.ts +55 -277
- package/dist/task/TextEmbeddingTask.d.ts.map +1 -1
- package/dist/task/TextFillMaskTask.d.ts +42 -115
- package/dist/task/TextFillMaskTask.d.ts.map +1 -1
- package/dist/task/TextGenerationTask.d.ts +44 -128
- package/dist/task/TextGenerationTask.d.ts.map +1 -1
- package/dist/task/TextLanguageDetectionTask.d.ts +42 -115
- package/dist/task/TextLanguageDetectionTask.d.ts.map +1 -1
- package/dist/task/TextNamedEntityRecognitionTask.d.ts +42 -115
- package/dist/task/TextNamedEntityRecognitionTask.d.ts.map +1 -1
- package/dist/task/TextQuestionAnswerTask.d.ts +47 -144
- package/dist/task/TextQuestionAnswerTask.d.ts.map +1 -1
- package/dist/task/TextRewriterTask.d.ts +45 -131
- package/dist/task/TextRewriterTask.d.ts.map +1 -1
- package/dist/task/TextSummaryTask.d.ts +42 -115
- package/dist/task/TextSummaryTask.d.ts.map +1 -1
- package/dist/task/TextTranslationTask.d.ts +54 -168
- package/dist/task/TextTranslationTask.d.ts.map +1 -1
- package/dist/task/TopicSegmenterTask.d.ts +148 -0
- package/dist/task/TopicSegmenterTask.d.ts.map +1 -0
- package/dist/task/UnloadModelTask.d.ts +79 -208
- package/dist/task/UnloadModelTask.d.ts.map +1 -1
- package/dist/task/VectorQuantizeTask.d.ts +120 -0
- package/dist/task/VectorQuantizeTask.d.ts.map +1 -0
- package/dist/task/VectorSimilarityTask.d.ts +18 -253
- package/dist/task/VectorSimilarityTask.d.ts.map +1 -1
- package/dist/task/base/AiTask.d.ts +24 -22
- package/dist/task/base/AiTask.d.ts.map +1 -1
- package/dist/task/base/AiTaskSchemas.d.ts +3 -127
- package/dist/task/base/AiTaskSchemas.d.ts.map +1 -1
- package/dist/task/base/AiVisionTask.d.ts +1 -4
- package/dist/task/base/AiVisionTask.d.ts.map +1 -1
- package/dist/task/index.d.ts +54 -1
- package/dist/task/index.d.ts.map +1 -1
- package/package.json +14 -9
- package/dist/source/Document.d.ts +0 -56
- package/dist/source/Document.d.ts.map +0 -1
- package/dist/source/DocumentConverter.d.ts +0 -15
- package/dist/source/DocumentConverter.d.ts.map +0 -1
- package/dist/source/DocumentConverterMarkdown.d.ts +0 -13
- package/dist/source/DocumentConverterMarkdown.d.ts.map +0 -1
- package/dist/source/DocumentConverterText.d.ts +0 -13
- package/dist/source/DocumentConverterText.d.ts.map +0 -1
- package/dist/source/MasterDocument.d.ts +0 -27
- package/dist/source/MasterDocument.d.ts.map +0 -1
- package/dist/source/index.d.ts +0 -10
- package/dist/source/index.d.ts.map +0 -1
- package/dist/task/DocumentSplitterTask.d.ts +0 -58
- package/dist/task/DocumentSplitterTask.d.ts.map +0 -1
package/README.md
CHANGED
|
@@ -52,7 +52,7 @@ await modelRepo.addModel({
|
|
|
52
52
|
provider: HF_TRANSFORMERS_ONNX,
|
|
53
53
|
provider_config: {
|
|
54
54
|
pipeline: "text2text-generation",
|
|
55
|
-
|
|
55
|
+
model_path: "Xenova/LaMini-Flan-T5-783M"
|
|
56
56
|
});
|
|
57
57
|
|
|
58
58
|
// 3. Register provider functions (inline, same thread)
|
|
@@ -216,25 +216,6 @@ const result = await task.run();
|
|
|
216
216
|
// Output: { similarity: 0.85 }
|
|
217
217
|
```
|
|
218
218
|
|
|
219
|
-
### Document Processing Tasks
|
|
220
|
-
|
|
221
|
-
#### DocumentSplitterTask
|
|
222
|
-
|
|
223
|
-
Splits documents into smaller chunks for processing.
|
|
224
|
-
|
|
225
|
-
```typescript
|
|
226
|
-
import { DocumentSplitterTask } from "@workglow/ai";
|
|
227
|
-
|
|
228
|
-
const task = new DocumentSplitterTask({
|
|
229
|
-
document: "Very long document content...",
|
|
230
|
-
chunkSize: 1000,
|
|
231
|
-
chunkOverlap: 200,
|
|
232
|
-
});
|
|
233
|
-
|
|
234
|
-
const result = await task.run();
|
|
235
|
-
// Output: { chunks: ["chunk1...", "chunk2...", "chunk3..."] }
|
|
236
|
-
```
|
|
237
|
-
|
|
238
219
|
### Model Management Tasks
|
|
239
220
|
|
|
240
221
|
#### DownloadModelTask
|
|
@@ -415,30 +396,140 @@ const result = await workflow
|
|
|
415
396
|
console.log("Final similarity score:", result.similarity);
|
|
416
397
|
```
|
|
417
398
|
|
|
418
|
-
##
|
|
399
|
+
## RAG (Retrieval-Augmented Generation) Pipelines
|
|
400
|
+
|
|
401
|
+
The AI package provides a comprehensive set of tasks for building RAG pipelines. These tasks chain together in workflows without requiring external loops.
|
|
402
|
+
|
|
403
|
+
### Document Processing Tasks
|
|
404
|
+
|
|
405
|
+
| Task | Description |
|
|
406
|
+
| ------------------------- | ----------------------------------------------------- |
|
|
407
|
+
| `StructuralParserTask` | Parses markdown/text into hierarchical document trees |
|
|
408
|
+
| `TextChunkerTask` | Splits text into chunks with configurable strategies |
|
|
409
|
+
| `HierarchicalChunkerTask` | Token-aware chunking that respects document structure |
|
|
410
|
+
| `TopicSegmenterTask` | Segments text by topic using heuristics or embeddings |
|
|
411
|
+
| `DocumentEnricherTask` | Adds summaries and entities to document nodes |
|
|
412
|
+
|
|
413
|
+
### Vector and Storage Tasks
|
|
419
414
|
|
|
420
|
-
|
|
415
|
+
| Task | Description |
|
|
416
|
+
| ----------------------- | ---------------------------------------- |
|
|
417
|
+
| `ChunkToVectorTask` | Transforms chunks to vector store format |
|
|
418
|
+
| `ChunkVectorUpsertTask` | Stores vectors in a repository |
|
|
419
|
+
| `ChunkVectorSearchTask` | Searches vectors by similarity |
|
|
420
|
+
| `VectorQuantizeTask` | Quantizes vectors for storage efficiency |
|
|
421
|
+
|
|
422
|
+
### Retrieval and Generation Tasks
|
|
423
|
+
|
|
424
|
+
| Task | Description |
|
|
425
|
+
| ----------------------------- | --------------------------------------------- |
|
|
426
|
+
| `QueryExpanderTask` | Expands queries for better retrieval coverage |
|
|
427
|
+
| `ChunkVectorHybridSearchTask` | Combines vector and full-text search |
|
|
428
|
+
| `RerankerTask` | Reranks search results for relevance |
|
|
429
|
+
| `HierarchyJoinTask` | Enriches results with parent context |
|
|
430
|
+
| `ContextBuilderTask` | Builds context for LLM prompts |
|
|
431
|
+
| `ChunkRetrievalTask` | Orchestrates end-to-end retrieval |
|
|
432
|
+
|
|
433
|
+
### Complete RAG Workflow Example
|
|
421
434
|
|
|
422
435
|
```typescript
|
|
423
|
-
import {
|
|
436
|
+
import { Workflow } from "@workglow/task-graph";
|
|
437
|
+
import { InMemoryVectorRepository } from "@workglow/storage";
|
|
424
438
|
|
|
425
|
-
|
|
426
|
-
|
|
439
|
+
const vectorRepo = new InMemoryVectorRepository();
|
|
440
|
+
await vectorRepo.setupDatabase();
|
|
427
441
|
|
|
428
|
-
//
|
|
429
|
-
|
|
430
|
-
|
|
442
|
+
// Document ingestion - fully chainable, no loops required
|
|
443
|
+
await new Workflow()
|
|
444
|
+
.structuralParser({
|
|
445
|
+
text: markdownContent,
|
|
446
|
+
title: "Documentation",
|
|
447
|
+
format: "markdown",
|
|
448
|
+
})
|
|
449
|
+
.documentEnricher({
|
|
450
|
+
generateSummaries: true,
|
|
451
|
+
extractEntities: true,
|
|
452
|
+
})
|
|
453
|
+
.hierarchicalChunker({
|
|
454
|
+
maxTokens: 512,
|
|
455
|
+
overlap: 50,
|
|
456
|
+
strategy: "hierarchical",
|
|
457
|
+
})
|
|
458
|
+
.textEmbedding({
|
|
459
|
+
model: "Xenova/all-MiniLM-L6-v2",
|
|
460
|
+
})
|
|
461
|
+
.chunkToVector()
|
|
462
|
+
.vectorStoreUpsert({
|
|
463
|
+
repository: vectorRepo,
|
|
464
|
+
})
|
|
465
|
+
.run();
|
|
431
466
|
|
|
432
|
-
//
|
|
433
|
-
const
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
467
|
+
// Query pipeline
|
|
468
|
+
const answer = await new Workflow()
|
|
469
|
+
.queryExpander({
|
|
470
|
+
query: "What is transfer learning?",
|
|
471
|
+
method: "multi-query",
|
|
472
|
+
numVariations: 3,
|
|
473
|
+
})
|
|
474
|
+
.textEmbedding({
|
|
475
|
+
model: "Xenova/all-MiniLM-L6-v2",
|
|
476
|
+
})
|
|
477
|
+
.vectorStoreSearch({
|
|
478
|
+
repository: vectorRepo,
|
|
479
|
+
topK: 10,
|
|
480
|
+
scoreThreshold: 0.5,
|
|
481
|
+
})
|
|
482
|
+
.reranker({
|
|
483
|
+
query: "What is transfer learning?",
|
|
484
|
+
topK: 5,
|
|
485
|
+
})
|
|
486
|
+
.contextBuilder({
|
|
487
|
+
format: "markdown",
|
|
488
|
+
maxLength: 2000,
|
|
489
|
+
})
|
|
490
|
+
.textQuestionAnswer({
|
|
491
|
+
question: "What is transfer learning?",
|
|
492
|
+
model: "Xenova/LaMini-Flan-T5-783M",
|
|
493
|
+
})
|
|
494
|
+
.run();
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
### Hierarchical Document Structure
|
|
438
498
|
|
|
439
|
-
|
|
499
|
+
Documents are represented as trees with typed nodes:
|
|
500
|
+
|
|
501
|
+
```typescript
|
|
502
|
+
type DocumentNode =
|
|
503
|
+
| DocumentRootNode // Root of document
|
|
504
|
+
| SectionNode // Headers, structural sections
|
|
505
|
+
| ParagraphNode // Text blocks
|
|
506
|
+
| SentenceNode // Fine-grained (optional)
|
|
507
|
+
| TopicNode; // Detected topic segments
|
|
440
508
|
```
|
|
441
509
|
|
|
510
|
+
Each node contains:
|
|
511
|
+
|
|
512
|
+
- `nodeId` - Deterministic content-based ID
|
|
513
|
+
- `range` - Source character offsets
|
|
514
|
+
- `text` - Content
|
|
515
|
+
- `enrichment` - Summaries, entities, keywords (optional)
|
|
516
|
+
- `children` - Child nodes (for parent nodes)
|
|
517
|
+
|
|
518
|
+
### Task Data Flow
|
|
519
|
+
|
|
520
|
+
Each task passes through the fields the next task needs and adds its own outputs:
|
|
521
|
+
|
|
522
|
+
| Task | Passes Through | Adds |
|
|
523
|
+
| --------------------- | ------------------------ | ------------------------------------- |
|
|
524
|
+
| `structuralParser` | - | `doc_id`, `documentTree`, `nodeCount` |
|
|
525
|
+
| `documentEnricher` | `doc_id`, `documentTree` | `summaryCount`, `entityCount` |
|
|
526
|
+
| `hierarchicalChunker` | `doc_id` | `chunks`, `text[]`, `count` |
|
|
527
|
+
| `textEmbedding` | (implicit) | `vector[]` |
|
|
528
|
+
| `chunkToVector` | - | `ids[]`, `vectors[]`, `metadata[]` |
|
|
529
|
+
| `vectorStoreUpsert` | - | `count`, `ids` |
|
|
530
|
+
|
|
531
|
+
This design eliminates the need for external loops - the entire pipeline chains together naturally.
|
|
532
|
+
|
|
442
533
|
## Error Handling
|
|
443
534
|
|
|
444
535
|
AI tasks include comprehensive error handling:
|
|
@@ -466,6 +557,46 @@ try {
|
|
|
466
557
|
|
|
467
558
|
## Advanced Configuration
|
|
468
559
|
|
|
560
|
+
### Model Input Resolution
|
|
561
|
+
|
|
562
|
+
AI tasks accept model inputs as either string identifiers or direct `ModelConfig` objects. When a string is provided, the TaskRunner automatically resolves it to a `ModelConfig` through the `ModelRepository` before the task executes.
|
|
563
|
+
|
|
564
|
+
```typescript
|
|
565
|
+
import { TextGenerationTask } from "@workglow/ai";
|
|
566
|
+
|
|
567
|
+
// Using a model ID (resolved from ModelRepository)
|
|
568
|
+
const task1 = new TextGenerationTask({
|
|
569
|
+
model: "onnx:Xenova/gpt2:q8",
|
|
570
|
+
prompt: "Generate text",
|
|
571
|
+
});
|
|
572
|
+
|
|
573
|
+
// Using a direct ModelConfig object
|
|
574
|
+
const task2 = new TextGenerationTask({
|
|
575
|
+
model: {
|
|
576
|
+
model_id: "onnx:Xenova/gpt2:q8",
|
|
577
|
+
provider: "hf-transformers-onnx",
|
|
578
|
+
tasks: ["TextGenerationTask"],
|
|
579
|
+
title: "GPT-2",
|
|
580
|
+
provider_config: { pipeline: "text-generation" },
|
|
581
|
+
},
|
|
582
|
+
prompt: "Generate text",
|
|
583
|
+
});
|
|
584
|
+
|
|
585
|
+
// Both approaches work identically
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
This resolution is handled by the input resolver system, which inspects schema `format` annotations (like `"model"` or `"model:TextGenerationTask"`) to determine how string values should be resolved.
|
|
589
|
+
|
|
590
|
+
### Supported Format Annotations
|
|
591
|
+
|
|
592
|
+
| Format | Description | Resolver |
|
|
593
|
+
| --------------------------------- | ---------------------------------------- | -------------------------- |
|
|
594
|
+
| `model` | Any AI model configuration | ModelRepository |
|
|
595
|
+
| `model:TaskName` | Model compatible with specific task type | ModelRepository |
|
|
596
|
+
| `repository:tabular` | Tabular data repository | TabularStorageRegistry |
|
|
597
|
+
| `repository:document-node-vector` | Vector storage repository | VectorRepositoryRegistry |
|
|
598
|
+
| `repository:document` | Document repository | DocumentRepositoryRegistry |
|
|
599
|
+
|
|
469
600
|
### Custom Model Validation
|
|
470
601
|
|
|
471
602
|
Tasks automatically validate that the specified models exist and are compatible with the task type:
|