rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Embedder
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all embedder implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* While this contains implementation logic, it remains in the core layer because:
|
|
9
|
+
* 1. It's model-agnostic (no knowledge of specific models or transformers.js)
|
|
10
|
+
* 2. It's shared by multiple implementation layers (text, multimodal)
|
|
11
|
+
* 3. It provides common infrastructure (lifecycle, validation, batch processing)
|
|
12
|
+
* 4. Moving it would create awkward cross-layer dependencies
|
|
13
|
+
*
|
|
14
|
+
* This follows the "shared base class" pattern common in framework design,
|
|
15
|
+
* similar to React.Component, Django Model, or other framework base classes.
|
|
16
|
+
*
|
|
17
|
+
* RESPONSIBILITIES:
|
|
18
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
19
|
+
* - Batch processing coordination
|
|
20
|
+
* - Input validation and text truncation
|
|
21
|
+
* - Error handling with helpful messages
|
|
22
|
+
* - Embedding ID generation
|
|
23
|
+
* - Common utility methods
|
|
24
|
+
*
|
|
25
|
+
* IMPLEMENTATION LAYERS:
|
|
26
|
+
* - Text: SentenceTransformerEmbedder extends this class
|
|
27
|
+
* - Multimodal: CLIPEmbedder extends this class
|
|
28
|
+
*/
|
|
29
|
+
import type { UniversalEmbedder, ModelInfo, ModelType, EmbeddingBatchItem } from './universal-embedder.js';
|
|
30
|
+
import type { EmbeddingResult } from '../types.js';
|
|
31
|
+
/**
 * Shared abstract base for embedders that satisfy the UniversalEmbedder contract.
 *
 * Subclasses must provide loadModel(), embedText() and cleanup(); embedImage()
 * is optional. The remaining members are inherited from this class.
 */
export declare abstract class BaseUniversalEmbedder implements UniversalEmbedder {
    /** Name of the model this embedder was constructed for. */
    readonly modelName: string;
    /** Options passed to the constructor. */
    protected readonly options: EmbedderOptions;
    /** Loaded flag; subclasses are expected to maintain it. */
    protected _isLoaded: boolean;
    /** Description of the model (type, dimensions, supported content types, ...). */
    protected _modelInfo: ModelInfo;
    /**
     * @param modelName - Name of the model to embed with.
     * @param options - Optional embedder configuration.
     */
    constructor(modelName: string, options?: EmbedderOptions);
    /** Model type taken from the model description. */
    get modelType(): ModelType;
    /** Embedding vector dimensionality taken from the model description. */
    get dimensions(): number;
    /** Content types the model accepts. */
    get supportedContentTypes(): readonly string[];
    /** Whether the model is currently marked as loaded. */
    isLoaded(): boolean;
    /** Returns the model description. */
    getModelInfo(): ModelInfo;
    /**
     * Loads the underlying model. Implemented by each subclass.
     */
    abstract loadModel(): Promise<void>;
    /**
     * Embeds one piece of text. Implemented by each subclass.
     *
     * @param text - Text to embed.
     */
    abstract embedText(text: string): Promise<EmbeddingResult>;
    /**
     * Releases subclass-specific resources. Implemented by each subclass.
     */
    abstract cleanup(): Promise<void>;
    /**
     * Releases every resource held by the embedder. Call this once the
     * embedder is no longer needed.
     */
    dispose(): Promise<void>;
    /**
     * Embeds one image. Optional: only multimodal embedders provide it.
     *
     * @param imagePath - Path of the image to embed.
     */
    embedImage?(imagePath: string): Promise<EmbeddingResult>;
    /**
     * Embeds a list of items using the default batching strategy.
     * Subclasses may override it with a more efficient implementation.
     *
     * @param items - Items to embed.
     */
    embedBatch(items: EmbeddingBatchItem[]): Promise<EmbeddingResult[]>;
    /**
     * Embeds the items of a single batch.
     * Subclasses may override it with a more efficient implementation.
     *
     * @param batch - Items belonging to one batch.
     */
    protected processBatch(batch: EmbeddingBatchItem[]): Promise<EmbeddingResult[]>;
    /**
     * Guard for operations that need a loaded model.
     */
    protected ensureLoaded(): void;
    /**
     * Produces a unique identifier for an embedding.
     *
     * @param content - Content the embedding was computed from.
     * @param contentType - Optional content type label.
     */
    protected generateEmbeddingId(content: string, contentType?: string): string;
    /**
     * Simple hash helper used for content identification.
     */
    private simpleHash;
    /**
     * Checks a text's length against the model's constraints.
     *
     * @param text - Text to check.
     */
    protected validateTextLength(text: string): void;
    /**
     * Shortens a text to the model's maximum length.
     *
     * @param text - Text to shorten.
     */
    protected truncateText(text: string): string;
    /**
     * Reports progress while a model is loading.
     *
     * @param stage - Short description of the current stage.
     * @param details - Optional extra information.
     */
    protected logModelLoading(stage: string, details?: string): void;
    /**
     * Converts a model-loading failure into an Error with a more helpful message.
     *
     * @param error - The failure that occurred while loading.
     */
    protected handleLoadingError(error: Error): Error;
}
|
|
106
|
+
/**
 * Configuration accepted by embedder instances. Every field is optional.
 */
export interface EmbedderOptions {
    /** Cache location (presumably for downloaded model files; confirm against the concrete embedders). */
    cachePath?: string;
    /** Upper bound on the number of items embedded per batch. */
    maxBatchSize?: number;
    /** Timeout in milliseconds. */
    timeout?: number;
    /** Whether GPU use is requested. */
    enableGPU?: boolean;
    /** Free-form extra settings; their interpretation is not defined by this interface. */
    customConfig?: Record<string, any>;
    /** Desired logging verbosity. */
    logLevel?: 'debug' | 'info' | 'warn' | 'error' | 'silent';
}
|
|
117
|
+
/**
 * Builds an EmbedderOptions object, filling in defaults for omitted fields.
 *
 * @param options - Partial options supplied by the caller.
 * @returns The options merged with the defaults.
 */
export declare function createEmbedderOptions(
    options?: Partial<EmbedderOptions>
): EmbedderOptions;
|
|
121
|
+
/**
 * Checks an EmbedderOptions object for invalid values.
 *
 * @param options - Options to validate.
 * @throws Error when a value is not acceptable.
 */
export declare function validateEmbedderOptions(
    options: EmbedderOptions
): void;
|
|
125
|
+
//# sourceMappingURL=abstract-embedder.d.ts.map
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Embedder
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all embedder implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* While this contains implementation logic, it remains in the core layer because:
|
|
9
|
+
* 1. It's model-agnostic (no knowledge of specific models or transformers.js)
|
|
10
|
+
* 2. It's shared by multiple implementation layers (text, multimodal)
|
|
11
|
+
* 3. It provides common infrastructure (lifecycle, validation, batch processing)
|
|
12
|
+
* 4. Moving it would create awkward cross-layer dependencies
|
|
13
|
+
*
|
|
14
|
+
* This follows the "shared base class" pattern common in framework design,
|
|
15
|
+
* similar to React.Component, Django Model, or other framework base classes.
|
|
16
|
+
*
|
|
17
|
+
* RESPONSIBILITIES:
|
|
18
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
19
|
+
* - Batch processing coordination
|
|
20
|
+
* - Input validation and text truncation
|
|
21
|
+
* - Error handling with helpful messages
|
|
22
|
+
* - Embedding ID generation
|
|
23
|
+
* - Common utility methods
|
|
24
|
+
*
|
|
25
|
+
* IMPLEMENTATION LAYERS:
|
|
26
|
+
* - Text: SentenceTransformerEmbedder extends this class
|
|
27
|
+
* - Multimodal: CLIPEmbedder extends this class
|
|
28
|
+
*/
|
|
29
|
+
import { ModelRegistry } from './model-registry.js';
|
|
30
|
+
import { validateContentType, createEnhancedEmbeddingResult } from './universal-embedder.js';
|
|
31
|
+
// =============================================================================
|
|
32
|
+
// BASE EMBEDDER ABSTRACT CLASS
|
|
33
|
+
// =============================================================================
|
|
34
|
+
/**
|
|
35
|
+
* Abstract base class for universal embedders
|
|
36
|
+
* Provides common functionality and lifecycle management
|
|
37
|
+
*/
|
|
38
|
+
export class BaseUniversalEmbedder {
|
|
39
|
+
modelName;
|
|
40
|
+
options;
|
|
41
|
+
_isLoaded = false;
|
|
42
|
+
_modelInfo;
|
|
43
|
+
constructor(modelName, options = {}) {
|
|
44
|
+
this.modelName = modelName;
|
|
45
|
+
this.options = options;
|
|
46
|
+
const modelInfo = ModelRegistry.getModelInfo(modelName);
|
|
47
|
+
if (!modelInfo) {
|
|
48
|
+
throw new Error(`Model '${modelName}' is not supported. ` +
|
|
49
|
+
`Supported models: ${ModelRegistry.getSupportedModels().join(', ')}`);
|
|
50
|
+
}
|
|
51
|
+
this._modelInfo = modelInfo;
|
|
52
|
+
}
|
|
53
|
+
// =============================================================================
|
|
54
|
+
// PUBLIC INTERFACE IMPLEMENTATION
|
|
55
|
+
// =============================================================================
|
|
56
|
+
get modelType() {
|
|
57
|
+
return this._modelInfo.type;
|
|
58
|
+
}
|
|
59
|
+
get dimensions() {
|
|
60
|
+
return this._modelInfo.dimensions;
|
|
61
|
+
}
|
|
62
|
+
get supportedContentTypes() {
|
|
63
|
+
return this._modelInfo.supportedContentTypes;
|
|
64
|
+
}
|
|
65
|
+
isLoaded() {
|
|
66
|
+
return this._isLoaded;
|
|
67
|
+
}
|
|
68
|
+
getModelInfo() {
|
|
69
|
+
return { ...this._modelInfo }; // Return a copy to prevent mutation
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Dispose of all resources and prepare for garbage collection
|
|
73
|
+
* This method should be called when the embedder is no longer needed
|
|
74
|
+
*/
|
|
75
|
+
async dispose() {
|
|
76
|
+
try {
|
|
77
|
+
// Call the specific cleanup implementation
|
|
78
|
+
await this.cleanup();
|
|
79
|
+
// Clear internal state
|
|
80
|
+
this._isLoaded = false;
|
|
81
|
+
// Force garbage collection if available
|
|
82
|
+
if (global.gc) {
|
|
83
|
+
global.gc();
|
|
84
|
+
}
|
|
85
|
+
this.logModelLoading('Resources disposed and garbage collection triggered');
|
|
86
|
+
}
|
|
87
|
+
catch (error) {
|
|
88
|
+
console.warn(`Error during resource disposal: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
// =============================================================================
|
|
92
|
+
// DEFAULT IMPLEMENTATIONS
|
|
93
|
+
// =============================================================================
|
|
94
|
+
/**
|
|
95
|
+
* Batch embedding with default implementation
|
|
96
|
+
* Subclasses can override for more efficient batch processing
|
|
97
|
+
*/
|
|
98
|
+
async embedBatch(items) {
|
|
99
|
+
if (!this._isLoaded) {
|
|
100
|
+
await this.loadModel();
|
|
101
|
+
}
|
|
102
|
+
const results = [];
|
|
103
|
+
const batchSize = this.options.maxBatchSize || this._modelInfo.capabilities.maxBatchSize || 8;
|
|
104
|
+
// Process in batches to avoid memory issues
|
|
105
|
+
for (let i = 0; i < items.length; i += batchSize) {
|
|
106
|
+
const batch = items.slice(i, i + batchSize);
|
|
107
|
+
const batchResults = await this.processBatch(batch);
|
|
108
|
+
results.push(...batchResults);
|
|
109
|
+
}
|
|
110
|
+
return results;
|
|
111
|
+
}
|
|
112
|
+
// =============================================================================
|
|
113
|
+
// PROTECTED HELPER METHODS
|
|
114
|
+
// =============================================================================
|
|
115
|
+
/**
|
|
116
|
+
* Process a single batch of items
|
|
117
|
+
* Can be overridden by subclasses for more efficient batch processing
|
|
118
|
+
*/
|
|
119
|
+
async processBatch(batch) {
|
|
120
|
+
const results = [];
|
|
121
|
+
for (const item of batch) {
|
|
122
|
+
try {
|
|
123
|
+
validateContentType(item.contentType, this.supportedContentTypes);
|
|
124
|
+
let result;
|
|
125
|
+
if (item.contentType === 'text') {
|
|
126
|
+
result = await this.embedText(item.content);
|
|
127
|
+
}
|
|
128
|
+
else if (item.contentType === 'image' && this.embedImage) {
|
|
129
|
+
result = await this.embedImage(item.content);
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
throw new Error(`Content type '${item.contentType}' not supported by model '${this.modelName}'`);
|
|
133
|
+
}
|
|
134
|
+
// Enhance the result with content type and metadata
|
|
135
|
+
const enhancedResult = createEnhancedEmbeddingResult(result.embedding_id, result.vector, item.contentType, item.metadata);
|
|
136
|
+
results.push(enhancedResult);
|
|
137
|
+
}
|
|
138
|
+
catch (error) {
|
|
139
|
+
// Log error but continue processing other items
|
|
140
|
+
console.warn(`Failed to embed item: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
141
|
+
// Create a placeholder result with zero vector for failed items
|
|
142
|
+
const zeroVector = new Float32Array(this.dimensions).fill(0);
|
|
143
|
+
const failedResult = createEnhancedEmbeddingResult(`failed_${Date.now()}_${Math.random()}`, zeroVector, item.contentType, { ...item.metadata, error: error instanceof Error ? error.message : 'Unknown error' });
|
|
144
|
+
results.push(failedResult);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
return results;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Validate that the model is loaded before operations
|
|
151
|
+
*/
|
|
152
|
+
ensureLoaded() {
|
|
153
|
+
if (!this._isLoaded) {
|
|
154
|
+
throw new Error(`Model '${this.modelName}' is not loaded. Call loadModel() first.`);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Generate a unique embedding ID
|
|
159
|
+
*/
|
|
160
|
+
generateEmbeddingId(content, contentType = 'text') {
|
|
161
|
+
const timestamp = Date.now();
|
|
162
|
+
const random = Math.random().toString(36).substring(2);
|
|
163
|
+
const contentHash = this.simpleHash(content);
|
|
164
|
+
return `${contentType}_${contentHash}_${timestamp}_${random}`;
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Simple hash function for content identification
|
|
168
|
+
*/
|
|
169
|
+
simpleHash(str) {
|
|
170
|
+
let hash = 0;
|
|
171
|
+
for (let i = 0; i < str.length; i++) {
|
|
172
|
+
const char = str.charCodeAt(i);
|
|
173
|
+
hash = ((hash << 5) - hash) + char;
|
|
174
|
+
hash = hash & hash; // Convert to 32-bit integer
|
|
175
|
+
}
|
|
176
|
+
return Math.abs(hash).toString(36);
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Validate text length against model constraints
|
|
180
|
+
*/
|
|
181
|
+
validateTextLength(text) {
|
|
182
|
+
const maxLength = this._modelInfo.capabilities.maxTextLength;
|
|
183
|
+
if (maxLength && text.length > maxLength) {
|
|
184
|
+
console.warn(`Text length (${text.length}) exceeds model maximum (${maxLength}). ` +
|
|
185
|
+
`Text will be truncated.`);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* Truncate text to model's maximum length
|
|
190
|
+
*/
|
|
191
|
+
truncateText(text) {
|
|
192
|
+
const maxLength = this._modelInfo.capabilities.maxTextLength;
|
|
193
|
+
if (maxLength && text.length > maxLength) {
|
|
194
|
+
return text.substring(0, maxLength);
|
|
195
|
+
}
|
|
196
|
+
return text;
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* Log model loading progress
|
|
200
|
+
*/
|
|
201
|
+
logModelLoading(stage, details) {
|
|
202
|
+
const message = `[${this.modelName}] ${stage}`;
|
|
203
|
+
if (details) {
|
|
204
|
+
console.log(`${message}: ${details}`);
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
console.log(message);
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Handle model loading errors with helpful messages
|
|
212
|
+
*/
|
|
213
|
+
handleLoadingError(error) {
|
|
214
|
+
const baseMessage = `Failed to load model '${this.modelName}': ${error.message}`;
|
|
215
|
+
// Provide specific guidance based on error type
|
|
216
|
+
if (error.message.includes('network') || error.message.includes('fetch')) {
|
|
217
|
+
return new Error(`${baseMessage}\n` +
|
|
218
|
+
`This appears to be a network error. Please check your internet connection ` +
|
|
219
|
+
`and ensure the model repository is accessible.`);
|
|
220
|
+
}
|
|
221
|
+
if (error.message.includes('memory') || error.message.includes('OOM')) {
|
|
222
|
+
return new Error(`${baseMessage}\n` +
|
|
223
|
+
`This appears to be a memory error. Try using a smaller model or ` +
|
|
224
|
+
`increase available memory. Required: ${this._modelInfo.requirements.minimumMemory}MB`);
|
|
225
|
+
}
|
|
226
|
+
if (error.message.includes('unsupported') || error.message.includes('not found')) {
|
|
227
|
+
const suggestions = ModelRegistry.getSupportedModels(this.modelType);
|
|
228
|
+
return new Error(`${baseMessage}\n` +
|
|
229
|
+
`Model may not be available. Supported ${this.modelType} models: ${suggestions.join(', ')}`);
|
|
230
|
+
}
|
|
231
|
+
return new Error(baseMessage);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
// =============================================================================
|
|
235
|
+
// UTILITY FUNCTIONS
|
|
236
|
+
// =============================================================================
|
|
237
|
+
/**
|
|
238
|
+
* Create embedder options with defaults
|
|
239
|
+
*/
|
|
240
|
+
export function createEmbedderOptions(options = {}) {
|
|
241
|
+
return {
|
|
242
|
+
maxBatchSize: 8,
|
|
243
|
+
timeout: 30000, // 30 seconds
|
|
244
|
+
enableGPU: false,
|
|
245
|
+
logLevel: 'info',
|
|
246
|
+
...options
|
|
247
|
+
};
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* Validate embedder options
|
|
251
|
+
*/
|
|
252
|
+
export function validateEmbedderOptions(options) {
|
|
253
|
+
if (options.maxBatchSize && (options.maxBatchSize < 1 || options.maxBatchSize > 128)) {
|
|
254
|
+
throw new Error('maxBatchSize must be between 1 and 128');
|
|
255
|
+
}
|
|
256
|
+
if (options.timeout && options.timeout < 1000) {
|
|
257
|
+
throw new Error('timeout must be at least 1000ms');
|
|
258
|
+
}
|
|
259
|
+
const validLogLevels = ['debug', 'info', 'warn', 'error', 'silent'];
|
|
260
|
+
if (options.logLevel && !validLogLevels.includes(options.logLevel)) {
|
|
261
|
+
throw new Error(`logLevel must be one of: ${validLogLevels.join(', ')}`);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
//# sourceMappingURL=abstract-embedder.js.map
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Actionable Error Messages
|
|
3
|
+
* Provides user-friendly, actionable error messages with specific guidance
|
|
4
|
+
* Replaces technical error messages with helpful troubleshooting steps
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Options shared by the actionable-error helpers declared in this file.
 * Every field is optional.
 */
export interface ActionableErrorConfig {
    /** When set, examples are included in the error message. */
    includeExamples?: boolean;
    /** When set, troubleshooting steps are included in the error message. */
    includeTroubleshooting?: boolean;
    /** Description of the operation that was running when the error occurred. */
    operationContext?: string;
}
|
|
17
|
+
/**
 * Builds an actionable Error for a file that is missing.
 *
 * @param filePath - Path of the missing file.
 * @param fileType - Kind of file: index, database, config or content.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createMissingFileError(
    filePath: string,
    fileType: 'index' | 'database' | 'config' | 'content',
    config?: ActionableErrorConfig
): Error;
|
|
21
|
+
/**
 * Builds an actionable Error for invalid paths.
 *
 * @param paths - Name/value pairs for the paths concerned; a value may be undefined.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createInvalidPathError(
    paths: {
        name: string;
        value: string | undefined;
    }[],
    config?: ActionableErrorConfig
): Error;
|
|
28
|
+
/**
 * Builds an actionable Error for a model that failed to load.
 *
 * @param modelName - Name of the model.
 * @param originalError - Text of the underlying error.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createModelLoadingError(
    modelName: string,
    originalError: string,
    config?: ActionableErrorConfig
): Error;
|
|
32
|
+
/**
 * Builds an actionable Error for a dimension mismatch.
 *
 * @param expected - Dimension that was expected.
 * @param actual - Dimension that was encountered.
 * @param context - Where the mismatch was detected.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createDimensionMismatchError(
    expected: number,
    actual: number,
    context: string,
    config?: ActionableErrorConfig
): Error;
|
|
36
|
+
/**
 * Builds an actionable Error for a mode mismatch.
 *
 * @param expectedMode - Mode that was expected.
 * @param actualMode - Mode that was encountered.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createModeMismatchError(
    expectedMode: string,
    actualMode: string,
    config?: ActionableErrorConfig
): Error;
|
|
40
|
+
/**
 * Builds an actionable Error for empty or invalid content.
 *
 * @param contentType - Type of the content concerned.
 * @param issue - Problem category: empty, invalid_format, too_large or unsupported.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createInvalidContentError(
    contentType: string,
    issue: 'empty' | 'invalid_format' | 'too_large' | 'unsupported',
    config?: ActionableErrorConfig
): Error;
|
|
44
|
+
/**
 * Builds an actionable Error for a missing dependency.
 *
 * @param dependencyName - Name of the dependency.
 * @param dependencyType - Kind of dependency: function, object or service.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createMissingDependencyError(
    dependencyName: string,
    dependencyType: 'function' | 'object' | 'service',
    config?: ActionableErrorConfig
): Error;
|
|
48
|
+
/**
 * Builds an actionable Error for a factory that failed to create its product.
 *
 * @param factoryName - Name of the factory.
 * @param originalError - Text of the underlying error.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createFactoryCreationError(
    factoryName: string,
    originalError: string,
    config?: ActionableErrorConfig
): Error;
|
|
52
|
+
/**
 * Adds actionable information to an existing error.
 *
 * @param originalError - Error to enhance.
 * @param context - Description of what was being done.
 * @param suggestions - Optional list of suggestions.
 * @param config - Optional message configuration.
 * @returns An Error instance (whether it is a new object or the original is not visible from this declaration).
 */
export declare function enhanceError(
    originalError: Error,
    context: string,
    suggestions?: string[],
    config?: ActionableErrorConfig
): Error;
|
|
56
|
+
/**
 * Builds a user-friendly Error that carries context.
 *
 * @param message - Main error message.
 * @param context - Description of what was being done.
 * @param suggestions - Optional list of suggestions.
 * @param examples - Optional list of examples.
 * @param config - Optional message configuration.
 * @returns An Error instance.
 */
export declare function createContextualError(
    message: string,
    context: string,
    suggestions?: string[],
    examples?: string[],
    config?: ActionableErrorConfig
): Error;
|
|
60
|
+
//# sourceMappingURL=actionable-error-messages.d.ts.map
|