rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Simple Embedder Creation Function
|
|
3
|
+
*
|
|
4
|
+
* Provides direct model instantiation with clear validation and error handling.
|
|
5
|
+
* No fallback mechanisms - models work reliably or fail clearly with actionable guidance.
|
|
6
|
+
*
|
|
7
|
+
* Supported Models:
|
|
8
|
+
* - Text Mode: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2
|
|
9
|
+
* - Multimodal Mode: Xenova/clip-vit-base-patch32, Xenova/clip-vit-base-patch16
|
|
10
|
+
*
|
|
11
|
+
* Mode Selection Guide:
|
|
12
|
+
* - Use text mode for text-only content (faster, optimized for text similarity)
|
|
13
|
+
* - Use multimodal mode for mixed text/image content (enables cross-modal search)
|
|
14
|
+
*/
|
|
15
|
+
import '../dom-polyfills.js';
|
|
16
|
+
import type { UniversalEmbedder, ModelType, EmbedderCreationOptions } from './universal-embedder.js';
|
|
17
|
+
/**
|
|
18
|
+
* Create a universal embedder for the specified model
|
|
19
|
+
*
|
|
20
|
+
* Simple function-based approach that validates model compatibility and creates
|
|
21
|
+
* the appropriate embedder. Models work reliably without fallback mechanisms -
|
|
22
|
+
* if there's an issue, you'll get clear error messages with actionable guidance.
|
|
23
|
+
*
|
|
24
|
+
* Mode Selection:
|
|
25
|
+
* - Text Mode: Use sentence-transformer models for text-only content
|
|
26
|
+
* - Fast, optimized for text similarity
|
|
27
|
+
* - Best for: document search, semantic similarity, text clustering
|
|
28
|
+
*
|
|
29
|
+
* - Multimodal Mode: Use CLIP models for mixed text/image content
|
|
30
|
+
* - Unified embedding space for text and images
|
|
31
|
+
* - Enables cross-modal search (text queries find images, image queries find text)
|
|
32
|
+
* - Best for: image search, visual question answering, multimodal retrieval
|
|
33
|
+
*
|
|
34
|
+
* @param modelName - Name of the model to create
|
|
35
|
+
* @param options - Optional configuration options
|
|
36
|
+
* @returns Promise resolving to a UniversalEmbedder instance
|
|
37
|
+
* @throws {Error} If model is not supported or validation fails
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```typescript
|
|
41
|
+
* // Text mode - optimized for text-only content
|
|
42
|
+
* const textEmbedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
|
|
43
|
+
* const textResult = await textEmbedder.embedText('machine learning');
|
|
44
|
+
*
|
|
45
|
+
* // Multimodal mode - enables cross-modal search
|
|
46
|
+
* const clipEmbedder = await createEmbedder('Xenova/clip-vit-base-patch32');
|
|
47
|
+
* const textResult = await clipEmbedder.embedText('red sports car');
|
|
48
|
+
* const imageResult = await clipEmbedder.embedImage('./car.jpg');
|
|
49
|
+
*
|
|
50
|
+
* // Create with custom options
|
|
51
|
+
* const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2', {
|
|
52
|
+
* maxBatchSize: 16,
|
|
53
|
+
* cachePath: './models'
|
|
54
|
+
* });
|
|
55
|
+
* ```
|
|
56
|
+
*/
|
|
57
|
+
export declare function createEmbedder(modelName: string, options?: EmbedderCreationOptions): Promise<UniversalEmbedder>;
|
|
58
|
+
/**
|
|
59
|
+
* Get supported models for a specific content type
|
|
60
|
+
* Convenience function for filtering models by capability
|
|
61
|
+
*
|
|
62
|
+
* @param contentType - Content type to filter by ('text', 'image', etc.)
|
|
63
|
+
* @returns Array of model names that support the content type
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```typescript
|
|
67
|
+
* const textModels = getSupportedModelsForContentType('text');
|
|
68
|
+
* const imageModels = getSupportedModelsForContentType('image');
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
export declare function getSupportedModelsForContentType(contentType: string): string[];
|
|
72
|
+
/**
|
|
73
|
+
* Get recommended model for a specific use case
|
|
74
|
+
*
|
|
75
|
+
* Provides intelligent model selection based on content types and constraints.
|
|
76
|
+
* Returns models that work reliably for the specified requirements.
|
|
77
|
+
*
|
|
78
|
+
* Mode Selection Guide:
|
|
79
|
+
* - Text only (['text']): Returns sentence-transformer models
|
|
80
|
+
* - Fast, optimized for text similarity
|
|
81
|
+
* - Best for document search and text clustering
|
|
82
|
+
*
|
|
83
|
+
* - Text + Images (['text', 'image']): Returns CLIP models
|
|
84
|
+
* - Unified embedding space for cross-modal search
|
|
85
|
+
* - Text queries can find images, image queries can find text
|
|
86
|
+
* - Best for visual search and multimodal retrieval
|
|
87
|
+
*
|
|
88
|
+
* @param contentTypes - Required content types
|
|
89
|
+
* @param constraints - Optional constraints (memory, performance, etc.)
|
|
90
|
+
* @param constraints.maxMemory - Maximum memory in MB
|
|
91
|
+
* @param constraints.preferPerformance - Prefer faster models
|
|
92
|
+
* @param constraints.preferAccuracy - Prefer more accurate models
|
|
93
|
+
* @returns Recommended model name or null if no suitable model found
|
|
94
|
+
*
|
|
95
|
+
* @example
|
|
96
|
+
* ```typescript
|
|
97
|
+
* // Get best text model (fast, optimized for text)
|
|
98
|
+
* const textModel = getRecommendedModel(['text']);
|
|
99
|
+
* // Returns: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
100
|
+
*
|
|
101
|
+
* // Get best multimodal model (enables cross-modal search)
|
|
102
|
+
* const multimodalModel = getRecommendedModel(['text', 'image']);
|
|
103
|
+
* // Returns: 'Xenova/clip-vit-base-patch32'
|
|
104
|
+
*
|
|
105
|
+
* // Get performance-optimized model
|
|
106
|
+
* const fastModel = getRecommendedModel(['text'], { preferPerformance: true });
|
|
107
|
+
*
|
|
108
|
+
* // Get accuracy-optimized model
|
|
109
|
+
* const accurateModel = getRecommendedModel(['text'], { preferAccuracy: true });
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
export declare function getRecommendedModel(contentTypes: string[], constraints?: {
|
|
113
|
+
maxMemory?: number;
|
|
114
|
+
preferPerformance?: boolean;
|
|
115
|
+
preferAccuracy?: boolean;
|
|
116
|
+
}): string | null;
|
|
117
|
+
/**
|
|
118
|
+
* Validate model compatibility before creation
|
|
119
|
+
* Useful for checking compatibility without creating the embedder
|
|
120
|
+
*
|
|
121
|
+
* @param modelName - Name of the model to validate
|
|
122
|
+
* @returns Promise resolving to validation result
|
|
123
|
+
*
|
|
124
|
+
* @example
|
|
125
|
+
* ```typescript
|
|
126
|
+
* const isValid = await validateModelCompatibility('sentence-transformers/all-MiniLM-L6-v2');
|
|
127
|
+
* if (isValid) {
|
|
128
|
+
* const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
|
|
129
|
+
* }
|
|
130
|
+
* ```
|
|
131
|
+
*/
|
|
132
|
+
export declare function validateModelCompatibility(modelName: string): Promise<boolean>;
|
|
133
|
+
/**
|
|
134
|
+
* List all available models with their capabilities
|
|
135
|
+
* Useful for displaying model options to users
|
|
136
|
+
*
|
|
137
|
+
* @returns Array of model information objects
|
|
138
|
+
*
|
|
139
|
+
* @example
|
|
140
|
+
* ```typescript
|
|
141
|
+
* const models = listAvailableModels();
|
|
142
|
+
* models.forEach(model => {
|
|
143
|
+
* console.log(`${model.name}: ${model.supportedContentTypes.join(', ')}`);
|
|
144
|
+
* });
|
|
145
|
+
* ```
|
|
146
|
+
*/
|
|
147
|
+
export declare function listAvailableModels(): Array<{
|
|
148
|
+
name: string;
|
|
149
|
+
type: ModelType;
|
|
150
|
+
dimensions: number;
|
|
151
|
+
supportedContentTypes: readonly string[];
|
|
152
|
+
memoryRequirement: number | undefined;
|
|
153
|
+
}>;
|
|
154
|
+
/**
|
|
155
|
+
* @deprecated Use createEmbedder() instead
|
|
156
|
+
* Legacy factory-style interface for backward compatibility
|
|
157
|
+
*/
|
|
158
|
+
export declare const UniversalEmbedderFactory: {
|
|
159
|
+
/**
|
|
160
|
+
* @deprecated Use createEmbedder() instead
|
|
161
|
+
*/
|
|
162
|
+
readonly create: (modelName: string, options?: EmbedderCreationOptions) => Promise<UniversalEmbedder>;
|
|
163
|
+
/**
|
|
164
|
+
* @deprecated Use ModelRegistry.validateModel() instead
|
|
165
|
+
*/
|
|
166
|
+
readonly validateModel: (modelName: string) => import("./universal-embedder.js").ModelValidationResult;
|
|
167
|
+
/**
|
|
168
|
+
* @deprecated Use ModelRegistry.getModelInfo() instead
|
|
169
|
+
*/
|
|
170
|
+
readonly getModelInfo: (modelName: string) => import("./universal-embedder.js").ModelInfo | null;
|
|
171
|
+
/**
|
|
172
|
+
* @deprecated Use ModelRegistry.getSupportedModels() instead
|
|
173
|
+
*/
|
|
174
|
+
readonly getSupportedModels: (modelType?: ModelType) => string[];
|
|
175
|
+
};
|
|
176
|
+
//# sourceMappingURL=embedder-factory.d.ts.map
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Simple Embedder Creation Function
|
|
3
|
+
*
|
|
4
|
+
* Provides direct model instantiation with clear validation and error handling.
|
|
5
|
+
* No fallback mechanisms - models work reliably or fail clearly with actionable guidance.
|
|
6
|
+
*
|
|
7
|
+
* Supported Models:
|
|
8
|
+
* - Text Mode: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2
|
|
9
|
+
* - Multimodal Mode: Xenova/clip-vit-base-patch32, Xenova/clip-vit-base-patch16
|
|
10
|
+
*
|
|
11
|
+
* Mode Selection Guide:
|
|
12
|
+
* - Use text mode for text-only content (faster, optimized for text similarity)
|
|
13
|
+
* - Use multimodal mode for mixed text/image content (enables cross-modal search)
|
|
14
|
+
*/
|
|
15
|
+
// Ensure DOM polyfills are set up before any transformers.js usage
|
|
16
|
+
import '../dom-polyfills.js';
|
|
17
|
+
import { ModelRegistry } from './model-registry.js';
|
|
18
|
+
import { ModelValidator } from './model-validator.js';
|
|
19
|
+
import { createModelValidationError } from './model-validator.js';
|
|
20
|
+
import { createValidationErrorMessage } from './validation-messages.js';
|
|
21
|
+
// =============================================================================
|
|
22
|
+
// SIMPLE EMBEDDER CREATION FUNCTION
|
|
23
|
+
// =============================================================================
|
|
24
|
+
/**
 * Create a universal embedder for the specified model.
 *
 * The function runs four stages in order:
 *   0. Makes sure a `self` global exists before any transformers.js usage.
 *   1. Asks ModelValidator to detect the transformers.js version if it has
 *      not reported one yet.
 *   2. Looks the model up in ModelRegistry and runs the detailed validation.
 *   3. Dispatches on the registry entry's `type` to the matching lazy loader.
 *
 * There is no fallback model: an unknown model, a failed validation or an
 * unsupported model type logs a formatted message via console.error and then
 * throws the error built by createModelValidationError().
 *
 * If the detailed validation itself throws something other than a
 * 'ModelValidationError', that failure is only logged as a warning and
 * creation proceeds based on the registry lookup alone (deliberate
 * best-effort behavior).
 *
 * Mode Selection:
 * - Text Mode: 'sentence-transformer' models for text-only content
 * - Multimodal Mode: 'clip' models for mixed text/image content
 *
 * @param modelName - Name of the model to create
 * @param options - Optional configuration options, passed through unchanged
 *                  to the model-specific loader
 * @returns Promise resolving to a UniversalEmbedder instance
 * @throws {Error} If model is not supported or validation fails
 *
 * @example
 * ```typescript
 * // Text mode - optimized for text-only content
 * const textEmbedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
 * const textResult = await textEmbedder.embedText('machine learning');
 *
 * // Multimodal mode - enables cross-modal search
 * const clipEmbedder = await createEmbedder('Xenova/clip-vit-base-patch32');
 * const imageResult = await clipEmbedder.embedImage('./car.jpg');
 *
 * // Create with custom options
 * const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2', {
 *     maxBatchSize: 16,
 *     cachePath: './models'
 * });
 * ```
 */
export async function createEmbedder(modelName, options = {}) {
    // Step 0: Ensure a `self` global exists before any transformers.js usage.
    // Only `globalThis` is touched: in Node.js `global` is the very same object,
    // so a second check through `global` adds nothing there, while in runtimes
    // that do not define `global` it would throw a ReferenceError.
    if (typeof globalThis.self === 'undefined') {
        globalThis.self = globalThis;
    }
    // Step 1: Initialize model validator if not already done
    if (!ModelValidator.getTransformersVersion()) {
        await ModelValidator.detectTransformersVersion();
    }
    // Step 2: Make sure the model is present in the registry
    const modelInfo = ModelRegistry.getModelInfo(modelName);
    if (!modelInfo) {
        const errorMessage = createValidationErrorMessage(modelName, 'not_found', {
            suggestions: ModelRegistry.getSupportedModels()
        });
        console.error(errorMessage);
        throw createModelValidationError(modelName, 'Model not found in supported models registry');
    }
    // Step 3: Perform detailed validation
    try {
        const detailedValidation = await ModelValidator.validateModelDetailed(modelName);
        if (!detailedValidation.isValid) {
            const firstError = detailedValidation.errors[0] || 'Validation failed';
            const errorMessage = createValidationErrorMessage(modelName, 'version_incompatible', {
                required: modelInfo.requirements.transformersJsVersion,
                current: ModelValidator.getTransformersVersion() || 'unknown'
            });
            console.error(errorMessage);
            throw createModelValidationError(modelName, firstError);
        }
        // Log warnings if any
        if (detailedValidation.warnings.length > 0) {
            console.warn(`⚠️ Warnings for model '${modelName}':`);
            detailedValidation.warnings.forEach(warning => console.warn(` • ${warning}`));
        }
        // Log suggestions if any
        if (detailedValidation.suggestions.length > 0) {
            console.info(`💡 Suggestions for model '${modelName}':`);
            detailedValidation.suggestions.forEach(suggestion => console.info(` • ${suggestion}`));
        }
    }
    catch (error) {
        // Genuine validation failures (including the one thrown just above)
        // must reach the caller untouched.
        if (error instanceof Error && error.name === 'ModelValidationError') {
            throw error;
        }
        // Anything else means the validator itself malfunctioned; the registry
        // lookup already succeeded, so continue on that basis.
        console.warn(`Warning: Could not perform detailed validation for '${modelName}': ${error}`);
        console.info('Proceeding with basic validation only...');
    }
    // Step 4: Create the appropriate embedder based on model type
    const modelType = modelInfo.type;
    switch (modelType) {
        case 'sentence-transformer':
            return await createSentenceTransformerEmbedder(modelName, options);
        case 'clip':
            return await createCLIPEmbedder(modelName, options);
        default: {
            // Braces give the lexical declaration its own scope instead of
            // sharing the whole switch body.
            const errorMessage = createValidationErrorMessage(modelName, 'not_found', {
                suggestions: [`Unsupported model type: ${modelType}`]
            });
            console.error(errorMessage);
            throw createModelValidationError(modelName, `Unsupported model type: ${modelType}. Supported types: sentence-transformer, clip`);
        }
    }
}
|
|
132
|
+
// =============================================================================
|
|
133
|
+
// MODEL-SPECIFIC CREATION FUNCTIONS
|
|
134
|
+
// =============================================================================
|
|
135
|
+
/**
 * Build a sentence-transformer embedder through the lazy dependency loader.
 *
 * The loader module is imported dynamically at call time, so the text
 * dependencies are not loaded unless this function is actually invoked.
 *
 * @param modelName - Model name handed to the loader
 * @param options - Creation options handed to the loader unchanged
 * @returns Whatever LazyEmbedderLoader.loadSentenceTransformerEmbedder() returns
 * @private
 */
async function createSentenceTransformerEmbedder(modelName, options) {
    const lazyLoaderModule = await import('./lazy-dependency-loader.js');
    return lazyLoaderModule.LazyEmbedderLoader.loadSentenceTransformerEmbedder(modelName, options);
}
|
|
144
|
+
/**
 * Build a CLIP embedder through the lazy dependency loader.
 *
 * The loader module is imported dynamically at call time, so the multimodal
 * dependencies are not loaded unless this function is actually invoked.
 *
 * @param modelName - Model name handed to the loader
 * @param options - Creation options handed to the loader unchanged
 * @returns Whatever LazyEmbedderLoader.loadCLIPEmbedder() returns
 * @private
 */
async function createCLIPEmbedder(modelName, options) {
    const lazyLoaderModule = await import('./lazy-dependency-loader.js');
    return lazyLoaderModule.LazyEmbedderLoader.loadCLIPEmbedder(modelName, options);
}
|
|
153
|
+
// =============================================================================
|
|
154
|
+
// UTILITY FUNCTIONS
|
|
155
|
+
// =============================================================================
|
|
156
|
+
/**
 * Look up the models that can handle a given content type.
 *
 * Thin convenience wrapper: the answer comes straight from
 * ModelRegistry.getModelsByContentType().
 *
 * @param contentType - Content type to filter by ('text', 'image', etc.)
 * @returns Array of model names that support the content type
 *
 * @example
 * ```typescript
 * const textModels = getSupportedModelsForContentType('text');
 * const imageModels = getSupportedModelsForContentType('image');
 * ```
 */
export function getSupportedModelsForContentType(contentType) {
    const matchingModelNames = ModelRegistry.getModelsByContentType(contentType);
    return matchingModelNames;
}
|
|
172
|
+
/**
 * Pick one recommended model name for the given content types.
 *
 * Candidates come from ModelValidator.getRecommendedModels(), called with the
 * content types, constraints.maxMemory and the currently known transformers.js
 * version (or undefined when none is known). The choice is then made as follows:
 *   1. No candidates at all -> null.
 *   2. preferPerformance set -> first entry of the built-in performance
 *      ranking that is among the candidates.
 *   3. preferAccuracy set -> first entry of the built-in accuracy ranking
 *      that is among the candidates.
 *   4. Otherwise (or when no ranked entry matched) -> the first candidate.
 * When both preferences are set, the performance ranking is consulted first.
 *
 * @param contentTypes - Required content types
 * @param constraints - Optional constraints (memory, performance, etc.)
 * @param constraints.maxMemory - Maximum memory in MB
 * @param constraints.preferPerformance - Prefer faster models
 * @param constraints.preferAccuracy - Prefer more accurate models
 * @returns Recommended model name or null if no suitable model found
 *
 * @example
 * ```typescript
 * const textModel = getRecommendedModel(['text']);
 * const multimodalModel = getRecommendedModel(['text', 'image']);
 * const fastModel = getRecommendedModel(['text'], { preferPerformance: true });
 * const accurateModel = getRecommendedModel(['text'], { preferAccuracy: true });
 * ```
 */
export function getRecommendedModel(contentTypes, constraints = {}) {
    const detectedVersion = ModelValidator.getTransformersVersion();
    const candidates = ModelValidator.getRecommendedModels(contentTypes, constraints.maxMemory, detectedVersion || undefined);
    if (candidates.length === 0) {
        return null;
    }
    // Returns the highest-ranked name that is also a candidate, or undefined.
    const bestRanked = (ranking) => ranking.find((rankedName) => candidates.includes(rankedName));
    if (constraints.preferPerformance) {
        // Smaller, faster models come first.
        const fastest = bestRanked([
            'sentence-transformers/all-MiniLM-L6-v2',
            'Xenova/clip-vit-base-patch32',
            'Xenova/all-mpnet-base-v2',
            'Xenova/clip-vit-base-patch16'
        ]);
        if (fastest !== undefined) {
            return fastest;
        }
    }
    if (constraints.preferAccuracy) {
        // Larger, more accurate models come first.
        const mostAccurate = bestRanked([
            'Xenova/all-mpnet-base-v2',
            'Xenova/clip-vit-base-patch16',
            'sentence-transformers/all-MiniLM-L6-v2',
            'Xenova/clip-vit-base-patch32'
        ]);
        if (mostAccurate !== undefined) {
            return mostAccurate;
        }
    }
    // No preference given, or nothing in the rankings was compatible.
    return candidates[0];
}
|
|
250
|
+
/**
 * Check whether a model passes detailed validation, without creating it.
 *
 * Resolves to the `isValid` flag reported by
 * ModelValidator.validateModelDetailed(). This function never rejects: if the
 * validator throws, the failure is logged with console.warn and the result
 * is `false`.
 *
 * @param modelName - Name of the model to validate
 * @returns Promise resolving to validation result
 *
 * @example
 * ```typescript
 * const isValid = await validateModelCompatibility('sentence-transformers/all-MiniLM-L6-v2');
 * if (isValid) {
 *     const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
 * }
 * ```
 */
export async function validateModelCompatibility(modelName) {
    let outcome;
    try {
        const report = await ModelValidator.validateModelDetailed(modelName);
        outcome = report.isValid;
    }
    catch (failure) {
        // A validator crash is reported as "not compatible" rather than propagated.
        console.warn(`Validation failed for '${modelName}': ${failure}`);
        outcome = false;
    }
    return outcome;
}
|
|
275
|
+
/**
 * Build a summary of every model the registry reports as supported.
 *
 * For each name returned by ModelRegistry.getSupportedModels(), the matching
 * ModelRegistry.getModelInfo() record is reduced to a flat object suitable
 * for presenting model choices to users.
 *
 * @returns Array of objects with `name`, `type`, `dimensions`,
 *          `supportedContentTypes` and `memoryRequirement` (taken from
 *          `requirements.minimumMemory` of the registry record)
 *
 * @example
 * ```typescript
 * for (const model of listAvailableModels()) {
 *   console.log(`${model.name}: ${model.supportedContentTypes.join(', ')}`);
 * }
 * ```
 */
export function listAvailableModels() {
    // Flatten one registry record into the public summary shape.
    const summarize = (modelName) => {
        const entry = ModelRegistry.getModelInfo(modelName);
        const summary = {
            name: entry.name,
            type: entry.type,
            dimensions: entry.dimensions,
            supportedContentTypes: entry.supportedContentTypes,
            memoryRequirement: entry.requirements.minimumMemory
        };
        return summary;
    };
    const supportedNames = ModelRegistry.getSupportedModels();
    return supportedNames.map((modelName) => summarize(modelName));
}
|
|
301
|
+
// =============================================================================
|
|
302
|
+
// BACKWARD COMPATIBILITY
|
|
303
|
+
// =============================================================================
|
|
304
|
+
/**
 * Legacy factory-style facade retained for backward compatibility.
 *
 * Every method emits a deprecation notice through console.warn and then
 * forwards its arguments unchanged to the replacement API.
 *
 * @deprecated Use createEmbedder() and ModelRegistry directly instead
 */
export const UniversalEmbedderFactory = {
    /**
     * Warn, then forward to createEmbedder(modelName, options).
     *
     * @deprecated Use createEmbedder() instead
     */
    async create(modelName, options) {
        const notice = 'UniversalEmbedderFactory.create() is deprecated. Use createEmbedder() instead.';
        console.warn(notice);
        return createEmbedder(modelName, options);
    },
    /**
     * Warn, then forward to ModelRegistry.validateModel(modelName).
     *
     * @deprecated Use ModelRegistry.validateModel() instead
     */
    validateModel(modelName) {
        const notice = 'UniversalEmbedderFactory.validateModel() is deprecated. Use ModelRegistry.validateModel() instead.';
        console.warn(notice);
        return ModelRegistry.validateModel(modelName);
    },
    /**
     * Warn, then forward to ModelRegistry.getModelInfo(modelName).
     *
     * @deprecated Use ModelRegistry.getModelInfo() instead
     */
    getModelInfo(modelName) {
        const notice = 'UniversalEmbedderFactory.getModelInfo() is deprecated. Use ModelRegistry.getModelInfo() instead.';
        console.warn(notice);
        return ModelRegistry.getModelInfo(modelName);
    },
    /**
     * Warn, then forward to ModelRegistry.getSupportedModels(modelType).
     *
     * @deprecated Use ModelRegistry.getSupportedModels() instead
     */
    getSupportedModels(modelType) {
        const notice = 'UniversalEmbedderFactory.getSupportedModels() is deprecated. Use ModelRegistry.getSupportedModels() instead.';
        console.warn(notice);
        return ModelRegistry.getSupportedModels(modelType);
    }
};
|
|
338
|
+
//# sourceMappingURL=embedder-factory.js.map
|
package/dist/core/index.d.ts
CHANGED
|
@@ -47,11 +47,13 @@ export { type ContentDocument, type ContentChunk, type Document, type Chunk, typ
|
|
|
47
47
|
export { type EmbedFunction, type RerankFunction, type EmbeddingQueryInterface, type RerankingInterface, type SearchEngineConfig, type ContentTypeStrategy, type ModelAgnosticInterface, type ExtendedEmbeddingInterface, type ExtendedRerankingInterface, type SearchPipelineInterface, type SearchDependencyFactory, InterfaceValidator } from './interfaces.js';
|
|
48
48
|
export * from './adapters.js';
|
|
49
49
|
export * from './config.js';
|
|
50
|
-
export { type DatabaseConnection, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
|
|
50
|
+
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
51
51
|
export { type VectorIndexOptions, VectorIndex } from './vector-index.js';
|
|
52
52
|
export { type ChunkConfig, type GenericDocument, type GenericChunk, type ChunkingStrategy, ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
|
|
53
53
|
export * from './search.js';
|
|
54
54
|
export * from './ingestion.js';
|
|
55
55
|
export * from './path-manager.js';
|
|
56
|
+
export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
|
|
57
|
+
export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
|
|
56
58
|
export * from './error-handler.js';
|
|
57
59
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/core/index.js
CHANGED
|
@@ -50,7 +50,7 @@ export * from './adapters.js';
|
|
|
50
50
|
// Core configuration management - model-agnostic settings
|
|
51
51
|
export * from './config.js';
|
|
52
52
|
// Database operations - supports different content types through metadata
|
|
53
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
|
|
53
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
54
54
|
// Vector index operations - works with any embedding dimensions
|
|
55
55
|
export { VectorIndex } from './vector-index.js';
|
|
56
56
|
// Generic chunking interfaces and strategies - supports text, image metadata, etc.
|
|
@@ -61,6 +61,9 @@ export * from './search.js';
|
|
|
61
61
|
export * from './ingestion.js';
|
|
62
62
|
// Path management utilities - content-type agnostic
|
|
63
63
|
export * from './path-manager.js';
|
|
64
|
+
// Unified content system - handles both filesystem and memory content
|
|
65
|
+
export { ContentManager } from './content-manager.js';
|
|
66
|
+
export { ContentResolver } from './content-resolver.js';
|
|
64
67
|
// Error handling framework - supports implementation-specific error contexts
|
|
65
68
|
export * from './error-handler.js';
|
|
66
69
|
//# sourceMappingURL=index.js.map
|