rag-lite-ts 1.0.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/README.md +605 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/binary-index-format.d.ts +52 -0
  12. package/dist/core/binary-index-format.js +122 -0
  13. package/dist/core/chunker.d.ts +2 -0
  14. package/dist/core/cli-database-utils.d.ts +53 -0
  15. package/dist/core/cli-database-utils.js +239 -0
  16. package/dist/core/config.js +10 -3
  17. package/dist/core/content-errors.d.ts +111 -0
  18. package/dist/core/content-errors.js +362 -0
  19. package/dist/core/content-manager.d.ts +343 -0
  20. package/dist/core/content-manager.js +1504 -0
  21. package/dist/core/content-performance-optimizer.d.ts +150 -0
  22. package/dist/core/content-performance-optimizer.js +516 -0
  23. package/dist/core/content-resolver.d.ts +104 -0
  24. package/dist/core/content-resolver.js +285 -0
  25. package/dist/core/cross-modal-search.d.ts +164 -0
  26. package/dist/core/cross-modal-search.js +342 -0
  27. package/dist/core/database-connection-manager.d.ts +109 -0
  28. package/dist/core/database-connection-manager.js +304 -0
  29. package/dist/core/db.d.ts +141 -2
  30. package/dist/core/db.js +631 -89
  31. package/dist/core/embedder-factory.d.ts +176 -0
  32. package/dist/core/embedder-factory.js +338 -0
  33. package/dist/core/index.d.ts +3 -1
  34. package/dist/core/index.js +4 -1
  35. package/dist/core/ingestion.d.ts +85 -15
  36. package/dist/core/ingestion.js +510 -45
  37. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  38. package/dist/core/lazy-dependency-loader.js +453 -0
  39. package/dist/core/mode-detection-service.d.ts +150 -0
  40. package/dist/core/mode-detection-service.js +565 -0
  41. package/dist/core/mode-model-validator.d.ts +92 -0
  42. package/dist/core/mode-model-validator.js +203 -0
  43. package/dist/core/model-registry.d.ts +120 -0
  44. package/dist/core/model-registry.js +415 -0
  45. package/dist/core/model-validator.d.ts +217 -0
  46. package/dist/core/model-validator.js +782 -0
  47. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  48. package/dist/core/polymorphic-search-factory.js +344 -0
  49. package/dist/core/raglite-paths.d.ts +121 -0
  50. package/dist/core/raglite-paths.js +145 -0
  51. package/dist/core/reranking-config.d.ts +42 -0
  52. package/dist/core/reranking-config.js +156 -0
  53. package/dist/core/reranking-factory.d.ts +92 -0
  54. package/dist/core/reranking-factory.js +591 -0
  55. package/dist/core/reranking-strategies.d.ts +325 -0
  56. package/dist/core/reranking-strategies.js +720 -0
  57. package/dist/core/resource-cleanup.d.ts +163 -0
  58. package/dist/core/resource-cleanup.js +371 -0
  59. package/dist/core/resource-manager.d.ts +212 -0
  60. package/dist/core/resource-manager.js +564 -0
  61. package/dist/core/search.d.ts +28 -1
  62. package/dist/core/search.js +83 -5
  63. package/dist/core/streaming-operations.d.ts +145 -0
  64. package/dist/core/streaming-operations.js +409 -0
  65. package/dist/core/types.d.ts +3 -0
  66. package/dist/core/universal-embedder.d.ts +177 -0
  67. package/dist/core/universal-embedder.js +139 -0
  68. package/dist/core/validation-messages.d.ts +99 -0
  69. package/dist/core/validation-messages.js +334 -0
  70. package/dist/core/vector-index.d.ts +1 -1
  71. package/dist/core/vector-index.js +37 -39
  72. package/dist/factories/index.d.ts +3 -1
  73. package/dist/factories/index.js +2 -0
  74. package/dist/factories/polymorphic-factory.d.ts +50 -0
  75. package/dist/factories/polymorphic-factory.js +159 -0
  76. package/dist/factories/text-factory.d.ts +128 -34
  77. package/dist/factories/text-factory.js +346 -97
  78. package/dist/file-processor.d.ts +88 -2
  79. package/dist/file-processor.js +720 -17
  80. package/dist/index.d.ts +32 -0
  81. package/dist/index.js +29 -0
  82. package/dist/ingestion.d.ts +16 -0
  83. package/dist/ingestion.js +21 -0
  84. package/dist/mcp-server.d.ts +35 -3
  85. package/dist/mcp-server.js +1107 -31
  86. package/dist/multimodal/clip-embedder.d.ts +327 -0
  87. package/dist/multimodal/clip-embedder.js +992 -0
  88. package/dist/multimodal/index.d.ts +6 -0
  89. package/dist/multimodal/index.js +6 -0
  90. package/dist/run-error-recovery-tests.d.ts +7 -0
  91. package/dist/run-error-recovery-tests.js +101 -0
  92. package/dist/search.d.ts +60 -9
  93. package/dist/search.js +82 -11
  94. package/dist/test-utils.d.ts +8 -26
  95. package/dist/text/chunker.d.ts +1 -0
  96. package/dist/text/embedder.js +15 -8
  97. package/dist/text/index.d.ts +1 -0
  98. package/dist/text/index.js +1 -0
  99. package/dist/text/reranker.d.ts +1 -2
  100. package/dist/text/reranker.js +17 -47
  101. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  102. package/dist/text/sentence-transformer-embedder.js +340 -0
  103. package/dist/types.d.ts +39 -0
  104. package/dist/utils/vector-math.d.ts +31 -0
  105. package/dist/utils/vector-math.js +70 -0
  106. package/package.json +27 -6
  107. package/dist/api-errors.d.ts.map +0 -1
  108. package/dist/api-errors.js.map +0 -1
  109. package/dist/cli/indexer.d.ts.map +0 -1
  110. package/dist/cli/indexer.js.map +0 -1
  111. package/dist/cli/search.d.ts.map +0 -1
  112. package/dist/cli/search.js.map +0 -1
  113. package/dist/cli.d.ts.map +0 -1
  114. package/dist/cli.js.map +0 -1
  115. package/dist/config.d.ts.map +0 -1
  116. package/dist/config.js.map +0 -1
  117. package/dist/core/adapters.d.ts.map +0 -1
  118. package/dist/core/adapters.js.map +0 -1
  119. package/dist/core/chunker.d.ts.map +0 -1
  120. package/dist/core/chunker.js.map +0 -1
  121. package/dist/core/config.d.ts.map +0 -1
  122. package/dist/core/config.js.map +0 -1
  123. package/dist/core/db.d.ts.map +0 -1
  124. package/dist/core/db.js.map +0 -1
  125. package/dist/core/error-handler.d.ts.map +0 -1
  126. package/dist/core/error-handler.js.map +0 -1
  127. package/dist/core/index.d.ts.map +0 -1
  128. package/dist/core/index.js.map +0 -1
  129. package/dist/core/ingestion.d.ts.map +0 -1
  130. package/dist/core/ingestion.js.map +0 -1
  131. package/dist/core/interfaces.d.ts.map +0 -1
  132. package/dist/core/interfaces.js.map +0 -1
  133. package/dist/core/path-manager.d.ts.map +0 -1
  134. package/dist/core/path-manager.js.map +0 -1
  135. package/dist/core/search-example.d.ts +0 -25
  136. package/dist/core/search-example.d.ts.map +0 -1
  137. package/dist/core/search-example.js +0 -138
  138. package/dist/core/search-example.js.map +0 -1
  139. package/dist/core/search-pipeline-example.d.ts +0 -21
  140. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  141. package/dist/core/search-pipeline-example.js +0 -188
  142. package/dist/core/search-pipeline-example.js.map +0 -1
  143. package/dist/core/search-pipeline.d.ts.map +0 -1
  144. package/dist/core/search-pipeline.js.map +0 -1
  145. package/dist/core/search.d.ts.map +0 -1
  146. package/dist/core/search.js.map +0 -1
  147. package/dist/core/types.d.ts.map +0 -1
  148. package/dist/core/types.js.map +0 -1
  149. package/dist/core/vector-index.d.ts.map +0 -1
  150. package/dist/core/vector-index.js.map +0 -1
  151. package/dist/dom-polyfills.d.ts.map +0 -1
  152. package/dist/dom-polyfills.js.map +0 -1
  153. package/dist/examples/clean-api-examples.d.ts +0 -44
  154. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  155. package/dist/examples/clean-api-examples.js +0 -206
  156. package/dist/examples/clean-api-examples.js.map +0 -1
  157. package/dist/factories/index.d.ts.map +0 -1
  158. package/dist/factories/index.js.map +0 -1
  159. package/dist/factories/text-factory.d.ts.map +0 -1
  160. package/dist/factories/text-factory.js.map +0 -1
  161. package/dist/file-processor.d.ts.map +0 -1
  162. package/dist/file-processor.js.map +0 -1
  163. package/dist/index-manager.d.ts.map +0 -1
  164. package/dist/index-manager.js.map +0 -1
  165. package/dist/index.d.ts.map +0 -1
  166. package/dist/index.js.map +0 -1
  167. package/dist/indexer.d.ts.map +0 -1
  168. package/dist/indexer.js.map +0 -1
  169. package/dist/ingestion.d.ts.map +0 -1
  170. package/dist/ingestion.js.map +0 -1
  171. package/dist/mcp-server.d.ts.map +0 -1
  172. package/dist/mcp-server.js.map +0 -1
  173. package/dist/preprocess.d.ts.map +0 -1
  174. package/dist/preprocess.js.map +0 -1
  175. package/dist/preprocessors/index.d.ts.map +0 -1
  176. package/dist/preprocessors/index.js.map +0 -1
  177. package/dist/preprocessors/mdx.d.ts.map +0 -1
  178. package/dist/preprocessors/mdx.js.map +0 -1
  179. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  180. package/dist/preprocessors/mermaid.js.map +0 -1
  181. package/dist/preprocessors/registry.d.ts.map +0 -1
  182. package/dist/preprocessors/registry.js.map +0 -1
  183. package/dist/search-standalone.d.ts.map +0 -1
  184. package/dist/search-standalone.js.map +0 -1
  185. package/dist/search.d.ts.map +0 -1
  186. package/dist/search.js.map +0 -1
  187. package/dist/test-utils.d.ts.map +0 -1
  188. package/dist/test-utils.js.map +0 -1
  189. package/dist/text/chunker.d.ts.map +0 -1
  190. package/dist/text/chunker.js.map +0 -1
  191. package/dist/text/embedder.d.ts.map +0 -1
  192. package/dist/text/embedder.js.map +0 -1
  193. package/dist/text/index.d.ts.map +0 -1
  194. package/dist/text/index.js.map +0 -1
  195. package/dist/text/preprocessors/index.d.ts.map +0 -1
  196. package/dist/text/preprocessors/index.js.map +0 -1
  197. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  198. package/dist/text/preprocessors/mdx.js.map +0 -1
  199. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  200. package/dist/text/preprocessors/mermaid.js.map +0 -1
  201. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  202. package/dist/text/preprocessors/registry.js.map +0 -1
  203. package/dist/text/reranker.d.ts.map +0 -1
  204. package/dist/text/reranker.js.map +0 -1
  205. package/dist/text/tokenizer.d.ts.map +0 -1
  206. package/dist/text/tokenizer.js.map +0 -1
  207. package/dist/types.d.ts.map +0 -1
  208. package/dist/types.js.map +0 -1
@@ -0,0 +1,176 @@
1
+ /**
2
+ * CORE MODULE — Simple Embedder Creation Function
3
+ *
4
+ * Provides direct model instantiation with clear validation and error handling.
5
+ * No fallback mechanisms - models work reliably or fail clearly with actionable guidance.
6
+ *
7
+ * Supported Models:
8
+ * - Text Mode: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2
9
+ * - Multimodal Mode: Xenova/clip-vit-base-patch32, Xenova/clip-vit-base-patch16
10
+ *
11
+ * Mode Selection Guide:
12
+ * - Use text mode for text-only content (faster, optimized for text similarity)
13
+ * - Use multimodal mode for mixed text/image content (enables cross-modal search)
14
+ */
15
+ import '../dom-polyfills.js';
16
+ import type { UniversalEmbedder, ModelType, EmbedderCreationOptions } from './universal-embedder.js';
17
+ /**
18
+ * Create a universal embedder for the specified model
19
+ *
20
+ * Simple function-based approach that validates model compatibility and creates
21
+ * the appropriate embedder. Models work reliably without fallback mechanisms -
22
+ * if there's an issue, you'll get clear error messages with actionable guidance.
23
+ *
24
+ * Mode Selection:
25
+ * - Text Mode: Use sentence-transformer models for text-only content
26
+ * - Fast, optimized for text similarity
27
+ * - Best for: document search, semantic similarity, text clustering
28
+ *
29
+ * - Multimodal Mode: Use CLIP models for mixed text/image content
30
+ * - Unified embedding space for text and images
31
+ * - Enables cross-modal search (text queries find images, image queries find text)
32
+ * - Best for: image search, visual question answering, multimodal retrieval
33
+ *
34
+ * @param modelName - Name of the model to create
35
+ * @param options - Optional configuration options
36
+ * @returns Promise resolving to a UniversalEmbedder instance
37
+ * @throws {Error} If model is not supported or validation fails
38
+ *
39
+ * @example
40
+ * ```typescript
41
+ * // Text mode - optimized for text-only content
42
+ * const textEmbedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
43
+ * const textResult = await textEmbedder.embedText('machine learning');
44
+ *
45
+ * // Multimodal mode - enables cross-modal search
46
+ * const clipEmbedder = await createEmbedder('Xenova/clip-vit-base-patch32');
47
+ * const clipTextResult = await clipEmbedder.embedText('red sports car');
48
+ * const imageResult = await clipEmbedder.embedImage('./car.jpg');
49
+ *
50
+ * // Create with custom options
51
+ * const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2', {
52
+ * maxBatchSize: 16,
53
+ * cachePath: './models'
54
+ * });
55
+ * ```
56
+ */
57
+ export declare function createEmbedder(modelName: string, options?: EmbedderCreationOptions): Promise<UniversalEmbedder>;
58
+ /**
59
+ * Get supported models for a specific content type
60
+ * Convenience function for filtering models by capability
61
+ *
62
+ * @param contentType - Content type to filter by ('text', 'image', etc.)
63
+ * @returns Array of model names that support the content type
64
+ *
65
+ * @example
66
+ * ```typescript
67
+ * const textModels = getSupportedModelsForContentType('text');
68
+ * const imageModels = getSupportedModelsForContentType('image');
69
+ * ```
70
+ */
71
+ export declare function getSupportedModelsForContentType(contentType: string): string[];
72
+ /**
73
+ * Get recommended model for a specific use case
74
+ *
75
+ * Provides intelligent model selection based on content types and constraints.
76
+ * Returns models that work reliably for the specified requirements.
77
+ *
78
+ * Mode Selection Guide:
79
+ * - Text only (['text']): Returns sentence-transformer models
80
+ * - Fast, optimized for text similarity
81
+ * - Best for document search and text clustering
82
+ *
83
+ * - Text + Images (['text', 'image']): Returns CLIP models
84
+ * - Unified embedding space for cross-modal search
85
+ * - Text queries can find images, image queries can find text
86
+ * - Best for visual search and multimodal retrieval
87
+ *
88
+ * @param contentTypes - Required content types
89
+ * @param constraints - Optional constraints (memory, performance, etc.)
90
+ * @param constraints.maxMemory - Maximum memory in MB
91
+ * @param constraints.preferPerformance - Prefer faster models
92
+ * @param constraints.preferAccuracy - Prefer more accurate models
93
+ * @returns Recommended model name or null if no suitable model found
94
+ *
95
+ * @example
96
+ * ```typescript
97
+ * // Get best text model (fast, optimized for text)
98
+ * const textModel = getRecommendedModel(['text']);
99
+ * // Returns: 'sentence-transformers/all-MiniLM-L6-v2'
100
+ *
101
+ * // Get best multimodal model (enables cross-modal search)
102
+ * const multimodalModel = getRecommendedModel(['text', 'image']);
103
+ * // Returns: 'Xenova/clip-vit-base-patch32'
104
+ *
105
+ * // Get performance-optimized model
106
+ * const fastModel = getRecommendedModel(['text'], { preferPerformance: true });
107
+ *
108
+ * // Get accuracy-optimized model
109
+ * const accurateModel = getRecommendedModel(['text'], { preferAccuracy: true });
110
+ * ```
111
+ */
112
+ export declare function getRecommendedModel(contentTypes: string[], constraints?: {
113
+ maxMemory?: number;
114
+ preferPerformance?: boolean;
115
+ preferAccuracy?: boolean;
116
+ }): string | null;
117
+ /**
118
+ * Validate model compatibility before creation
119
+ * Useful for checking compatibility without creating the embedder
120
+ *
121
+ * @param modelName - Name of the model to validate
122
+ * @returns Promise resolving to validation result
123
+ *
124
+ * @example
125
+ * ```typescript
126
+ * const isValid = await validateModelCompatibility('sentence-transformers/all-MiniLM-L6-v2');
127
+ * if (isValid) {
128
+ * const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
129
+ * }
130
+ * ```
131
+ */
132
+ export declare function validateModelCompatibility(modelName: string): Promise<boolean>;
133
+ /**
134
+ * List all available models with their capabilities
135
+ * Useful for displaying model options to users
136
+ *
137
+ * @returns Array of model information objects
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * const models = listAvailableModels();
142
+ * models.forEach(model => {
143
+ * console.log(`${model.name}: ${model.supportedContentTypes.join(', ')}`);
144
+ * });
145
+ * ```
146
+ */
147
+ export declare function listAvailableModels(): Array<{
148
+ name: string;
149
+ type: ModelType;
150
+ dimensions: number;
151
+ supportedContentTypes: readonly string[];
152
+ memoryRequirement: number | undefined;
153
+ }>;
154
+ /**
155
+ * @deprecated Use createEmbedder() instead
156
+ * Legacy factory-style interface for backward compatibility
157
+ */
158
+ export declare const UniversalEmbedderFactory: {
159
+ /**
160
+ * @deprecated Use createEmbedder() instead
161
+ */
162
+ readonly create: (modelName: string, options?: EmbedderCreationOptions) => Promise<UniversalEmbedder>;
163
+ /**
164
+ * @deprecated Use ModelRegistry.validateModel() instead
165
+ */
166
+ readonly validateModel: (modelName: string) => import("./universal-embedder.js").ModelValidationResult;
167
+ /**
168
+ * @deprecated Use ModelRegistry.getModelInfo() instead
169
+ */
170
+ readonly getModelInfo: (modelName: string) => import("./universal-embedder.js").ModelInfo | null;
171
+ /**
172
+ * @deprecated Use ModelRegistry.getSupportedModels() instead
173
+ */
174
+ readonly getSupportedModels: (modelType?: ModelType) => string[];
175
+ };
176
+ //# sourceMappingURL=embedder-factory.d.ts.map
@@ -0,0 +1,338 @@
1
+ /**
2
+ * CORE MODULE — Simple Embedder Creation Function
3
+ *
4
+ * Provides direct model instantiation with clear validation and error handling.
5
+ * No fallback mechanisms - models work reliably or fail clearly with actionable guidance.
6
+ *
7
+ * Supported Models:
8
+ * - Text Mode: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2
9
+ * - Multimodal Mode: Xenova/clip-vit-base-patch32, Xenova/clip-vit-base-patch16
10
+ *
11
+ * Mode Selection Guide:
12
+ * - Use text mode for text-only content (faster, optimized for text similarity)
13
+ * - Use multimodal mode for mixed text/image content (enables cross-modal search)
14
+ */
15
+ // Ensure DOM polyfills are set up before any transformers.js usage
16
+ import '../dom-polyfills.js';
17
+ import { ModelRegistry } from './model-registry.js';
18
+ import { ModelValidator } from './model-validator.js';
19
+ import { createModelValidationError } from './model-validator.js';
20
+ import { createValidationErrorMessage } from './validation-messages.js';
21
+ // =============================================================================
22
+ // SIMPLE EMBEDDER CREATION FUNCTION
23
+ // =============================================================================
24
/**
 * Create a universal embedder for the specified model.
 *
 * Looks the model up in the registry, runs detailed validation, and then
 * delegates to the creator that matches the model type. There is no fallback
 * model: an unknown model, a failed validation, or an unsupported model type
 * results in a thrown error after the explanation has been logged.
 *
 * Mode Selection:
 * - Text Mode: Use sentence-transformer models for text-only content
 * - Multimodal Mode: Use CLIP models for mixed text/image content
 *
 * @param modelName - Name of the model to create
 * @param options - Optional configuration options, forwarded unchanged to the model-specific creator
 * @returns Promise resolving to a UniversalEmbedder instance
 * @throws {Error} If the model is not in the registry, fails detailed validation, or has an unsupported type
 *
 * @example
 * ```typescript
 * // Text mode - optimized for text-only content
 * const textEmbedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
 * const textResult = await textEmbedder.embedText('machine learning');
 *
 * // Multimodal mode - enables cross-modal search
 * const clipEmbedder = await createEmbedder('Xenova/clip-vit-base-patch32');
 * const clipTextResult = await clipEmbedder.embedText('red sports car');
 * const imageResult = await clipEmbedder.embedImage('./car.jpg');
 *
 * // Create with custom options
 * const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2', {
 *   maxBatchSize: 16,
 *   cachePath: './models'
 * });
 * ```
 */
export async function createEmbedder(modelName, options = {}) {
    // Step 0: Ensure a `self` global exists before any transformers.js usage
    if (typeof globalThis.self === 'undefined') {
        globalThis.self = globalThis;
    }
    // `global` is not defined in every runtime; probe the identifier with typeof
    // first so that a missing binding does not raise a ReferenceError here.
    if (typeof global !== 'undefined' && typeof global.self === 'undefined') {
        global.self = global;
    }
    // Step 1: Detect the transformers.js version if it is not known yet
    if (!ModelValidator.getTransformersVersion()) {
        await ModelValidator.detectTransformersVersion();
    }
    // Step 2: The model must be present in the registry
    const modelInfo = ModelRegistry.getModelInfo(modelName);
    if (!modelInfo) {
        const errorMessage = createValidationErrorMessage(modelName, 'not_found', {
            suggestions: ModelRegistry.getSupportedModels()
        });
        console.error(errorMessage);
        throw createModelValidationError(modelName, 'Model not found in supported models registry');
    }
    // Step 3: Perform detailed validation
    try {
        const detailedValidation = await ModelValidator.validateModelDetailed(modelName);
        if (!detailedValidation.isValid) {
            const firstError = detailedValidation.errors[0] || 'Validation failed';
            // NOTE(review): the logged message always uses the 'version_incompatible'
            // template, whatever the first validation error is — confirm this is intended.
            const errorMessage = createValidationErrorMessage(modelName, 'version_incompatible', {
                required: modelInfo.requirements.transformersJsVersion,
                current: ModelValidator.getTransformersVersion() || 'unknown'
            });
            console.error(errorMessage);
            throw createModelValidationError(modelName, firstError);
        }
        // Log warnings if any
        if (detailedValidation.warnings.length > 0) {
            console.warn(`⚠️ Warnings for model '${modelName}':`);
            detailedValidation.warnings.forEach(warning => console.warn(` • ${warning}`));
        }
        // Log suggestions if any
        if (detailedValidation.suggestions.length > 0) {
            console.info(`💡 Suggestions for model '${modelName}':`);
            detailedValidation.suggestions.forEach(suggestion => console.info(` • ${suggestion}`));
        }
    }
    catch (error) {
        // Validation verdicts (thrown above) must reach the caller untouched
        if (error instanceof Error && error.name === 'ModelValidationError') {
            throw error;
        }
        // Any other failure means the validator itself could not run; creation
        // continues on the strength of the registry lookup alone.
        console.warn(`Warning: Could not perform detailed validation for '${modelName}': ${error}`);
        console.info('Proceeding with basic validation only...');
    }
    // Step 4: Create the appropriate embedder based on model type
    const modelType = modelInfo.type;
    switch (modelType) {
        case 'sentence-transformer':
            return await createSentenceTransformerEmbedder(modelName, options);
        case 'clip':
            return await createCLIPEmbedder(modelName, options);
        default: {
            // Braces give the lexical declaration its own scope inside the switch
            const errorMessage = createValidationErrorMessage(modelName, 'not_found', {
                suggestions: [`Unsupported model type: ${modelType}`]
            });
            console.error(errorMessage);
            throw createModelValidationError(modelName, `Unsupported model type: ${modelType}. Supported types: sentence-transformer, clip`);
        }
    }
}
132
+ // =============================================================================
133
+ // MODEL-SPECIFIC CREATION FUNCTIONS
134
+ // =============================================================================
135
/**
 * Build a sentence-transformer embedder through the lazy dependency loader.
 *
 * The loader module is imported dynamically so the text dependencies are only
 * loaded when a sentence-transformer model is actually requested.
 * @private
 */
async function createSentenceTransformerEmbedder(modelName, options) {
    const loaderModule = await import('./lazy-dependency-loader.js');
    return loaderModule.LazyEmbedderLoader.loadSentenceTransformerEmbedder(modelName, options);
}
144
/**
 * Build a CLIP embedder through the lazy dependency loader.
 *
 * The loader module is imported dynamically so the multimodal dependencies are
 * only loaded when a CLIP model is actually requested.
 * @private
 */
async function createCLIPEmbedder(modelName, options) {
    const loaderModule = await import('./lazy-dependency-loader.js');
    return loaderModule.LazyEmbedderLoader.loadCLIPEmbedder(modelName, options);
}
153
+ // =============================================================================
154
+ // UTILITY FUNCTIONS
155
+ // =============================================================================
156
/**
 * List the models that can handle a given content type.
 *
 * Thin convenience wrapper: the lookup itself is performed by
 * ModelRegistry.getModelsByContentType().
 *
 * @param contentType - Content type to filter by ('text', 'image', etc.)
 * @returns Array of model names that support the content type
 *
 * @example
 * ```typescript
 * const textModels = getSupportedModelsForContentType('text');
 * const imageModels = getSupportedModelsForContentType('image');
 * ```
 */
export function getSupportedModelsForContentType(contentType) {
    const matchingModels = ModelRegistry.getModelsByContentType(contentType);
    return matchingModels;
}
172
/**
 * Get recommended model for a specific use case
 *
 * Asks ModelValidator.getRecommendedModels() for the models compatible with the
 * requested content types, memory limit and detected transformers.js version,
 * then picks one of them according to the caller's preference.
 *
 * Selection order:
 * - preferPerformance: first compatible entry of the performance ranking
 * - preferAccuracy: first compatible entry of the accuracy ranking
 * - otherwise (or when no ranked model is compatible): first compatible model
 *
 * When both preferences are set, the performance ranking is consulted first.
 *
 * @param contentTypes - Required content types
 * @param constraints - Optional constraints (memory, performance, etc.); `null` is treated like an empty object
 * @param constraints.maxMemory - Maximum memory in MB
 * @param constraints.preferPerformance - Prefer faster models
 * @param constraints.preferAccuracy - Prefer more accurate models
 * @returns Recommended model name or null if no suitable model found
 *
 * @example
 * ```typescript
 * // Get best text model (fast, optimized for text)
 * const textModel = getRecommendedModel(['text']);
 *
 * // Get best multimodal model (enables cross-modal search)
 * const multimodalModel = getRecommendedModel(['text', 'image']);
 *
 * // Get performance-optimized model
 * const fastModel = getRecommendedModel(['text'], { preferPerformance: true });
 *
 * // Get accuracy-optimized model
 * const accurateModel = getRecommendedModel(['text'], { preferAccuracy: true });
 * ```
 */
export function getRecommendedModel(contentTypes, constraints = {}) {
    // The default parameter only covers `undefined`; an explicit `null` would
    // otherwise crash on the property reads below.
    const { maxMemory, preferPerformance, preferAccuracy } = constraints ?? {};
    const transformersVersion = ModelValidator.getTransformersVersion();
    const compatibleModels = ModelValidator.getRecommendedModels(contentTypes, maxMemory, transformersVersion || undefined);
    if (compatibleModels.length === 0) {
        return null;
    }
    // Rankings are consulted in the order they are pushed: performance first.
    const rankings = [];
    if (preferPerformance) {
        // Prefer smaller, faster models
        rankings.push([
            'sentence-transformers/all-MiniLM-L6-v2',
            'Xenova/clip-vit-base-patch32',
            'Xenova/all-mpnet-base-v2',
            'Xenova/clip-vit-base-patch16'
        ]);
    }
    if (preferAccuracy) {
        // Prefer larger, more accurate models
        rankings.push([
            'Xenova/all-mpnet-base-v2',
            'Xenova/clip-vit-base-patch16',
            'sentence-transformers/all-MiniLM-L6-v2',
            'Xenova/clip-vit-base-patch32'
        ]);
    }
    for (const ranking of rankings) {
        const preferred = ranking.find(candidate => compatibleModels.includes(candidate));
        if (preferred !== undefined) {
            return preferred;
        }
    }
    // Default: return first compatible model
    return compatibleModels[0];
}
250
/**
 * Check whether a model passes detailed validation, without creating it.
 *
 * Never rejects: if the validator itself throws, the failure is logged as a
 * warning and the model is reported as not compatible.
 *
 * @param modelName - Name of the model to validate
 * @returns Promise resolving to the validator's `isValid` verdict, or false when validation could not run
 *
 * @example
 * ```typescript
 * const isValid = await validateModelCompatibility('sentence-transformers/all-MiniLM-L6-v2');
 * if (isValid) {
 *   const embedder = await createEmbedder('sentence-transformers/all-MiniLM-L6-v2');
 * }
 * ```
 */
export async function validateModelCompatibility(modelName) {
    let verdict = false;
    try {
        const report = await ModelValidator.validateModelDetailed(modelName);
        verdict = report.isValid;
    }
    catch (error) {
        // A validator failure counts as "not compatible"; verdict stays false.
        console.warn(`Validation failed for '${modelName}': ${error}`);
    }
    return verdict;
}
275
/**
 * List all available models with their capabilities
 * Useful for displaying model options to users
 *
 * Every name returned by ModelRegistry.getSupportedModels() is resolved through
 * ModelRegistry.getModelInfo(). Names for which the registry returns no info
 * are left out of the listing instead of aborting it with a TypeError.
 *
 * @returns Array of model information objects; `memoryRequirement` is
 *          undefined when the model declares no minimum memory
 *
 * @example
 * ```typescript
 * const models = listAvailableModels();
 * models.forEach(model => {
 *   console.log(`${model.name}: ${model.supportedContentTypes.join(', ')}`);
 * });
 * ```
 */
export function listAvailableModels() {
    return ModelRegistry.getSupportedModels().flatMap(modelName => {
        const info = ModelRegistry.getModelInfo(modelName);
        // getModelInfo() may return null; skip such entries rather than crash.
        if (!info) {
            return [];
        }
        return [{
                name: info.name,
                type: info.type,
                dimensions: info.dimensions,
                supportedContentTypes: info.supportedContentTypes,
                // `requirements` is optional-chained so a model without it still lists
                memoryRequirement: info.requirements?.minimumMemory
            }];
    });
}
301
+ // =============================================================================
302
+ // BACKWARD COMPATIBILITY
303
+ // =============================================================================
304
/**
 * @deprecated Use createEmbedder() instead
 * Factory-shaped shim kept for backward compatibility. Each member logs a
 * deprecation warning and then forwards to createEmbedder() or ModelRegistry.
 */
export const UniversalEmbedderFactory = {
    /**
     * @deprecated Use createEmbedder() instead
     */
    create: async (modelName, options) => {
        console.warn('UniversalEmbedderFactory.create() is deprecated. Use createEmbedder() instead.');
        return createEmbedder(modelName, options);
    },
    /**
     * @deprecated Use ModelRegistry.validateModel() instead
     */
    validateModel: (modelName) => {
        console.warn('UniversalEmbedderFactory.validateModel() is deprecated. Use ModelRegistry.validateModel() instead.');
        return ModelRegistry.validateModel(modelName);
    },
    /**
     * @deprecated Use ModelRegistry.getModelInfo() instead
     */
    getModelInfo: (modelName) => {
        console.warn('UniversalEmbedderFactory.getModelInfo() is deprecated. Use ModelRegistry.getModelInfo() instead.');
        return ModelRegistry.getModelInfo(modelName);
    },
    /**
     * @deprecated Use ModelRegistry.getSupportedModels() instead
     */
    getSupportedModels: (modelType) => {
        console.warn('UniversalEmbedderFactory.getSupportedModels() is deprecated. Use ModelRegistry.getSupportedModels() instead.');
        return ModelRegistry.getSupportedModels(modelType);
    }
};
338
+ //# sourceMappingURL=embedder-factory.js.map
@@ -47,11 +47,13 @@ export { type ContentDocument, type ContentChunk, type Document, type Chunk, typ
47
47
  export { type EmbedFunction, type RerankFunction, type EmbeddingQueryInterface, type RerankingInterface, type SearchEngineConfig, type ContentTypeStrategy, type ModelAgnosticInterface, type ExtendedEmbeddingInterface, type ExtendedRerankingInterface, type SearchPipelineInterface, type SearchDependencyFactory, InterfaceValidator } from './interfaces.js';
48
48
  export * from './adapters.js';
49
49
  export * from './config.js';
50
- export { type DatabaseConnection, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
50
+ export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
51
51
  export { type VectorIndexOptions, VectorIndex } from './vector-index.js';
52
52
  export { type ChunkConfig, type GenericDocument, type GenericChunk, type ChunkingStrategy, ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
53
53
  export * from './search.js';
54
54
  export * from './ingestion.js';
55
55
  export * from './path-manager.js';
56
+ export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
57
+ export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
56
58
  export * from './error-handler.js';
57
59
  //# sourceMappingURL=index.d.ts.map
@@ -50,7 +50,7 @@ export * from './adapters.js';
50
50
  // Core configuration management - model-agnostic settings
51
51
  export * from './config.js';
52
52
  // Database operations - supports different content types through metadata
53
- export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
53
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
54
54
  // Vector index operations - works with any embedding dimensions
55
55
  export { VectorIndex } from './vector-index.js';
56
56
  // Generic chunking interfaces and strategies - supports text, image metadata, etc.
@@ -61,6 +61,9 @@ export * from './search.js';
61
61
  export * from './ingestion.js';
62
62
  // Path management utilities - content-type agnostic
63
63
  export * from './path-manager.js';
64
+ // Unified content system - handles both filesystem and memory content
65
+ export { ContentManager } from './content-manager.js';
66
+ export { ContentResolver } from './content-resolver.js';
64
67
  // Error handling framework - supports implementation-specific error contexts
65
68
  export * from './error-handler.js';
66
69
  //# sourceMappingURL=index.js.map