rag-lite-ts 2.0.3 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/cli/indexer.js +4 -4
  2. package/dist/cli/search.js +3 -3
  3. package/dist/cli.js +31 -4
  4. package/dist/config.js +3 -0
  5. package/dist/core/actionable-error-messages.js +3 -3
  6. package/dist/core/content-manager.d.ts +0 -8
  7. package/dist/core/content-manager.js +2 -30
  8. package/dist/core/database-connection-manager.js +15 -9
  9. package/dist/core/db.d.ts +0 -32
  10. package/dist/core/db.js +11 -68
  11. package/dist/core/embedder-factory.d.ts +0 -22
  12. package/dist/core/embedder-factory.js +8 -35
  13. package/dist/core/index.d.ts +3 -3
  14. package/dist/core/index.js +3 -3
  15. package/dist/core/ingestion.d.ts +1 -16
  16. package/dist/core/ingestion.js +4 -30
  17. package/dist/core/interfaces.d.ts +1 -1
  18. package/dist/core/interfaces.js +1 -1
  19. package/dist/core/model-registry.d.ts +0 -4
  20. package/dist/core/model-registry.js +5 -9
  21. package/dist/core/search.d.ts +2 -2
  22. package/dist/core/search.js +2 -2
  23. package/dist/factories/index.d.ts +11 -29
  24. package/dist/factories/index.js +12 -29
  25. package/dist/factories/ingestion-factory.d.ts +200 -0
  26. package/dist/factories/ingestion-factory.js +475 -0
  27. package/dist/{core/polymorphic-search-factory.d.ts → factories/search-factory.d.ts} +7 -7
  28. package/dist/{core/polymorphic-search-factory.js → factories/search-factory.js} +22 -22
  29. package/dist/index-manager.js +25 -14
  30. package/dist/index.d.ts +5 -30
  31. package/dist/index.js +9 -24
  32. package/dist/ingestion.d.ts +2 -4
  33. package/dist/ingestion.js +2 -2
  34. package/dist/mcp-server.js +34 -30
  35. package/dist/search.js +2 -2
  36. package/dist/text/embedder.d.ts +0 -11
  37. package/dist/text/embedder.js +11 -22
  38. package/dist/text/index.d.ts +2 -2
  39. package/dist/text/index.js +2 -2
  40. package/dist/text/reranker.d.ts +0 -10
  41. package/dist/text/reranker.js +10 -33
  42. package/package.json +105 -101
  43. package/dist/factories/polymorphic-factory.d.ts +0 -50
  44. package/dist/factories/polymorphic-factory.js +0 -159
  45. package/dist/factories/text-factory.d.ts +0 -560
  46. package/dist/factories/text-factory.js +0 -982
@@ -35,7 +35,7 @@
35
35
  * });
36
36
  *
37
37
  * // Factory with custom configuration
38
- * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
38
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
39
39
  * chunkSize: 300,
40
40
  * chunkOverlap: 50
41
41
  * });
@@ -35,7 +35,7 @@
35
35
  * });
36
36
  *
37
37
  * // Factory with custom configuration
38
- * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
38
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
39
39
  * chunkSize: 300,
40
40
  * chunkOverlap: 50
41
41
  * });
@@ -113,8 +113,4 @@ export declare function getRecommendedBatchSize(modelName: string): number;
113
113
  * Default model names for different types
114
114
  */
115
115
  export declare const DEFAULT_MODELS: Record<ModelType, string>;
116
- /**
117
- * Model type mappings for backward compatibility
118
- */
119
- export declare const MODEL_TYPE_ALIASES: Record<string, ModelType>;
120
116
  //# sourceMappingURL=model-registry.d.ts.map
@@ -403,13 +403,9 @@ export const DEFAULT_MODELS = {
403
403
  'sentence-transformer': 'sentence-transformers/all-MiniLM-L6-v2',
404
404
  'clip': 'Xenova/clip-vit-base-patch32'
405
405
  };
406
- /**
407
- * Model type mappings for backward compatibility
408
- */
409
- export const MODEL_TYPE_ALIASES = {
410
- 'text': 'sentence-transformer',
411
- 'multimodal': 'clip',
412
- 'sentence': 'sentence-transformer',
413
- 'embedding': 'sentence-transformer'
414
- };
406
+ // =============================================================================
407
+ // REMOVED IN v3.0.0: MODEL_TYPE_ALIASES
408
+ // =============================================================================
409
+ // Model type aliases have been removed as they were not used anywhere in the codebase.
410
+ // Use ModelType directly: 'sentence-transformer' or 'clip'
415
411
  //# sourceMappingURL=model-registry.js.map
@@ -54,8 +54,8 @@ export declare class SearchEngine {
54
54
  * USAGE EXAMPLES:
55
55
  * ```typescript
56
56
  * // Text-only search engine
57
- * const textEmbedFn = await createTextEmbedder();
58
- * const textRerankFn = await createTextReranker();
57
+ * const textEmbedFn = createTextEmbedFunction();
58
+ * const textRerankFn = createTextRerankFunction();
59
59
  * const indexManager = new IndexManager('./index.bin');
60
60
  * const db = await openDatabase('./db.sqlite');
61
61
  * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
@@ -53,8 +53,8 @@ export class SearchEngine {
53
53
  * USAGE EXAMPLES:
54
54
  * ```typescript
55
55
  * // Text-only search engine
56
- * const textEmbedFn = await createTextEmbedder();
57
- * const textRerankFn = await createTextReranker();
56
+ * const textEmbedFn = createTextEmbedFunction();
57
+ * const textRerankFn = createTextRerankFunction();
58
58
  * const indexManager = new IndexManager('./index.bin');
59
59
  * const db = await openDatabase('./db.sqlite');
60
60
  * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
@@ -1,45 +1,27 @@
1
1
  /**
2
- * Factory exports for creating text-specific RAG instances
2
+ * Factory exports for creating RAG instances
3
3
  * Provides convenient factory functions for common use cases
4
4
  *
5
5
  * This module serves as the main entry point for factory functions that
6
- * simplify the creation of text-based search and ingestion systems.
6
+ * simplify the creation of search and ingestion systems.
7
7
  * The factories handle complex initialization while providing clean APIs.
8
8
  *
9
9
  * MAIN FACTORY CLASSES:
10
- * - TextSearchFactory: Creates SearchEngine instances for text search
11
- * - TextIngestionFactory: Creates IngestionPipeline instances for text ingestion
12
- * - TextRAGFactory: Creates both search and ingestion instances together
13
- * - TextFactoryHelpers: Utility functions for validation and error recovery
14
- *
15
- * CONVENIENCE ALIASES:
16
- * - SearchFactory: Alias for TextSearchFactory
17
- * - IngestionFactory: Alias for TextIngestionFactory
18
- * - RAGFactory: Alias for TextRAGFactory
10
+ * - IngestionFactory: Creates IngestionPipeline instances for document ingestion
11
+ * - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
19
12
  *
20
13
  * @example
21
14
  * ```typescript
22
- * import { TextSearchFactory, TextIngestionFactory } from './factories';
23
- *
24
- * // Create search engine
25
- * const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
15
+ * import { IngestionFactory, SearchFactory } from './factories';
26
16
  *
27
17
  * // Create ingestion pipeline
28
- * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin');
18
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
29
19
  *
30
- * // Or create both together
31
- * import { TextRAGFactory } from './factories';
32
- * const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
33
- * './index.bin',
34
- * './db.sqlite'
35
- * );
20
+ * // Create search engine with automatic mode detection
21
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
36
22
  * ```
37
23
  */
38
- export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
39
- export { PolymorphicSearchFactory } from './polymorphic-factory.js';
40
- export type { PolymorphicSearchOptions } from './polymorphic-factory.js';
41
- export type { TextSearchOptions, TextIngestionOptions, ContentSystemConfig } from './text-factory.js';
42
- export { TextSearchFactory as SearchFactory } from './text-factory.js';
43
- export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
44
- export { TextRAGFactory as RAGFactory } from './text-factory.js';
24
+ export { IngestionFactory } from './ingestion-factory.js';
25
+ export { SearchFactory } from './search-factory.js';
26
+ export type { IngestionFactoryOptions, ContentSystemConfig } from './ingestion-factory.js';
45
27
  //# sourceMappingURL=index.d.ts.map
@@ -1,46 +1,29 @@
1
1
  /**
2
- * Factory exports for creating text-specific RAG instances
2
+ * Factory exports for creating RAG instances
3
3
  * Provides convenient factory functions for common use cases
4
4
  *
5
5
  * This module serves as the main entry point for factory functions that
6
- * simplify the creation of text-based search and ingestion systems.
6
+ * simplify the creation of search and ingestion systems.
7
7
  * The factories handle complex initialization while providing clean APIs.
8
8
  *
9
9
  * MAIN FACTORY CLASSES:
10
- * - TextSearchFactory: Creates SearchEngine instances for text search
11
- * - TextIngestionFactory: Creates IngestionPipeline instances for text ingestion
12
- * - TextRAGFactory: Creates both search and ingestion instances together
13
- * - TextFactoryHelpers: Utility functions for validation and error recovery
14
- *
15
- * CONVENIENCE ALIASES:
16
- * - SearchFactory: Alias for TextSearchFactory
17
- * - IngestionFactory: Alias for TextIngestionFactory
18
- * - RAGFactory: Alias for TextRAGFactory
10
+ * - IngestionFactory: Creates IngestionPipeline instances for document ingestion
11
+ * - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
19
12
  *
20
13
  * @example
21
14
  * ```typescript
22
- * import { TextSearchFactory, TextIngestionFactory } from './factories';
23
- *
24
- * // Create search engine
25
- * const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
15
+ * import { IngestionFactory, SearchFactory } from './factories';
26
16
  *
27
17
  * // Create ingestion pipeline
28
- * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin');
18
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
29
19
  *
30
- * // Or create both together
31
- * import { TextRAGFactory } from './factories';
32
- * const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
33
- * './index.bin',
34
- * './db.sqlite'
35
- * );
20
+ * // Create search engine with automatic mode detection
21
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
36
22
  * ```
37
23
  */
38
24
  // Main factory classes
39
- export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
40
- // Polymorphic factory for mode-aware search
41
- export { PolymorphicSearchFactory } from './polymorphic-factory.js';
42
- // Convenience re-exports for common patterns
43
- export { TextSearchFactory as SearchFactory } from './text-factory.js';
44
- export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
45
- export { TextRAGFactory as RAGFactory } from './text-factory.js';
25
+ export { IngestionFactory } from './ingestion-factory.js';
26
+ // Polymorphic search factory (recommended for automatic mode detection)
27
+ // Exported from the local search-factory module (moved here from core)
28
+ export { SearchFactory } from './search-factory.js';
46
29
  //# sourceMappingURL=index.js.map
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Factory for creating ingestion pipeline instances in text or multimodal mode
3
+ * Handles complex initialization logic while providing clean API for common use cases
4
+ *
5
+ * FACTORY PATTERN BENEFITS:
6
+ * - Abstracts complex initialization (model loading, database setup, index initialization)
7
+ * - Provides simple API for common use cases while preserving access to dependency injection
8
+ * - Clear validation and error handling without fallback mechanisms
9
+ * - Supports different embedding models and configurations
10
+ * - Enables clean separation between simple usage and advanced customization
11
+ *
12
+ * MODE SELECTION GUIDE:
13
+ * - Text Mode (default): Optimized for text-only content
14
+ * - Uses sentence-transformer models (fast, accurate for text)
15
+ * - Images converted to text descriptions
16
+ * - Best for: document search, text clustering, semantic similarity
17
+ *
18
+ * - Multimodal Mode: Optimized for mixed text/image content
19
+ * - Uses CLIP models (unified embedding space)
20
+ * - True cross-modal search (text finds images, images find text)
21
+ * - Best for: image search, visual QA, multimodal retrieval
22
+ *
23
+ * USAGE PATTERNS:
24
+ *
25
+ * 1. Mode Selection:
26
+ * ```typescript
27
+ * // Text mode (default) - optimized for text-only content
28
+ * const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
29
+ * mode: 'text',
30
+ * embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
31
+ * });
32
+ *
33
+ * // Multimodal mode - enables cross-modal search
34
+ * const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
35
+ * mode: 'multimodal',
36
+ * embeddingModel: 'Xenova/clip-vit-base-patch32',
37
+ * rerankingStrategy: 'text-derived'
38
+ * });
39
+ * ```
40
+ */
41
+ import { IngestionPipeline } from '../core/ingestion.js';
42
+ /**
43
+ * Content system configuration options
44
+ */
45
+ export interface ContentSystemConfig {
46
+ /** Content directory path (default: '.raglite/content') */
47
+ contentDir?: string;
48
+ /** Maximum file size in bytes (default: 50MB) */
49
+ maxFileSize?: number;
50
+ /** Maximum content directory size in bytes (default: 2GB) */
51
+ maxContentDirSize?: number;
52
+ /** Enable content deduplication (default: true) */
53
+ enableDeduplication?: boolean;
54
+ /** Enable storage tracking (default: true) */
55
+ enableStorageTracking?: boolean;
56
+ }
57
+ /**
58
+ * Options for the ingestion factory (text or multimodal mode)
59
+ */
60
+ export interface IngestionFactoryOptions {
61
+ /** Embedding model name override */
62
+ embeddingModel?: string;
63
+ /** Embedding batch size override */
64
+ batchSize?: number;
65
+ /** Chunk size override */
66
+ chunkSize?: number;
67
+ /** Chunk overlap override */
68
+ chunkOverlap?: number;
69
+ /** Whether to force rebuild the index */
70
+ forceRebuild?: boolean;
71
+ /** Mode for the ingestion pipeline (text or multimodal) */
72
+ mode?: 'text' | 'multimodal';
73
+ /** Reranking strategy for multimodal mode */
74
+ rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
75
+ /** Content system configuration */
76
+ contentSystemConfig?: ContentSystemConfig;
77
+ }
78
+ /**
79
+ * Factory for creating IngestionPipeline instances (text or multimodal mode)
80
+ * Handles model loading, database initialization, and index setup
81
+ *
82
+ * This factory abstracts the complex initialization process required for text ingestion:
83
+ * 1. Creates necessary directories if they don't exist
84
+ * 2. Validates mode-model compatibility (no fallback mechanisms)
85
+ * 3. Loads and validates embedding models with clear error reporting
86
+ * 4. Establishes database connections and initializes schema
87
+ * 5. Stores mode configuration in database for automatic detection
88
+ * 6. Creates or loads vector indexes with proper configuration
89
+ * 7. Creates IngestionPipeline with proper dependency injection
90
+ *
91
+ * Mode Configuration:
92
+ * - Text Mode (default): Uses sentence-transformer models for text-only content
93
+ * - Multimodal Mode: Uses CLIP models for mixed text/image content
94
+ * - Mode is stored in database and auto-detected during search
95
+ * - Clear validation prevents mode-model mismatches
96
+ *
97
+ * @example
98
+ * ```typescript
99
+ * // Basic usage
100
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
101
+ * await ingestion.ingestDirectory('./documents');
102
+ *
103
+ * // With custom configuration
104
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
105
+ * embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2',
106
+ * chunkSize: 512,
107
+ * chunkOverlap: 50,
108
+ * forceRebuild: true
109
+ * });
110
+ *
111
+ * // With defaults
112
+ * const ingestion = await IngestionFactory.createWithDefaults({
113
+ * batchSize: 32 // Faster processing
114
+ * });
115
+ * ```
116
+ */
117
+ export declare class IngestionFactory {
118
+ /**
119
+ * Create an IngestionPipeline configured for the selected mode (text by default)
120
+ *
121
+ * This method handles the complete initialization process:
122
+ * - Creates necessary directories if they don't exist
123
+ * - Loads text embedding model (with lazy initialization)
124
+ * - Opens database connection and initializes schema
125
+ * - Creates or loads vector index (with force rebuild option)
126
+ * - Creates IngestionPipeline with dependency injection
127
+ * - Validates the complete setup
128
+ *
129
+ * @param dbPath - Path to the SQLite database file (will be created if doesn't exist)
130
+ * @param indexPath - Path to the vector index file (will be created if doesn't exist)
131
+ * @param options - Optional configuration overrides
132
+ * @param options.embeddingModel - Override embedding model (default: from config)
133
+ * @param options.batchSize - Override embedding batch size (default: from config)
134
+ * @param options.chunkSize - Override chunk size (default: from config)
135
+ * @param options.chunkOverlap - Override chunk overlap (default: from config)
136
+ * @param options.forceRebuild - Force rebuild of existing index (default: false)
137
+ * @param options.contentSystemConfig - Content system configuration options
138
+ * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
139
+ * @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
140
+ * @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
141
+ * @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
142
+ * @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
143
+ * @returns Promise resolving to configured IngestionPipeline
144
+ * @throws {Error} If initialization fails
145
+ *
146
+ * @example
147
+ * ```typescript
148
+ * // Create ingestion pipeline with default content system
149
+ * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
150
+ *
151
+ * // Create with custom content system configuration
152
+ * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
153
+ * contentSystemConfig: {
154
+ * contentDir: './custom-content',
155
+ * maxFileSize: 100 * 1024 * 1024, // 100MB
156
+ * maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
157
+ * enableDeduplication: true
158
+ * }
159
+ * });
160
+ *
161
+ * // Ingest documents from directory
162
+ * const result = await ingestion.ingestDirectory('./documents');
163
+ * console.log(`Processed ${result.documentsProcessed} documents`);
164
+ *
165
+ * // Ingest content from memory (MCP integration)
166
+ * const contentId = await ingestion.ingestFromMemory(buffer, {
167
+ * displayName: 'uploaded-file.pdf',
168
+ * contentType: 'application/pdf'
169
+ * });
170
+ *
171
+ * // Clean up when done
172
+ * await ingestion.cleanup();
173
+ * ```
174
+ */
175
+ static create(dbPath: string, indexPath: string, options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
176
+ /**
177
+ * Create an IngestionPipeline with automatic path resolution
178
+ * Uses default paths based on current working directory
179
+ * @param options - Optional configuration overrides
180
+ * @returns Promise resolving to configured IngestionPipeline
181
+ */
182
+ static createWithDefaults(options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
183
+ /**
184
+ * Handles mode storage during ingestion
185
+ * Creates or validates system info based on the provided mode and options
186
+ * @private
187
+ */
188
+ private static handleModeStorage;
189
+ /**
190
+ * Updates system info in the database
191
+ * @private
192
+ */
193
+ private static updateSystemInfo;
194
+ /**
195
+ * Validates and prepares content system configuration
196
+ * @private
197
+ */
198
+ private static validateAndPrepareContentSystemConfig;
199
+ }
200
+ //# sourceMappingURL=ingestion-factory.d.ts.map