rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
package/dist/index-manager.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { EmbeddingResult } from './types.js';
1
+ import type { EmbeddingResult } from './core/types.js';
2
2
  export interface IndexStats {
3
3
  totalVectors: number;
4
4
  modelVersion: string;
@@ -17,8 +17,9 @@ export declare class IndexManager {
17
17
  /**
18
18
  * Initialize the index manager and load existing index if available
19
19
  * @param skipModelCheck - Skip model compatibility check (used for rebuilds)
20
+ * @param forceRecreate - Force recreation of index (used for model changes)
20
21
  */
21
- initialize(skipModelCheck?: boolean): Promise<void>;
22
+ initialize(skipModelCheck?: boolean, forceRecreate?: boolean): Promise<void>;
22
23
  /**
23
24
  * Check model compatibility between stored and current configuration
24
25
  * Requirements: 2.1, 2.2, 2.4, 5.1, 5.2, 5.3, 5.4
package/dist/index-manager.js CHANGED
@@ -1,6 +1,6 @@
1
- import { VectorIndex } from './vector-index.js';
2
- import { openDatabase, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
3
- import { config, getModelDefaults } from './config.js';
1
+ import { VectorIndex } from './core/vector-index.js';
2
+ import { openDatabase, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './core/db.js';
3
+ import { config, getModelDefaults } from './core/config.js';
4
4
  export class IndexManager {
5
5
  modelName;
6
6
  vectorIndex;
@@ -25,8 +25,9 @@ export class IndexManager {
25
25
  /**
26
26
  * Initialize the index manager and load existing index if available
27
27
  * @param skipModelCheck - Skip model compatibility check (used for rebuilds)
28
+ * @param forceRecreate - Force recreation of index (used for model changes)
28
29
  */
29
- async initialize(skipModelCheck = false) {
30
+ async initialize(skipModelCheck = false, forceRecreate = false) {
30
31
  if (this.isInitialized) {
31
32
  return;
32
33
  }
@@ -35,17 +36,18 @@ export class IndexManager {
35
36
  this.db = await openDatabase(this.dbPath);
36
37
  // Check model compatibility BEFORE trying to load the vector index
37
38
  // This prevents WebAssembly exceptions when dimensions don't match
38
- if (!skipModelCheck) {
39
+ if (!skipModelCheck && !forceRecreate) {
39
40
  await this.checkModelCompatibility();
40
41
  }
41
- if (this.vectorIndex.indexExists()) {
42
- console.log('Loading existing vector index...');
43
- await this.vectorIndex.loadIndex();
44
- }
45
- else {
42
+ if (forceRecreate || !this.vectorIndex.indexExists()) {
46
43
  console.log('Creating new vector index...');
47
44
  await this.vectorIndex.initialize();
48
45
  }
46
+ else {
47
+ // Only try to load existing index if not forcing recreation
48
+ console.log('Loading existing vector index...');
49
+ await this.vectorIndex.loadIndex();
50
+ }
49
51
  // Always populate the embedding ID mapping from existing database entries
50
52
  // This is needed both for new and existing indexes
51
53
  const existingChunks = await this.db.all('SELECT embedding_id FROM chunks ORDER BY id');
@@ -383,7 +385,7 @@ export class IndexManager {
383
385
  throw new Error('Database not initialized');
384
386
  }
385
387
  try {
386
- const rows = await this.db.all('SELECT embedding_id, text, document_id FROM chunks ORDER BY id');
388
+ const rows = await this.db.all('SELECT embedding_id, content as text, document_id FROM chunks ORDER BY id');
387
389
  return rows.map(row => ({
388
390
  embedding_id: row.embedding_id,
389
391
  text: row.text,
package/dist/index.d.ts CHANGED
@@ -1,13 +1,77 @@
1
+ /**
2
+ * rag-lite-ts - Clean Architecture with Factory Pattern
3
+ *
4
+ * Quick Start (Recommended):
5
+ * ```typescript
6
+ * import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
7
+ *
8
+ * // Simple search - just works!
9
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
10
+ * const results = await search.search('your query');
11
+ *
12
+ * // Simple ingestion - just works!
13
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
14
+ * await ingestion.ingestDirectory('./documents');
15
+ * ```
16
+ *
17
+ * With Configuration:
18
+ * ```typescript
19
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite', {
20
+ * embeddingModel: 'Xenova/all-mpnet-base-v2',
21
+ * enableReranking: true
22
+ * });
23
+ * ```
24
+ *
25
+ * Complete RAG System:
26
+ * ```typescript
27
+ * import { RAGFactory } from 'rag-lite-ts';
28
+ *
29
+ * const { searchEngine, ingestionPipeline } = await RAGFactory.createBoth(
30
+ * './index.bin',
31
+ * './db.sqlite'
32
+ * );
33
+ * ```
34
+ *
35
+ * Advanced Usage (Direct Dependency Injection):
36
+ * ```typescript
37
+ * import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
38
+ *
39
+ * const embedFn = await createTextEmbedFunction();
40
+ * const search = new CoreSearchEngine(embedFn, indexManager, db);
41
+ * ```
42
+ */
43
+ export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
44
+ export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
45
+ export type { TextSearchOptions, TextIngestionOptions } from './factories/index.js';
46
+ export type { TextSearchOptions as SearchEngineOptions, TextIngestionOptions as IngestionPipelineOptions } from './factories/index.js';
47
+ export { SearchEngine as CoreSearchEngine } from './core/search.js';
48
+ export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
1
49
  export { SearchEngine } from './search.js';
2
50
  export { IngestionPipeline } from './ingestion.js';
3
- export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine } from './embedder.js';
4
- export { config, validateConfig, getModelDefaults, type Config } from './config.js';
5
- export { openDatabase, initializeSchema, insertDocument, insertChunk, getChunksByEmbeddingIds, getModelVersion, setModelVersion, type DatabaseConnection, type ChunkResult } from './db.js';
6
- export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
7
- export { countTokens } from './tokenizer.js';
8
- export { VectorIndex } from './vector-index.js';
51
+ export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
52
+ export type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface, SearchEngineConfig, ContentTypeStrategy, ModelAgnosticInterface, ExtendedEmbeddingInterface, ExtendedRerankingInterface, SearchPipelineInterface, SearchDependencyFactory } from './core/interfaces.js';
53
+ export { InterfaceValidator } from './core/interfaces.js';
54
+ export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode, type ModeModelValidationResult } from './core/mode-model-validator.js';
55
+ export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError, type ActionableErrorConfig } from './core/actionable-error-messages.js';
56
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
57
+ export type { UniversalEmbedder } from './core/universal-embedder.js';
58
+ export { CLIPEmbedder } from './multimodal/clip-embedder.js';
59
+ export { createEmbedder } from './core/embedder-factory.js';
60
+ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
61
+ export { countTokens } from './text/tokenizer.js';
62
+ export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
63
+ export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
64
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, type DatabaseConnection } from './core/db.js';
9
65
  export { IndexManager } from './index-manager.js';
10
- export { ResourceManager } from './resource-manager.js';
66
+ export { VectorIndex } from './core/vector-index.js';
67
+ export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
68
+ export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
69
+ export { chunkDocument, type ChunkConfig } from './core/chunker.js';
70
+ export { DocumentPathManager } from './core/path-manager.js';
71
+ export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths, type RagLiteConfig, type RagLitePaths } from './core/raglite-paths.js';
72
+ export type { SearchResult, SearchOptions, Document, EmbeddingResult, ContentDocument, ContentChunk } from './core/types.js';
73
+ export type { Chunk, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
74
+ export type { IngestionOptions, IngestionResult } from './core/ingestion.js';
75
+ export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError, type ErrorContext } from './core/error-handler.js';
11
76
  export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
12
- export type { SearchResult, SearchOptions, Chunk, Document, EmbeddingResult, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
13
77
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -1,21 +1,107 @@
1
- // Main entry point for rag-lite-ts
2
- // Core API classes (simple interface matching README)
1
+ /**
2
+ * rag-lite-ts - Clean Architecture with Factory Pattern
3
+ *
4
+ * Quick Start (Recommended):
5
+ * ```typescript
6
+ * import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
7
+ *
8
+ * // Simple search - just works!
9
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
10
+ * const results = await search.search('your query');
11
+ *
12
+ * // Simple ingestion - just works!
13
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
14
+ * await ingestion.ingestDirectory('./documents');
15
+ * ```
16
+ *
17
+ * With Configuration:
18
+ * ```typescript
19
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite', {
20
+ * embeddingModel: 'Xenova/all-mpnet-base-v2',
21
+ * enableReranking: true
22
+ * });
23
+ * ```
24
+ *
25
+ * Complete RAG System:
26
+ * ```typescript
27
+ * import { RAGFactory } from 'rag-lite-ts';
28
+ *
29
+ * const { searchEngine, ingestionPipeline } = await RAGFactory.createBoth(
30
+ * './index.bin',
31
+ * './db.sqlite'
32
+ * );
33
+ * ```
34
+ *
35
+ * Advanced Usage (Direct Dependency Injection):
36
+ * ```typescript
37
+ * import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
38
+ *
39
+ * const embedFn = await createTextEmbedFunction();
40
+ * const search = new CoreSearchEngine(embedFn, indexManager, db);
41
+ * ```
42
+ */
43
+ // =============================================================================
44
+ // PRIMARY API (FACTORY PATTERN)
45
+ // =============================================================================
46
+ // Main factory classes for simple usage
47
+ export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
48
+ // Convenience aliases for common usage
49
+ export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
50
+ // =============================================================================
51
+ // CORE ARCHITECTURE (FOR LIBRARY AUTHORS)
52
+ // =============================================================================
53
+ // Core classes for direct dependency injection (advanced)
54
+ export { SearchEngine as CoreSearchEngine } from './core/search.js';
55
+ export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
56
+ // Public API classes
3
57
  export { SearchEngine } from './search.js';
4
58
  export { IngestionPipeline } from './ingestion.js';
5
- // Embedding operations (required for README examples)
6
- export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine } from './embedder.js';
7
- // Configuration (documented in README API reference)
8
- export { config, validateConfig, getModelDefaults } from './config.js';
9
- // Database operations (documented in README API reference)
10
- export { openDatabase, initializeSchema, insertDocument, insertChunk, getChunksByEmbeddingIds, getModelVersion, setModelVersion } from './db.js';
11
- // File processing operations (documented in README API reference)
12
- export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS } from './file-processor.js';
13
- // Tokenization (documented in README API reference)
14
- export { countTokens } from './tokenizer.js';
15
- // Advanced/Internal operations (for backward compatibility and advanced use cases)
16
- export { VectorIndex } from './vector-index.js';
59
+ // Lazy loading system for performance optimization
60
+ export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
61
+ // Interface validation utilities
62
+ export { InterfaceValidator } from './core/interfaces.js';
63
+ // Mode-model compatibility validation
64
+ export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode } from './core/mode-model-validator.js';
65
+ // Actionable error messages
66
+ export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError } from './core/actionable-error-messages.js';
67
+ // =============================================================================
68
+ // TEXT IMPLEMENTATIONS (FOR CUSTOM DEPENDENCY INJECTION)
69
+ // =============================================================================
70
+ // Text-specific embedding implementations
71
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
72
+ export { CLIPEmbedder } from './multimodal/clip-embedder.js';
73
+ export { createEmbedder } from './core/embedder-factory.js';
74
+ // Text-specific reranking implementations
75
+ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
76
+ // Text tokenization utilities
77
+ export { countTokens } from './text/tokenizer.js';
78
+ export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
79
+ // =============================================================================
80
+ // CORE INFRASTRUCTURE (FOR ADVANCED USERS)
81
+ // =============================================================================
82
+ // Database operations
83
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './core/db.js';
84
+ // Vector index management
17
85
  export { IndexManager } from './index-manager.js';
18
- export { ResourceManager } from './resource-manager.js';
19
- // Error handling
86
+ export { VectorIndex } from './core/vector-index.js';
87
+ // Configuration and utilities
88
+ export { config, getModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
89
+ // =============================================================================
90
+ // FILE PROCESSING AND UTILITIES
91
+ // =============================================================================
92
+ // File processing operations
93
+ export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS } from './file-processor.js';
94
+ // Document chunking
95
+ export { chunkDocument } from './core/chunker.js';
96
+ // Path management
97
+ export { DocumentPathManager } from './core/path-manager.js';
98
+ // RAG-lite directory structure management
99
+ export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths } from './core/raglite-paths.js';
100
+ // =============================================================================
101
+ // ERROR HANDLING
102
+ // =============================================================================
103
+ // Core error handling
104
+ export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError } from './core/error-handler.js';
105
+ // API-specific errors
20
106
  export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
21
107
  //# sourceMappingURL=index.js.map
package/dist/indexer.js CHANGED
@@ -4,7 +4,7 @@
4
4
  * Usage: node indexer.js <path>
5
5
  */
6
6
  import { runIngest } from './cli/indexer.js';
7
- import { EXIT_CODES, ConfigurationError } from './config.js';
7
+ import { EXIT_CODES, ConfigurationError } from './core/config.js';
8
8
  async function main() {
9
9
  const args = process.argv.slice(2);
10
10
  if (args.length === 0) {
@@ -1,175 +1,65 @@
1
- import { type FileProcessorOptions } from './file-processor.js';
2
- import { type ChunkConfig } from './chunker.js';
3
- import { type EmbeddingEngine } from './embedder.js';
4
- import { Config } from './config.js';
5
1
  /**
6
- * User-friendly error class with actionable suggestions
2
+ * Public API IngestionPipeline - Simple constructor interface with internal factory usage
3
+ *
4
+ * This class provides a clean, simple API while using the new core architecture
5
+ * internally. It handles dependency injection automatically.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * // Simple usage
10
+ * const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
11
+ * await pipeline.ingestDirectory('./documents');
12
+ *
13
+ * // With options
14
+ * const pipeline = new IngestionPipeline('./db.sqlite', './index.bin', {
15
+ * embeddingModel: 'all-MiniLM-L6-v2',
16
+ * chunkSize: 512
17
+ * });
18
+ * ```
7
19
  */
8
- export declare class IngestionError extends Error {
9
- code: string;
10
- suggestions: string[];
11
- constructor(message: string, code: string, suggestions: string[]);
20
+ import { type TextIngestionOptions } from './factories/index.js';
21
+ import type { IngestionOptions, IngestionResult } from './core/ingestion.js';
22
+ import type { MemoryContentMetadata } from './core/content-manager.js';
23
+ export interface IngestionPipelineOptions extends TextIngestionOptions {
12
24
  }
13
- /**
14
- * Options for the ingestion pipeline
15
- */
16
- export interface IngestionOptions {
17
- /** File processing options */
18
- fileOptions?: FileProcessorOptions;
19
- /** Chunking configuration */
20
- chunkConfig?: ChunkConfig;
21
- /** Whether to force rebuild the index */
22
- forceRebuild?: boolean;
23
- }
24
- /**
25
- * Result of the ingestion process
26
- */
27
- export interface IngestionResult {
28
- /** Total documents processed */
29
- documentsProcessed: number;
30
- /** Total chunks created */
31
- chunksCreated: number;
32
- /** Total embeddings generated */
33
- embeddingsGenerated: number;
34
- /** Number of documents that failed processing */
35
- documentErrors: number;
36
- /** Number of chunks that failed embedding */
37
- embeddingErrors: number;
38
- /** Processing time in milliseconds */
39
- processingTimeMs: number;
40
- }
41
- /**
42
- * Main ingestion pipeline class
43
- * Coordinates the entire process from file discovery to vector storage
44
- */
45
25
  export declare class IngestionPipeline {
46
- private static instances;
47
- private static cleanupHandlersSet;
48
- private db;
49
- private indexManager;
50
- private embeddingEngine;
51
- private pathManager;
52
- private isInitialized;
53
26
  private dbPath;
54
27
  private indexPath;
55
- private basePath;
56
- private configOverrides;
28
+ private options;
29
+ private corePipeline;
30
+ private initPromise;
31
+ private defaultChunkConfig;
32
+ constructor(dbPath: string, indexPath: string, options?: IngestionPipelineOptions);
57
33
  /**
58
- * Creates a new IngestionPipeline with simplified constructor
59
- * Pipeline is ready to use immediately without requiring initialization calls (Requirement 1.5)
60
- * @param basePath - Base directory path for database and index files (defaults to current directory)
61
- * @param embedder - Pre-initialized embedding engine (optional, will use default if not provided)
34
+ * Initialize the ingestion pipeline using the factory
62
35
  */
63
- constructor(basePath?: string, embedder?: EmbeddingEngine);
36
+ private initialize;
64
37
  /**
65
- * Set configuration overrides (for internal use)
66
- * @param overrides - Configuration overrides to apply
38
+ * Ingest a single document
67
39
  */
68
- setConfigOverrides(overrides: Partial<Config>): void;
40
+ ingestDocument(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
69
41
  /**
70
- * Set path storage strategy
71
- * @param strategy - Path storage strategy ('absolute' or 'relative')
72
- * @param basePath - Base path for relative paths (optional, defaults to current base path)
73
- */
74
- setPathStorageStrategy(strategy: 'absolute' | 'relative', basePath?: string): void;
75
- /**
76
- * Get effective configuration with overrides applied
77
- */
78
- private getEffectiveConfig;
79
- /**
80
- * Automatically initialize resources on first use with user-friendly error handling
81
- * Implements lazy initialization as required by 5.2
82
- */
83
- private ensureInitialized;
84
- /**
85
- * Create user-friendly error messages with actionable suggestions
86
- * Implements requirement 5.3: Clear, actionable error messages with specific next steps
87
- */
88
- private createUserFriendlyError;
89
- /**
90
- * Initialize the ingestion pipeline (public method for backward compatibility)
91
- * Sets up database, index manager, and embedding engine
92
- */
93
- initialize(): Promise<void>;
94
- /**
95
- * Ingest documents from a directory (matches README API)
96
- * Automatically initializes resources on first use (Requirements 2.1, 2.3, 5.2)
97
- * @param directoryPath - Path to directory containing documents
98
- * @param options - Optional ingestion configuration
99
- * @returns Promise resolving to ingestion results
42
+ * Ingest all documents in a directory
100
43
  */
101
44
  ingestDirectory(directoryPath: string, options?: IngestionOptions): Promise<IngestionResult>;
102
45
  /**
103
- * Ingest a single file (matches README API)
104
- * Automatically initializes resources on first use (Requirements 2.2, 2.3, 5.2)
105
- * @param filePath - Path to the file to ingest
106
- * @param options - Optional ingestion configuration
107
- * @returns Promise resolving to ingestion results
108
- */
109
- ingestFile(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
110
- /**
111
- * Ingest documents from a path (file or directory)
112
- * Implements the complete pipeline: file processing → chunking → embedding → storage
46
+ * Ingest content from memory buffer
47
+ * Enables MCP integration and real-time content processing
113
48
  *
114
- * Requirements addressed:
115
- * - 7.5: Single-threaded write processing to avoid SQLite lock contention
116
- * - 3.3: Graceful handling of embedding failures without stopping ingestion
117
- * - 10.1: Progress logging and error reporting during batch ingestion
118
- * - 2.3: Automatic creation of database and index files in appropriate locations
119
- */
120
- ingestPath(path: string, options?: IngestionOptions): Promise<IngestionResult>;
121
- /**
122
- * Chunk all documents and organize results
123
- */
124
- private chunkDocuments;
125
- /**
126
- * Generate embeddings for all chunks with error handling
127
- * Requirement 3.3: Graceful handling of embedding failures without stopping ingestion
128
- */
129
- private generateEmbeddings;
130
- /**
131
- * Store documents and chunks in database with single-threaded writes
132
- * Requirement 7.5: Single-threaded write processing to avoid SQLite lock contention
133
- */
134
- private storeDocumentsAndChunks;
135
- /**
136
- * Update vector index with new embeddings
137
- */
138
- private updateVectorIndex;
139
- /**
140
- * Initialize the pipeline for rebuild (skips model compatibility check)
141
- */
142
- private initializeForRebuild;
143
- /**
144
- * Rebuild the entire index from scratch
145
- * Useful when model version changes or for maintenance
146
- * Automatically initializes resources if needed (Requirement 5.2)
147
- */
148
- rebuildIndex(): Promise<void>;
149
- /**
150
- * Get pipeline statistics
151
- */
152
- getStats(): Promise<{
153
- indexStats: any;
154
- isInitialized: boolean;
155
- }>;
156
- /**
157
- * Set up automatic cleanup on process exit (Requirement 5.5)
158
- */
159
- private setupAutomaticCleanup;
49
+ * @example
50
+ * ```typescript
51
+ * const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
52
+ * const contentId = await pipeline.ingestFromMemory(buffer, {
53
+ * displayName: 'uploaded-file.txt',
54
+ * contentType: 'text/plain'
55
+ * });
56
+ * console.log('Content ingested with ID:', contentId);
57
+ * ```
58
+ */
59
+ ingestFromMemory(content: Buffer, metadata: MemoryContentMetadata, options?: IngestionOptions): Promise<string>;
160
60
  /**
161
61
  * Clean up resources
162
62
  */
163
63
  cleanup(): Promise<void>;
164
64
  }
165
- /**
166
- * Convenience function to ingest documents from a path
167
- * Creates a pipeline instance, runs ingestion, and cleans up
168
- */
169
- export declare function ingestDocuments(path: string, options?: IngestionOptions): Promise<IngestionResult>;
170
- /**
171
- * Convenience function to rebuild the index
172
- * Creates a pipeline instance, rebuilds index, and cleans up
173
- */
174
- export declare function rebuildIndex(): Promise<void>;
175
65
  //# sourceMappingURL=ingestion.d.ts.map