rag-lite-ts 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -109
- package/dist/cli/indexer.js +262 -46
- package/dist/cli/search.js +54 -32
- package/dist/cli.js +185 -28
- package/dist/config.d.ts +34 -73
- package/dist/config.js +50 -255
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/adapters.d.ts +93 -0
- package/dist/core/adapters.js +139 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +119 -0
- package/dist/core/chunker.js +73 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.d.ts +102 -0
- package/dist/core/config.js +247 -0
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +245 -0
- package/dist/core/db.js +952 -0
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
- package/dist/{error-handler.js → core/error-handler.js} +51 -8
- package/dist/core/index.d.ts +59 -0
- package/dist/core/index.js +69 -0
- package/dist/core/ingestion.d.ts +213 -0
- package/dist/core/ingestion.js +812 -0
- package/dist/core/interfaces.d.ts +408 -0
- package/dist/core/interfaces.js +106 -0
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
- package/dist/{path-manager.js → core/path-manager.js} +5 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search-pipeline.d.ts +111 -0
- package/dist/core/search-pipeline.js +287 -0
- package/dist/core/search.d.ts +131 -0
- package/dist/core/search.js +296 -0
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +66 -0
- package/dist/core/types.js +6 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
- package/dist/{vector-index.js → core/vector-index.js} +21 -3
- package/dist/dom-polyfills.d.ts +6 -0
- package/dist/dom-polyfills.js +40 -0
- package/dist/factories/index.d.ts +43 -0
- package/dist/factories/index.js +44 -0
- package/dist/factories/text-factory.d.ts +560 -0
- package/dist/factories/text-factory.js +968 -0
- package/dist/file-processor.d.ts +90 -4
- package/dist/file-processor.js +723 -20
- package/dist/index-manager.d.ts +3 -2
- package/dist/index-manager.js +13 -11
- package/dist/index.d.ts +72 -8
- package/dist/index.js +102 -16
- package/dist/indexer.js +1 -1
- package/dist/ingestion.d.ts +44 -154
- package/dist/ingestion.js +75 -671
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1186 -79
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/preprocess.js +1 -1
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search-standalone.js +1 -1
- package/dist/search.d.ts +51 -69
- package/dist/search.js +117 -412
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +33 -0
- package/dist/{chunker.js → text/chunker.js} +98 -75
- package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
- package/dist/{embedder.js → text/embedder.js} +84 -10
- package/dist/text/index.d.ts +8 -0
- package/dist/text/index.js +9 -0
- package/dist/text/preprocessors/index.d.ts +17 -0
- package/dist/text/preprocessors/index.js +38 -0
- package/dist/text/preprocessors/mdx.d.ts +25 -0
- package/dist/text/preprocessors/mdx.js +101 -0
- package/dist/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/text/preprocessors/mermaid.js +330 -0
- package/dist/text/preprocessors/registry.d.ts +56 -0
- package/dist/text/preprocessors/registry.js +180 -0
- package/dist/text/reranker.d.ts +59 -0
- package/dist/{reranker.js → text/reranker.js} +138 -53
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
- package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
- package/dist/types.d.ts +40 -1
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +16 -4
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/chunker.d.ts +0 -47
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/db.d.ts +0 -90
- package/dist/db.d.ts.map +0 -1
- package/dist/db.js +0 -340
- package/dist/db.js.map +0 -1
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js.map +0 -1
- package/dist/error-handler.d.ts.map +0 -1
- package/dist/error-handler.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/path-manager.d.ts.map +0 -1
- package/dist/path-manager.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/reranker.d.ts +0 -40
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js.map +0 -1
- package/dist/resource-manager-demo.d.ts +0 -7
- package/dist/resource-manager-demo.d.ts.map +0 -1
- package/dist/resource-manager-demo.js +0 -52
- package/dist/resource-manager-demo.js.map +0 -1
- package/dist/resource-manager.d.ts +0 -129
- package/dist/resource-manager.d.ts.map +0 -1
- package/dist/resource-manager.js +0 -389
- package/dist/resource-manager.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/vector-index.d.ts.map +0 -1
- package/dist/vector-index.js.map +0 -1
package/dist/index-manager.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { EmbeddingResult } from './types.js';
|
|
1
|
+
import type { EmbeddingResult } from './core/types.js';
|
|
2
2
|
export interface IndexStats {
|
|
3
3
|
totalVectors: number;
|
|
4
4
|
modelVersion: string;
|
|
@@ -17,8 +17,9 @@ export declare class IndexManager {
|
|
|
17
17
|
/**
|
|
18
18
|
* Initialize the index manager and load existing index if available
|
|
19
19
|
* @param skipModelCheck - Skip model compatibility check (used for rebuilds)
|
|
20
|
+
* @param forceRecreate - Force recreation of index (used for model changes)
|
|
20
21
|
*/
|
|
21
|
-
initialize(skipModelCheck?: boolean): Promise<void>;
|
|
22
|
+
initialize(skipModelCheck?: boolean, forceRecreate?: boolean): Promise<void>;
|
|
22
23
|
/**
|
|
23
24
|
* Check model compatibility between stored and current configuration
|
|
24
25
|
* Requirements: 2.1, 2.2, 2.4, 5.1, 5.2, 5.3, 5.4
|
package/dist/index-manager.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { VectorIndex } from './vector-index.js';
|
|
2
|
-
import { openDatabase, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './db.js';
|
|
3
|
-
import { config, getModelDefaults } from './config.js';
|
|
1
|
+
import { VectorIndex } from './core/vector-index.js';
|
|
2
|
+
import { openDatabase, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './core/db.js';
|
|
3
|
+
import { config, getModelDefaults } from './core/config.js';
|
|
4
4
|
export class IndexManager {
|
|
5
5
|
modelName;
|
|
6
6
|
vectorIndex;
|
|
@@ -25,8 +25,9 @@ export class IndexManager {
|
|
|
25
25
|
/**
|
|
26
26
|
* Initialize the index manager and load existing index if available
|
|
27
27
|
* @param skipModelCheck - Skip model compatibility check (used for rebuilds)
|
|
28
|
+
* @param forceRecreate - Force recreation of index (used for model changes)
|
|
28
29
|
*/
|
|
29
|
-
async initialize(skipModelCheck = false) {
|
|
30
|
+
async initialize(skipModelCheck = false, forceRecreate = false) {
|
|
30
31
|
if (this.isInitialized) {
|
|
31
32
|
return;
|
|
32
33
|
}
|
|
@@ -35,17 +36,18 @@ export class IndexManager {
|
|
|
35
36
|
this.db = await openDatabase(this.dbPath);
|
|
36
37
|
// Check model compatibility BEFORE trying to load the vector index
|
|
37
38
|
// This prevents WebAssembly exceptions when dimensions don't match
|
|
38
|
-
if (!skipModelCheck) {
|
|
39
|
+
if (!skipModelCheck && !forceRecreate) {
|
|
39
40
|
await this.checkModelCompatibility();
|
|
40
41
|
}
|
|
41
|
-
if (this.vectorIndex.indexExists()) {
|
|
42
|
-
console.log('Loading existing vector index...');
|
|
43
|
-
await this.vectorIndex.loadIndex();
|
|
44
|
-
}
|
|
45
|
-
else {
|
|
42
|
+
if (forceRecreate || !this.vectorIndex.indexExists()) {
|
|
46
43
|
console.log('Creating new vector index...');
|
|
47
44
|
await this.vectorIndex.initialize();
|
|
48
45
|
}
|
|
46
|
+
else {
|
|
47
|
+
// Only try to load existing index if not forcing recreation
|
|
48
|
+
console.log('Loading existing vector index...');
|
|
49
|
+
await this.vectorIndex.loadIndex();
|
|
50
|
+
}
|
|
49
51
|
// Always populate the embedding ID mapping from existing database entries
|
|
50
52
|
// This is needed both for new and existing indexes
|
|
51
53
|
const existingChunks = await this.db.all('SELECT embedding_id FROM chunks ORDER BY id');
|
|
@@ -383,7 +385,7 @@ export class IndexManager {
|
|
|
383
385
|
throw new Error('Database not initialized');
|
|
384
386
|
}
|
|
385
387
|
try {
|
|
386
|
-
const rows = await this.db.all('SELECT embedding_id, text, document_id FROM chunks ORDER BY id');
|
|
388
|
+
const rows = await this.db.all('SELECT embedding_id, content as text, document_id FROM chunks ORDER BY id');
|
|
387
389
|
return rows.map(row => ({
|
|
388
390
|
embedding_id: row.embedding_id,
|
|
389
391
|
text: row.text,
|
package/dist/index.d.ts
CHANGED
|
@@ -1,13 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rag-lite-ts - Clean Architecture with Factory Pattern
|
|
3
|
+
*
|
|
4
|
+
* Quick Start (Recommended):
|
|
5
|
+
* ```typescript
|
|
6
|
+
* import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
|
|
7
|
+
*
|
|
8
|
+
* // Simple search - just works!
|
|
9
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
10
|
+
* const results = await search.search('your query');
|
|
11
|
+
*
|
|
12
|
+
* // Simple ingestion - just works!
|
|
13
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
14
|
+
* await ingestion.ingestDirectory('./documents');
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* With Configuration:
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite', {
|
|
20
|
+
* embeddingModel: 'Xenova/all-mpnet-base-v2',
|
|
21
|
+
* enableReranking: true
|
|
22
|
+
* });
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* Complete RAG System:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* import { RAGFactory } from 'rag-lite-ts';
|
|
28
|
+
*
|
|
29
|
+
* const { searchEngine, ingestionPipeline } = await RAGFactory.createBoth(
|
|
30
|
+
* './index.bin',
|
|
31
|
+
* './db.sqlite'
|
|
32
|
+
* );
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* Advanced Usage (Direct Dependency Injection):
|
|
36
|
+
* ```typescript
|
|
37
|
+
* import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
|
|
38
|
+
*
|
|
39
|
+
* const embedFn = await createTextEmbedFunction();
|
|
40
|
+
* const search = new CoreSearchEngine(embedFn, indexManager, db);
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
|
|
44
|
+
export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
|
|
45
|
+
export type { TextSearchOptions, TextIngestionOptions } from './factories/index.js';
|
|
46
|
+
export type { TextSearchOptions as SearchEngineOptions, TextIngestionOptions as IngestionPipelineOptions } from './factories/index.js';
|
|
47
|
+
export { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
48
|
+
export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
|
|
1
49
|
export { SearchEngine } from './search.js';
|
|
2
50
|
export { IngestionPipeline } from './ingestion.js';
|
|
3
|
-
export {
|
|
4
|
-
export {
|
|
5
|
-
export {
|
|
6
|
-
export {
|
|
7
|
-
export {
|
|
8
|
-
export {
|
|
51
|
+
export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
|
|
52
|
+
export type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface, SearchEngineConfig, ContentTypeStrategy, ModelAgnosticInterface, ExtendedEmbeddingInterface, ExtendedRerankingInterface, SearchPipelineInterface, SearchDependencyFactory } from './core/interfaces.js';
|
|
53
|
+
export { InterfaceValidator } from './core/interfaces.js';
|
|
54
|
+
export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode, type ModeModelValidationResult } from './core/mode-model-validator.js';
|
|
55
|
+
export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError, type ActionableErrorConfig } from './core/actionable-error-messages.js';
|
|
56
|
+
export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
|
|
57
|
+
export type { UniversalEmbedder } from './core/universal-embedder.js';
|
|
58
|
+
export { CLIPEmbedder } from './multimodal/clip-embedder.js';
|
|
59
|
+
export { createEmbedder } from './core/embedder-factory.js';
|
|
60
|
+
export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
|
|
61
|
+
export { countTokens } from './text/tokenizer.js';
|
|
62
|
+
export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
|
|
63
|
+
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
64
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, type DatabaseConnection } from './core/db.js';
|
|
9
65
|
export { IndexManager } from './index-manager.js';
|
|
10
|
-
export {
|
|
66
|
+
export { VectorIndex } from './core/vector-index.js';
|
|
67
|
+
export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
|
68
|
+
export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
|
|
69
|
+
export { chunkDocument, type ChunkConfig } from './core/chunker.js';
|
|
70
|
+
export { DocumentPathManager } from './core/path-manager.js';
|
|
71
|
+
export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths, type RagLiteConfig, type RagLitePaths } from './core/raglite-paths.js';
|
|
72
|
+
export type { SearchResult, SearchOptions, Document, EmbeddingResult, ContentDocument, ContentChunk } from './core/types.js';
|
|
73
|
+
export type { Chunk, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
|
|
74
|
+
export type { IngestionOptions, IngestionResult } from './core/ingestion.js';
|
|
75
|
+
export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError, type ErrorContext } from './core/error-handler.js';
|
|
11
76
|
export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
|
|
12
|
-
export type { SearchResult, SearchOptions, Chunk, Document, EmbeddingResult, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
|
|
13
77
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,21 +1,107 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* rag-lite-ts - Clean Architecture with Factory Pattern
|
|
3
|
+
*
|
|
4
|
+
* Quick Start (Recommended):
|
|
5
|
+
* ```typescript
|
|
6
|
+
* import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
|
|
7
|
+
*
|
|
8
|
+
* // Simple search - just works!
|
|
9
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
10
|
+
* const results = await search.search('your query');
|
|
11
|
+
*
|
|
12
|
+
* // Simple ingestion - just works!
|
|
13
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
14
|
+
* await ingestion.ingestDirectory('./documents');
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* With Configuration:
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite', {
|
|
20
|
+
* embeddingModel: 'Xenova/all-mpnet-base-v2',
|
|
21
|
+
* enableReranking: true
|
|
22
|
+
* });
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* Complete RAG System:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* import { RAGFactory } from 'rag-lite-ts';
|
|
28
|
+
*
|
|
29
|
+
* const { searchEngine, ingestionPipeline } = await RAGFactory.createBoth(
|
|
30
|
+
* './index.bin',
|
|
31
|
+
* './db.sqlite'
|
|
32
|
+
* );
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* Advanced Usage (Direct Dependency Injection):
|
|
36
|
+
* ```typescript
|
|
37
|
+
* import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
|
|
38
|
+
*
|
|
39
|
+
* const embedFn = await createTextEmbedFunction();
|
|
40
|
+
* const search = new CoreSearchEngine(embedFn, indexManager, db);
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
// =============================================================================
|
|
44
|
+
// PRIMARY API (FACTORY PATTERN)
|
|
45
|
+
// =============================================================================
|
|
46
|
+
// Main factory classes for simple usage
|
|
47
|
+
export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
|
|
48
|
+
// Convenience aliases for common usage
|
|
49
|
+
export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
|
|
50
|
+
// =============================================================================
|
|
51
|
+
// CORE ARCHITECTURE (FOR LIBRARY AUTHORS)
|
|
52
|
+
// =============================================================================
|
|
53
|
+
// Core classes for direct dependency injection (advanced)
|
|
54
|
+
export { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
55
|
+
export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
|
|
56
|
+
// Public API classes
|
|
3
57
|
export { SearchEngine } from './search.js';
|
|
4
58
|
export { IngestionPipeline } from './ingestion.js';
|
|
5
|
-
//
|
|
6
|
-
export {
|
|
7
|
-
//
|
|
8
|
-
export {
|
|
9
|
-
//
|
|
10
|
-
export {
|
|
11
|
-
//
|
|
12
|
-
export {
|
|
13
|
-
//
|
|
14
|
-
|
|
15
|
-
//
|
|
16
|
-
|
|
59
|
+
// Lazy loading system for performance optimization
|
|
60
|
+
export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
|
|
61
|
+
// Interface validation utilities
|
|
62
|
+
export { InterfaceValidator } from './core/interfaces.js';
|
|
63
|
+
// Mode-model compatibility validation
|
|
64
|
+
export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode } from './core/mode-model-validator.js';
|
|
65
|
+
// Actionable error messages
|
|
66
|
+
export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError } from './core/actionable-error-messages.js';
|
|
67
|
+
// =============================================================================
|
|
68
|
+
// TEXT IMPLEMENTATIONS (FOR CUSTOM DEPENDENCY INJECTION)
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// Text-specific embedding implementations
|
|
71
|
+
export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
|
|
72
|
+
export { CLIPEmbedder } from './multimodal/clip-embedder.js';
|
|
73
|
+
export { createEmbedder } from './core/embedder-factory.js';
|
|
74
|
+
// Text-specific reranking implementations
|
|
75
|
+
export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
|
|
76
|
+
// Text tokenization utilities
|
|
77
|
+
export { countTokens } from './text/tokenizer.js';
|
|
78
|
+
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
79
|
+
// =============================================================================
|
|
80
|
+
// CORE INFRASTRUCTURE (FOR ADVANCED USERS)
|
|
81
|
+
// =============================================================================
|
|
82
|
+
// Database operations
|
|
83
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo } from './core/db.js';
|
|
84
|
+
// Vector index management
|
|
17
85
|
export { IndexManager } from './index-manager.js';
|
|
18
|
-
export {
|
|
19
|
-
//
|
|
86
|
+
export { VectorIndex } from './core/vector-index.js';
|
|
87
|
+
// Configuration and utilities
|
|
88
|
+
export { config, getModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
|
89
|
+
// =============================================================================
|
|
90
|
+
// FILE PROCESSING AND UTILITIES
|
|
91
|
+
// =============================================================================
|
|
92
|
+
// File processing operations
|
|
93
|
+
export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS } from './file-processor.js';
|
|
94
|
+
// Document chunking
|
|
95
|
+
export { chunkDocument } from './core/chunker.js';
|
|
96
|
+
// Path management
|
|
97
|
+
export { DocumentPathManager } from './core/path-manager.js';
|
|
98
|
+
// RAG-lite directory structure management
|
|
99
|
+
export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths } from './core/raglite-paths.js';
|
|
100
|
+
// =============================================================================
|
|
101
|
+
// ERROR HANDLING
|
|
102
|
+
// =============================================================================
|
|
103
|
+
// Core error handling
|
|
104
|
+
export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError } from './core/error-handler.js';
|
|
105
|
+
// API-specific errors
|
|
20
106
|
export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
|
|
21
107
|
//# sourceMappingURL=index.js.map
|
package/dist/indexer.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Usage: node indexer.js <path>
|
|
5
5
|
*/
|
|
6
6
|
import { runIngest } from './cli/indexer.js';
|
|
7
|
-
import { EXIT_CODES, ConfigurationError } from './config.js';
|
|
7
|
+
import { EXIT_CODES, ConfigurationError } from './core/config.js';
|
|
8
8
|
async function main() {
|
|
9
9
|
const args = process.argv.slice(2);
|
|
10
10
|
if (args.length === 0) {
|
package/dist/ingestion.d.ts
CHANGED
|
@@ -1,175 +1,65 @@
|
|
|
1
|
-
import { type FileProcessorOptions } from './file-processor.js';
|
|
2
|
-
import { type ChunkConfig } from './chunker.js';
|
|
3
|
-
import { type EmbeddingEngine } from './embedder.js';
|
|
4
|
-
import { Config } from './config.js';
|
|
5
1
|
/**
|
|
6
|
-
*
|
|
2
|
+
* Public API IngestionPipeline - Simple constructor interface with internal factory usage
|
|
3
|
+
*
|
|
4
|
+
* This class provides a clean, simple API while using the new core architecture
|
|
5
|
+
* internally. It handles dependency injection automatically.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* // Simple usage
|
|
10
|
+
* const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
|
|
11
|
+
* await pipeline.ingestDirectory('./documents');
|
|
12
|
+
*
|
|
13
|
+
* // With options
|
|
14
|
+
* const pipeline = new IngestionPipeline('./db.sqlite', './index.bin', {
|
|
15
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
16
|
+
* chunkSize: 512
|
|
17
|
+
* });
|
|
18
|
+
* ```
|
|
7
19
|
*/
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
20
|
+
import { type TextIngestionOptions } from './factories/index.js';
|
|
21
|
+
import type { IngestionOptions, IngestionResult } from './core/ingestion.js';
|
|
22
|
+
import type { MemoryContentMetadata } from './core/content-manager.js';
|
|
23
|
+
export interface IngestionPipelineOptions extends TextIngestionOptions {
|
|
12
24
|
}
|
|
13
|
-
/**
|
|
14
|
-
* Options for the ingestion pipeline
|
|
15
|
-
*/
|
|
16
|
-
export interface IngestionOptions {
|
|
17
|
-
/** File processing options */
|
|
18
|
-
fileOptions?: FileProcessorOptions;
|
|
19
|
-
/** Chunking configuration */
|
|
20
|
-
chunkConfig?: ChunkConfig;
|
|
21
|
-
/** Whether to force rebuild the index */
|
|
22
|
-
forceRebuild?: boolean;
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Result of the ingestion process
|
|
26
|
-
*/
|
|
27
|
-
export interface IngestionResult {
|
|
28
|
-
/** Total documents processed */
|
|
29
|
-
documentsProcessed: number;
|
|
30
|
-
/** Total chunks created */
|
|
31
|
-
chunksCreated: number;
|
|
32
|
-
/** Total embeddings generated */
|
|
33
|
-
embeddingsGenerated: number;
|
|
34
|
-
/** Number of documents that failed processing */
|
|
35
|
-
documentErrors: number;
|
|
36
|
-
/** Number of chunks that failed embedding */
|
|
37
|
-
embeddingErrors: number;
|
|
38
|
-
/** Processing time in milliseconds */
|
|
39
|
-
processingTimeMs: number;
|
|
40
|
-
}
|
|
41
|
-
/**
|
|
42
|
-
* Main ingestion pipeline class
|
|
43
|
-
* Coordinates the entire process from file discovery to vector storage
|
|
44
|
-
*/
|
|
45
25
|
export declare class IngestionPipeline {
|
|
46
|
-
private static instances;
|
|
47
|
-
private static cleanupHandlersSet;
|
|
48
|
-
private db;
|
|
49
|
-
private indexManager;
|
|
50
|
-
private embeddingEngine;
|
|
51
|
-
private pathManager;
|
|
52
|
-
private isInitialized;
|
|
53
26
|
private dbPath;
|
|
54
27
|
private indexPath;
|
|
55
|
-
private
|
|
56
|
-
private
|
|
28
|
+
private options;
|
|
29
|
+
private corePipeline;
|
|
30
|
+
private initPromise;
|
|
31
|
+
private defaultChunkConfig;
|
|
32
|
+
constructor(dbPath: string, indexPath: string, options?: IngestionPipelineOptions);
|
|
57
33
|
/**
|
|
58
|
-
*
|
|
59
|
-
* Pipeline is ready to use immediately without requiring initialization calls (Requirement 1.5)
|
|
60
|
-
* @param basePath - Base directory path for database and index files (defaults to current directory)
|
|
61
|
-
* @param embedder - Pre-initialized embedding engine (optional, will use default if not provided)
|
|
34
|
+
* Initialize the ingestion pipeline using the factory
|
|
62
35
|
*/
|
|
63
|
-
|
|
36
|
+
private initialize;
|
|
64
37
|
/**
|
|
65
|
-
*
|
|
66
|
-
* @param overrides - Configuration overrides to apply
|
|
38
|
+
* Ingest a single document
|
|
67
39
|
*/
|
|
68
|
-
|
|
40
|
+
ingestDocument(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
69
41
|
/**
|
|
70
|
-
*
|
|
71
|
-
* @param strategy - Path storage strategy ('absolute' or 'relative')
|
|
72
|
-
* @param basePath - Base path for relative paths (optional, defaults to current base path)
|
|
73
|
-
*/
|
|
74
|
-
setPathStorageStrategy(strategy: 'absolute' | 'relative', basePath?: string): void;
|
|
75
|
-
/**
|
|
76
|
-
* Get effective configuration with overrides applied
|
|
77
|
-
*/
|
|
78
|
-
private getEffectiveConfig;
|
|
79
|
-
/**
|
|
80
|
-
* Automatically initialize resources on first use with user-friendly error handling
|
|
81
|
-
* Implements lazy initialization as required by 5.2
|
|
82
|
-
*/
|
|
83
|
-
private ensureInitialized;
|
|
84
|
-
/**
|
|
85
|
-
* Create user-friendly error messages with actionable suggestions
|
|
86
|
-
* Implements requirement 5.3: Clear, actionable error messages with specific next steps
|
|
87
|
-
*/
|
|
88
|
-
private createUserFriendlyError;
|
|
89
|
-
/**
|
|
90
|
-
* Initialize the ingestion pipeline (public method for backward compatibility)
|
|
91
|
-
* Sets up database, index manager, and embedding engine
|
|
92
|
-
*/
|
|
93
|
-
initialize(): Promise<void>;
|
|
94
|
-
/**
|
|
95
|
-
* Ingest documents from a directory (matches README API)
|
|
96
|
-
* Automatically initializes resources on first use (Requirements 2.1, 2.3, 5.2)
|
|
97
|
-
* @param directoryPath - Path to directory containing documents
|
|
98
|
-
* @param options - Optional ingestion configuration
|
|
99
|
-
* @returns Promise resolving to ingestion results
|
|
42
|
+
* Ingest all documents in a directory
|
|
100
43
|
*/
|
|
101
44
|
ingestDirectory(directoryPath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
102
45
|
/**
|
|
103
|
-
* Ingest
|
|
104
|
-
*
|
|
105
|
-
* @param filePath - Path to the file to ingest
|
|
106
|
-
* @param options - Optional ingestion configuration
|
|
107
|
-
* @returns Promise resolving to ingestion results
|
|
108
|
-
*/
|
|
109
|
-
ingestFile(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
110
|
-
/**
|
|
111
|
-
* Ingest documents from a path (file or directory)
|
|
112
|
-
* Implements the complete pipeline: file processing → chunking → embedding → storage
|
|
46
|
+
* Ingest content from memory buffer
|
|
47
|
+
* Enables MCP integration and real-time content processing
|
|
113
48
|
*
|
|
114
|
-
*
|
|
115
|
-
*
|
|
116
|
-
*
|
|
117
|
-
*
|
|
118
|
-
* -
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
*
|
|
123
|
-
*/
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* Generate embeddings for all chunks with error handling
|
|
127
|
-
* Requirement 3.3: Graceful handling of embedding failures without stopping ingestion
|
|
128
|
-
*/
|
|
129
|
-
private generateEmbeddings;
|
|
130
|
-
/**
|
|
131
|
-
* Store documents and chunks in database with single-threaded writes
|
|
132
|
-
* Requirement 7.5: Single-threaded write processing to avoid SQLite lock contention
|
|
133
|
-
*/
|
|
134
|
-
private storeDocumentsAndChunks;
|
|
135
|
-
/**
|
|
136
|
-
* Update vector index with new embeddings
|
|
137
|
-
*/
|
|
138
|
-
private updateVectorIndex;
|
|
139
|
-
/**
|
|
140
|
-
* Initialize the pipeline for rebuild (skips model compatibility check)
|
|
141
|
-
*/
|
|
142
|
-
private initializeForRebuild;
|
|
143
|
-
/**
|
|
144
|
-
* Rebuild the entire index from scratch
|
|
145
|
-
* Useful when model version changes or for maintenance
|
|
146
|
-
* Automatically initializes resources if needed (Requirement 5.2)
|
|
147
|
-
*/
|
|
148
|
-
rebuildIndex(): Promise<void>;
|
|
149
|
-
/**
|
|
150
|
-
* Get pipeline statistics
|
|
151
|
-
*/
|
|
152
|
-
getStats(): Promise<{
|
|
153
|
-
indexStats: any;
|
|
154
|
-
isInitialized: boolean;
|
|
155
|
-
}>;
|
|
156
|
-
/**
|
|
157
|
-
* Set up automatic cleanup on process exit (Requirement 5.5)
|
|
158
|
-
*/
|
|
159
|
-
private setupAutomaticCleanup;
|
|
49
|
+
* @example
|
|
50
|
+
* ```typescript
|
|
51
|
+
* const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
|
|
52
|
+
* const contentId = await pipeline.ingestFromMemory(buffer, {
|
|
53
|
+
* displayName: 'uploaded-file.txt',
|
|
54
|
+
* contentType: 'text/plain'
|
|
55
|
+
* });
|
|
56
|
+
* console.log('Content ingested with ID:', contentId);
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
ingestFromMemory(content: Buffer, metadata: MemoryContentMetadata, options?: IngestionOptions): Promise<string>;
|
|
160
60
|
/**
|
|
161
61
|
* Clean up resources
|
|
162
62
|
*/
|
|
163
63
|
cleanup(): Promise<void>;
|
|
164
64
|
}
|
|
165
|
-
/**
|
|
166
|
-
* Convenience function to ingest documents from a path
|
|
167
|
-
* Creates a pipeline instance, runs ingestion, and cleans up
|
|
168
|
-
*/
|
|
169
|
-
export declare function ingestDocuments(path: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
170
|
-
/**
|
|
171
|
-
* Convenience function to rebuild the index
|
|
172
|
-
* Creates a pipeline instance, rebuilds index, and cleans up
|
|
173
|
-
*/
|
|
174
|
-
export declare function rebuildIndex(): Promise<void>;
|
|
175
65
|
//# sourceMappingURL=ingestion.d.ts.map
|