rag-lite-ts 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +25 -6
- package/dist/cli/search.js +3 -3
- package/dist/cli.js +33 -6
- package/dist/core/actionable-error-messages.js +3 -3
- package/dist/core/content-manager.d.ts +0 -8
- package/dist/core/content-manager.js +2 -30
- package/dist/core/database-connection-manager.js +10 -0
- package/dist/core/db.d.ts +0 -32
- package/dist/core/db.js +11 -68
- package/dist/core/embedder-factory.d.ts +0 -22
- package/dist/core/embedder-factory.js +8 -35
- package/dist/core/index.d.ts +3 -3
- package/dist/core/index.js +3 -3
- package/dist/core/ingestion.d.ts +1 -16
- package/dist/core/ingestion.js +1 -30
- package/dist/core/interfaces.d.ts +1 -1
- package/dist/core/interfaces.js +1 -1
- package/dist/core/model-registry.d.ts +0 -4
- package/dist/core/model-registry.js +5 -9
- package/dist/core/search.d.ts +2 -2
- package/dist/core/search.js +2 -2
- package/dist/factories/index.d.ts +11 -29
- package/dist/factories/index.js +12 -29
- package/dist/factories/ingestion-factory.d.ts +200 -0
- package/dist/factories/ingestion-factory.js +475 -0
- package/dist/{core/polymorphic-search-factory.d.ts → factories/search-factory.d.ts} +7 -7
- package/dist/{core/polymorphic-search-factory.js → factories/search-factory.js} +22 -22
- package/dist/index-manager.js +25 -14
- package/dist/index.d.ts +5 -30
- package/dist/index.js +9 -24
- package/dist/indexer.js +5 -2
- package/dist/ingestion.d.ts +2 -4
- package/dist/ingestion.js +2 -2
- package/dist/mcp-server.js +31 -25
- package/dist/search.js +2 -2
- package/dist/text/embedder.d.ts +0 -11
- package/dist/text/embedder.js +11 -22
- package/dist/text/index.d.ts +2 -2
- package/dist/text/index.js +2 -2
- package/dist/text/reranker.d.ts +0 -10
- package/dist/text/reranker.js +10 -33
- package/package.json +7 -3
- package/dist/factories/polymorphic-factory.d.ts +0 -50
- package/dist/factories/polymorphic-factory.js +0 -159
- package/dist/factories/text-factory.d.ts +0 -560
- package/dist/factories/text-factory.js +0 -968
package/dist/core/ingestion.js
CHANGED
|
@@ -59,7 +59,7 @@ export class IngestionPipeline {
|
|
|
59
59
|
* USAGE EXAMPLES:
|
|
60
60
|
* ```typescript
|
|
61
61
|
* // Text-only ingestion pipeline with unified content system
|
|
62
|
-
* const textEmbedFn =
|
|
62
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
63
63
|
* const indexManager = new IndexManager('./index.bin');
|
|
64
64
|
* const db = await openDatabase('./db.sqlite');
|
|
65
65
|
* const contentManager = new ContentManager(db);
|
|
@@ -428,19 +428,6 @@ export class IngestionPipeline {
|
|
|
428
428
|
console.log(`✓ Chunking complete: Created ${totalChunks} chunks from ${documentChunks.length} documents`);
|
|
429
429
|
return { documentChunks, allChunks, totalChunks };
|
|
430
430
|
}
|
|
431
|
-
/**
|
|
432
|
-
* Chunk all documents and organize results (legacy method for backward compatibility)
|
|
433
|
-
* @deprecated Use chunkDocumentsWithContentTypes for multimodal support
|
|
434
|
-
*/
|
|
435
|
-
async chunkDocuments(documents, chunkConfig) {
|
|
436
|
-
const result = await this.chunkDocumentsWithContentTypes(documents, chunkConfig);
|
|
437
|
-
// Convert to legacy format for backward compatibility
|
|
438
|
-
return {
|
|
439
|
-
documentChunks: result.documentChunks,
|
|
440
|
-
allChunks: result.allChunks.map(chunk => chunk.text),
|
|
441
|
-
totalChunks: result.totalChunks
|
|
442
|
-
};
|
|
443
|
-
}
|
|
444
431
|
/**
|
|
445
432
|
* Generate embeddings for all chunks with content-type support
|
|
446
433
|
* Enhanced to handle different content types and pass metadata to embedding function
|
|
@@ -488,15 +475,6 @@ export class IngestionPipeline {
|
|
|
488
475
|
throw new Error(`Embedding generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
489
476
|
}
|
|
490
477
|
}
|
|
491
|
-
/**
|
|
492
|
-
* Generate embeddings for all chunks with error handling (legacy method for backward compatibility)
|
|
493
|
-
* @deprecated Use generateEmbeddingsWithContentTypes for multimodal support
|
|
494
|
-
*/
|
|
495
|
-
async generateEmbeddings(chunkTexts) {
|
|
496
|
-
// Convert to new format for backward compatibility
|
|
497
|
-
const chunks = chunkTexts.map(text => ({ text, contentType: 'text' }));
|
|
498
|
-
return this.generateEmbeddingsWithContentTypes(chunks);
|
|
499
|
-
}
|
|
500
478
|
/**
|
|
501
479
|
* Store documents and chunks in database with content-type support
|
|
502
480
|
* Enhanced to handle content type metadata and multimodal content
|
|
@@ -584,13 +562,6 @@ export class IngestionPipeline {
|
|
|
584
562
|
console.log(`✓ Storage complete: ${documentsStored} documents, ${totalChunksStored} chunks saved to database`);
|
|
585
563
|
return contentIds;
|
|
586
564
|
}
|
|
587
|
-
/**
|
|
588
|
-
* Store documents and chunks in database (legacy method for backward compatibility)
|
|
589
|
-
* @deprecated Use storeDocumentsAndChunksWithContentTypes for multimodal support
|
|
590
|
-
*/
|
|
591
|
-
async storeDocumentsAndChunks(documentChunks, embeddings) {
|
|
592
|
-
await this.storeDocumentsAndChunksWithContentTypes(documentChunks, embeddings);
|
|
593
|
-
}
|
|
594
565
|
/**
|
|
595
566
|
* Update vector index with new embeddings
|
|
596
567
|
*/
|
|
package/dist/core/interfaces.d.ts
CHANGED
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
* });
|
|
36
36
|
*
|
|
37
37
|
* // Factory with custom configuration
|
|
38
|
-
* const ingestion = await
|
|
38
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
39
39
|
* chunkSize: 300,
|
|
40
40
|
* chunkOverlap: 50
|
|
41
41
|
* });
|
package/dist/core/interfaces.js
CHANGED
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
* });
|
|
36
36
|
*
|
|
37
37
|
* // Factory with custom configuration
|
|
38
|
-
* const ingestion = await
|
|
38
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
39
39
|
* chunkSize: 300,
|
|
40
40
|
* chunkOverlap: 50
|
|
41
41
|
* });
|
|
package/dist/core/model-registry.d.ts
CHANGED
|
@@ -113,8 +113,4 @@ export declare function getRecommendedBatchSize(modelName: string): number;
|
|
|
113
113
|
* Default model names for different types
|
|
114
114
|
*/
|
|
115
115
|
export declare const DEFAULT_MODELS: Record<ModelType, string>;
|
|
116
|
-
/**
|
|
117
|
-
* Model type mappings for backward compatibility
|
|
118
|
-
*/
|
|
119
|
-
export declare const MODEL_TYPE_ALIASES: Record<string, ModelType>;
|
|
120
116
|
//# sourceMappingURL=model-registry.d.ts.map
|
|
package/dist/core/model-registry.js
CHANGED
|
@@ -403,13 +403,9 @@ export const DEFAULT_MODELS = {
|
|
|
403
403
|
'sentence-transformer': 'sentence-transformers/all-MiniLM-L6-v2',
|
|
404
404
|
'clip': 'Xenova/clip-vit-base-patch32'
|
|
405
405
|
};
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
'multimodal': 'clip',
|
|
412
|
-
'sentence': 'sentence-transformer',
|
|
413
|
-
'embedding': 'sentence-transformer'
|
|
414
|
-
};
|
|
406
|
+
// =============================================================================
|
|
407
|
+
// REMOVED IN v3.0.0: MODEL_TYPE_ALIASES
|
|
408
|
+
// =============================================================================
|
|
409
|
+
// Model type aliases have been removed as they were not used anywhere in the codebase.
|
|
410
|
+
// Use ModelType directly: 'sentence-transformer' or 'clip'
|
|
415
411
|
//# sourceMappingURL=model-registry.js.map
|
package/dist/core/search.d.ts
CHANGED
|
@@ -54,8 +54,8 @@ export declare class SearchEngine {
|
|
|
54
54
|
* USAGE EXAMPLES:
|
|
55
55
|
* ```typescript
|
|
56
56
|
* // Text-only search engine
|
|
57
|
-
* const textEmbedFn =
|
|
58
|
-
* const textRerankFn =
|
|
57
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
58
|
+
* const textRerankFn = createTextRerankFunction();
|
|
59
59
|
* const indexManager = new IndexManager('./index.bin');
|
|
60
60
|
* const db = await openDatabase('./db.sqlite');
|
|
61
61
|
* const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
|
package/dist/core/search.js
CHANGED
|
@@ -53,8 +53,8 @@ export class SearchEngine {
|
|
|
53
53
|
* USAGE EXAMPLES:
|
|
54
54
|
* ```typescript
|
|
55
55
|
* // Text-only search engine
|
|
56
|
-
* const textEmbedFn =
|
|
57
|
-
* const textRerankFn =
|
|
56
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
57
|
+
* const textRerankFn = createTextRerankFunction();
|
|
58
58
|
* const indexManager = new IndexManager('./index.bin');
|
|
59
59
|
* const db = await openDatabase('./db.sqlite');
|
|
60
60
|
* const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
|
|
package/dist/factories/index.d.ts
CHANGED
|
@@ -1,45 +1,27 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Factory exports for creating
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
3
|
* Provides convenient factory functions for common use cases
|
|
4
4
|
*
|
|
5
5
|
* This module serves as the main entry point for factory functions that
|
|
6
|
-
* simplify the creation of
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
7
|
* The factories handle complex initialization while providing clean APIs.
|
|
8
8
|
*
|
|
9
9
|
* MAIN FACTORY CLASSES:
|
|
10
|
-
* -
|
|
11
|
-
* -
|
|
12
|
-
* - TextRAGFactory: Creates both search and ingestion instances together
|
|
13
|
-
* - TextFactoryHelpers: Utility functions for validation and error recovery
|
|
14
|
-
*
|
|
15
|
-
* CONVENIENCE ALIASES:
|
|
16
|
-
* - SearchFactory: Alias for TextSearchFactory
|
|
17
|
-
* - IngestionFactory: Alias for TextIngestionFactory
|
|
18
|
-
* - RAGFactory: Alias for TextRAGFactory
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
19
12
|
*
|
|
20
13
|
* @example
|
|
21
14
|
* ```typescript
|
|
22
|
-
* import {
|
|
23
|
-
*
|
|
24
|
-
* // Create search engine
|
|
25
|
-
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
26
16
|
*
|
|
27
17
|
* // Create ingestion pipeline
|
|
28
|
-
* const ingestion = await
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
29
19
|
*
|
|
30
|
-
* //
|
|
31
|
-
*
|
|
32
|
-
* const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
|
|
33
|
-
* './index.bin',
|
|
34
|
-
* './db.sqlite'
|
|
35
|
-
* );
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
36
22
|
* ```
|
|
37
23
|
*/
|
|
38
|
-
export {
|
|
39
|
-
export {
|
|
40
|
-
export type {
|
|
41
|
-
export type { TextSearchOptions, TextIngestionOptions, ContentSystemConfig } from './text-factory.js';
|
|
42
|
-
export { TextSearchFactory as SearchFactory } from './text-factory.js';
|
|
43
|
-
export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
|
|
44
|
-
export { TextRAGFactory as RAGFactory } from './text-factory.js';
|
|
24
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
25
|
+
export { SearchFactory } from './search-factory.js';
|
|
26
|
+
export type { IngestionFactoryOptions, ContentSystemConfig } from './ingestion-factory.js';
|
|
45
27
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/factories/index.js
CHANGED
|
@@ -1,46 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Factory exports for creating
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
3
|
* Provides convenient factory functions for common use cases
|
|
4
4
|
*
|
|
5
5
|
* This module serves as the main entry point for factory functions that
|
|
6
|
-
* simplify the creation of
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
7
|
* The factories handle complex initialization while providing clean APIs.
|
|
8
8
|
*
|
|
9
9
|
* MAIN FACTORY CLASSES:
|
|
10
|
-
* -
|
|
11
|
-
* -
|
|
12
|
-
* - TextRAGFactory: Creates both search and ingestion instances together
|
|
13
|
-
* - TextFactoryHelpers: Utility functions for validation and error recovery
|
|
14
|
-
*
|
|
15
|
-
* CONVENIENCE ALIASES:
|
|
16
|
-
* - SearchFactory: Alias for TextSearchFactory
|
|
17
|
-
* - IngestionFactory: Alias for TextIngestionFactory
|
|
18
|
-
* - RAGFactory: Alias for TextRAGFactory
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
19
12
|
*
|
|
20
13
|
* @example
|
|
21
14
|
* ```typescript
|
|
22
|
-
* import {
|
|
23
|
-
*
|
|
24
|
-
* // Create search engine
|
|
25
|
-
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
26
16
|
*
|
|
27
17
|
* // Create ingestion pipeline
|
|
28
|
-
* const ingestion = await
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
29
19
|
*
|
|
30
|
-
* //
|
|
31
|
-
*
|
|
32
|
-
* const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
|
|
33
|
-
* './index.bin',
|
|
34
|
-
* './db.sqlite'
|
|
35
|
-
* );
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
36
22
|
* ```
|
|
37
23
|
*/
|
|
38
24
|
// Main factory classes
|
|
39
|
-
export {
|
|
40
|
-
// Polymorphic factory for mode
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
export { TextSearchFactory as SearchFactory } from './text-factory.js';
|
|
44
|
-
export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
|
|
45
|
-
export { TextRAGFactory as RAGFactory } from './text-factory.js';
|
|
25
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
26
|
+
// Polymorphic search factory (recommended for automatic mode detection)
|
|
27
|
+
// Re-exported from core for convenience
|
|
28
|
+
export { SearchFactory } from './search-factory.js';
|
|
46
29
|
//# sourceMappingURL=index.js.map
|
|
package/dist/factories/ingestion-factory.d.ts
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory functions for creating text-specific search and ingestion instances
|
|
3
|
+
* Handles complex initialization logic while providing clean API for common use cases
|
|
4
|
+
*
|
|
5
|
+
* FACTORY PATTERN BENEFITS:
|
|
6
|
+
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
|
+
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
|
+
* - Supports different embedding models and configurations
|
|
10
|
+
* - Enables clean separation between simple usage and advanced customization
|
|
11
|
+
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
23
|
+
* USAGE PATTERNS:
|
|
24
|
+
*
|
|
25
|
+
* 1. Mode Selection:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* // Text mode (default) - optimized for text-only content
|
|
28
|
+
* const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
29
|
+
* mode: 'text',
|
|
30
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // Multimodal mode - enables cross-modal search
|
|
34
|
+
* const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
35
|
+
* mode: 'multimodal',
|
|
36
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
37
|
+
* rerankingStrategy: 'text-derived'
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import { IngestionPipeline } from '../core/ingestion.js';
|
|
42
|
+
/**
|
|
43
|
+
* Content system configuration options
|
|
44
|
+
*/
|
|
45
|
+
export interface ContentSystemConfig {
|
|
46
|
+
/** Content directory path (default: '.raglite/content') */
|
|
47
|
+
contentDir?: string;
|
|
48
|
+
/** Maximum file size in bytes (default: 50MB) */
|
|
49
|
+
maxFileSize?: number;
|
|
50
|
+
/** Maximum content directory size in bytes (default: 2GB) */
|
|
51
|
+
maxContentDirSize?: number;
|
|
52
|
+
/** Enable content deduplication (default: true) */
|
|
53
|
+
enableDeduplication?: boolean;
|
|
54
|
+
/** Enable storage tracking (default: true) */
|
|
55
|
+
enableStorageTracking?: boolean;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Options for text ingestion factory
|
|
59
|
+
*/
|
|
60
|
+
export interface IngestionFactoryOptions {
|
|
61
|
+
/** Embedding model name override */
|
|
62
|
+
embeddingModel?: string;
|
|
63
|
+
/** Embedding batch size override */
|
|
64
|
+
batchSize?: number;
|
|
65
|
+
/** Chunk size override */
|
|
66
|
+
chunkSize?: number;
|
|
67
|
+
/** Chunk overlap override */
|
|
68
|
+
chunkOverlap?: number;
|
|
69
|
+
/** Whether to force rebuild the index */
|
|
70
|
+
forceRebuild?: boolean;
|
|
71
|
+
/** Mode for the ingestion pipeline (text or multimodal) */
|
|
72
|
+
mode?: 'text' | 'multimodal';
|
|
73
|
+
/** Reranking strategy for multimodal mode */
|
|
74
|
+
rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
|
|
75
|
+
/** Content system configuration */
|
|
76
|
+
contentSystemConfig?: ContentSystemConfig;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Factory for creating text-based IngestionPipeline instances
|
|
80
|
+
* Handles model loading, database initialization, and index setup
|
|
81
|
+
*
|
|
82
|
+
* This factory abstracts the complex initialization process required for text ingestion:
|
|
83
|
+
* 1. Creates necessary directories if they don't exist
|
|
84
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
85
|
+
* 3. Loads and validates embedding models with clear error reporting
|
|
86
|
+
* 4. Establishes database connections and initializes schema
|
|
87
|
+
* 5. Stores mode configuration in database for automatic detection
|
|
88
|
+
* 6. Creates or loads vector indexes with proper configuration
|
|
89
|
+
* 7. Creates IngestionPipeline with proper dependency injection
|
|
90
|
+
*
|
|
91
|
+
* Mode Configuration:
|
|
92
|
+
* - Text Mode (default): Uses sentence-transformer models for text-only content
|
|
93
|
+
* - Multimodal Mode: Uses CLIP models for mixed text/image content
|
|
94
|
+
* - Mode is stored in database and auto-detected during search
|
|
95
|
+
* - Clear validation prevents mode-model mismatches
|
|
96
|
+
*
|
|
97
|
+
* @example
|
|
98
|
+
* ```typescript
|
|
99
|
+
* // Basic usage
|
|
100
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
101
|
+
* await ingestion.ingestDirectory('./documents');
|
|
102
|
+
*
|
|
103
|
+
* // With custom configuration
|
|
104
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
105
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
106
|
+
* chunkSize: 512,
|
|
107
|
+
* chunkOverlap: 50,
|
|
108
|
+
* forceRebuild: true
|
|
109
|
+
* });
|
|
110
|
+
*
|
|
111
|
+
* // With defaults
|
|
112
|
+
* const ingestion = await IngestionFactory.createWithDefaults({
|
|
113
|
+
* batchSize: 32 // Faster processing
|
|
114
|
+
* });
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
export declare class IngestionFactory {
|
|
118
|
+
/**
|
|
119
|
+
* Create an IngestionPipeline configured for text ingestion
|
|
120
|
+
*
|
|
121
|
+
* This method handles the complete initialization process:
|
|
122
|
+
* - Creates necessary directories if they don't exist
|
|
123
|
+
* - Loads text embedding model (with lazy initialization)
|
|
124
|
+
* - Opens database connection and initializes schema
|
|
125
|
+
* - Creates or loads vector index (with force rebuild option)
|
|
126
|
+
* - Creates IngestionPipeline with dependency injection
|
|
127
|
+
* - Validates the complete setup
|
|
128
|
+
*
|
|
129
|
+
* @param dbPath - Path to the SQLite database file (will be created if doesn't exist)
|
|
130
|
+
* @param indexPath - Path to the vector index file (will be created if doesn't exist)
|
|
131
|
+
* @param options - Optional configuration overrides
|
|
132
|
+
* @param options.embeddingModel - Override embedding model (default: from config)
|
|
133
|
+
* @param options.batchSize - Override embedding batch size (default: from config)
|
|
134
|
+
* @param options.chunkSize - Override chunk size (default: from config)
|
|
135
|
+
* @param options.chunkOverlap - Override chunk overlap (default: from config)
|
|
136
|
+
* @param options.forceRebuild - Force rebuild of existing index (default: false)
|
|
137
|
+
* @param options.contentSystemConfig - Content system configuration options
|
|
138
|
+
* @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
|
|
139
|
+
* @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
|
|
140
|
+
* @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
|
|
141
|
+
* @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
|
|
142
|
+
* @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
|
|
143
|
+
* @returns Promise resolving to configured IngestionPipeline
|
|
144
|
+
* @throws {Error} If initialization fails
|
|
145
|
+
*
|
|
146
|
+
* @example
|
|
147
|
+
* ```typescript
|
|
148
|
+
* // Create ingestion pipeline with default content system
|
|
149
|
+
* const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
|
|
150
|
+
*
|
|
151
|
+
* // Create with custom content system configuration
|
|
152
|
+
* const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
|
|
153
|
+
* contentSystemConfig: {
|
|
154
|
+
* contentDir: './custom-content',
|
|
155
|
+
* maxFileSize: 100 * 1024 * 1024, // 100MB
|
|
156
|
+
* maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
|
|
157
|
+
* enableDeduplication: true
|
|
158
|
+
* }
|
|
159
|
+
* });
|
|
160
|
+
*
|
|
161
|
+
* // Ingest documents from directory
|
|
162
|
+
* const result = await ingestion.ingestDirectory('./documents');
|
|
163
|
+
* console.log(`Processed ${result.documentsProcessed} documents`);
|
|
164
|
+
*
|
|
165
|
+
* // Ingest content from memory (MCP integration)
|
|
166
|
+
* const contentId = await ingestion.ingestFromMemory(buffer, {
|
|
167
|
+
* displayName: 'uploaded-file.pdf',
|
|
168
|
+
* contentType: 'application/pdf'
|
|
169
|
+
* });
|
|
170
|
+
*
|
|
171
|
+
* // Clean up when done
|
|
172
|
+
* await ingestion.cleanup();
|
|
173
|
+
* ```
|
|
174
|
+
*/
|
|
175
|
+
static create(dbPath: string, indexPath: string, options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
|
|
176
|
+
/**
|
|
177
|
+
* Create an IngestionPipeline with automatic path resolution
|
|
178
|
+
* Uses default paths based on current working directory
|
|
179
|
+
* @param options - Optional configuration overrides
|
|
180
|
+
* @returns Promise resolving to configured IngestionPipeline
|
|
181
|
+
*/
|
|
182
|
+
static createWithDefaults(options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
|
|
183
|
+
/**
|
|
184
|
+
* Handles mode storage during ingestion
|
|
185
|
+
* Creates or validates system info based on the provided mode and options
|
|
186
|
+
* @private
|
|
187
|
+
*/
|
|
188
|
+
private static handleModeStorage;
|
|
189
|
+
/**
|
|
190
|
+
* Updates system info in the database
|
|
191
|
+
* @private
|
|
192
|
+
*/
|
|
193
|
+
private static updateSystemInfo;
|
|
194
|
+
/**
|
|
195
|
+
* Validates and prepares content system configuration
|
|
196
|
+
* @private
|
|
197
|
+
*/
|
|
198
|
+
private static validateAndPrepareContentSystemConfig;
|
|
199
|
+
}
|
|
200
|
+
//# sourceMappingURL=ingestion-factory.d.ts.map
|