rag-lite-ts 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -109
- package/dist/cli/indexer.js +262 -46
- package/dist/cli/search.js +54 -32
- package/dist/cli.js +185 -28
- package/dist/config.d.ts +34 -73
- package/dist/config.js +50 -255
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/adapters.d.ts +93 -0
- package/dist/core/adapters.js +139 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +119 -0
- package/dist/core/chunker.js +73 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.d.ts +102 -0
- package/dist/core/config.js +247 -0
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +245 -0
- package/dist/core/db.js +952 -0
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
- package/dist/{error-handler.js → core/error-handler.js} +51 -8
- package/dist/core/index.d.ts +59 -0
- package/dist/core/index.js +69 -0
- package/dist/core/ingestion.d.ts +213 -0
- package/dist/core/ingestion.js +812 -0
- package/dist/core/interfaces.d.ts +408 -0
- package/dist/core/interfaces.js +106 -0
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
- package/dist/{path-manager.js → core/path-manager.js} +5 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search-pipeline.d.ts +111 -0
- package/dist/core/search-pipeline.js +287 -0
- package/dist/core/search.d.ts +131 -0
- package/dist/core/search.js +296 -0
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +66 -0
- package/dist/core/types.js +6 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
- package/dist/{vector-index.js → core/vector-index.js} +21 -3
- package/dist/dom-polyfills.d.ts +6 -0
- package/dist/dom-polyfills.js +40 -0
- package/dist/factories/index.d.ts +43 -0
- package/dist/factories/index.js +44 -0
- package/dist/factories/text-factory.d.ts +560 -0
- package/dist/factories/text-factory.js +968 -0
- package/dist/file-processor.d.ts +90 -4
- package/dist/file-processor.js +723 -20
- package/dist/index-manager.d.ts +3 -2
- package/dist/index-manager.js +13 -11
- package/dist/index.d.ts +72 -8
- package/dist/index.js +102 -16
- package/dist/indexer.js +1 -1
- package/dist/ingestion.d.ts +44 -154
- package/dist/ingestion.js +75 -671
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1186 -79
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/preprocess.js +1 -1
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search-standalone.js +1 -1
- package/dist/search.d.ts +51 -69
- package/dist/search.js +117 -412
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +33 -0
- package/dist/{chunker.js → text/chunker.js} +98 -75
- package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
- package/dist/{embedder.js → text/embedder.js} +84 -10
- package/dist/text/index.d.ts +8 -0
- package/dist/text/index.js +9 -0
- package/dist/text/preprocessors/index.d.ts +17 -0
- package/dist/text/preprocessors/index.js +38 -0
- package/dist/text/preprocessors/mdx.d.ts +25 -0
- package/dist/text/preprocessors/mdx.js +101 -0
- package/dist/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/text/preprocessors/mermaid.js +330 -0
- package/dist/text/preprocessors/registry.d.ts +56 -0
- package/dist/text/preprocessors/registry.js +180 -0
- package/dist/text/reranker.d.ts +59 -0
- package/dist/{reranker.js → text/reranker.js} +138 -53
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
- package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
- package/dist/types.d.ts +40 -1
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +16 -4
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/chunker.d.ts +0 -47
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/db.d.ts +0 -90
- package/dist/db.d.ts.map +0 -1
- package/dist/db.js +0 -340
- package/dist/db.js.map +0 -1
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js.map +0 -1
- package/dist/error-handler.d.ts.map +0 -1
- package/dist/error-handler.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/path-manager.d.ts.map +0 -1
- package/dist/path-manager.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/reranker.d.ts +0 -40
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js.map +0 -1
- package/dist/resource-manager-demo.d.ts +0 -7
- package/dist/resource-manager-demo.d.ts.map +0 -1
- package/dist/resource-manager-demo.js +0 -52
- package/dist/resource-manager-demo.js.map +0 -1
- package/dist/resource-manager.d.ts +0 -129
- package/dist/resource-manager.d.ts.map +0 -1
- package/dist/resource-manager.js +0 -389
- package/dist/resource-manager.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/vector-index.d.ts.map +0 -1
- package/dist/vector-index.js.map +0 -1
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Settings that control how a document is split into chunks.
 */
export interface ChunkConfig {
    /** Desired size of each chunk, in tokens (200-300 is the recommended range). */
    chunkSize: number;
    /** Number of tokens shared between neighbouring chunks (50 is recommended). */
    chunkOverlap: number;
}
|
|
14
|
+
/**
 * Content-type-agnostic description of a document handed to the chunker.
 */
export interface GenericDocument {
    /** Path or other identifier telling where the document came from. */
    source: string;
    /** Title of the document. */
    title: string;
    /** The document's content (text, an image path, etc.). */
    content: string;
    /** Identifier of the kind of content (text, image, etc.). */
    contentType: string;
    /** Free-form extra data attached to the document, if any. */
    metadata?: Record<string, any>;
}
|
|
29
|
+
/**
 * Content-type-agnostic description of one chunk produced from a document.
 */
export interface GenericChunk {
    /** What the chunk holds (text, an image path, etc.). */
    content: string;
    /** Identifier of the kind of content (text, image, etc.). */
    contentType: string;
    /** Position of this chunk within its document. */
    chunkIndex: number;
    /** Free-form extra data attached to the chunk, if any. */
    metadata?: Record<string, any>;
}
|
|
42
|
+
/**
 * Contract implemented by every content-type-specific chunker.
 */
export interface ChunkingStrategy {
    /**
     * Tell whether this strategy can handle the given content type.
     */
    appliesTo(contentType: string): boolean;
    /**
     * Split a document into chunks according to this strategy.
     * Resolves with the produced chunks.
     */
    chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
}
|
|
55
|
+
/**
 * Holds the chunking strategies known to the application.
 */
export declare class ChunkingStrategyRegistry {
    private strategies;
    /**
     * Add a chunking strategy to the registry.
     */
    register(strategy: ChunkingStrategy): void;
    /**
     * Look up a strategy that handles the given content type.
     * Yields `undefined` when no registered strategy applies.
     */
    findStrategy(contentType: string): ChunkingStrategy | undefined;
    /**
     * List every registered strategy.
     */
    getStrategies(): ChunkingStrategy[];
}
|
|
73
|
+
/**
 * Chunking configuration used when a caller does not supply one.
 */
export declare const DEFAULT_CHUNK_CONFIG: ChunkConfig;
|
|
77
|
+
/**
 * Module-wide registry instance shared by all chunking helpers.
 */
export declare const chunkingRegistry: ChunkingStrategyRegistry;
|
|
81
|
+
/**
 * Chunk a document of any content type by delegating to a registered strategy.
 *
 * @param document - Document to split; its `contentType` selects the strategy
 * @param config - Optional chunking settings
 * @returns The chunks produced by the selected strategy
 */
export declare function chunkGenericDocument(document: GenericDocument, config?: ChunkConfig): Promise<GenericChunk[]>;
|
|
85
|
+
/**
 * A text document as accepted by the text chunker.
 */
export interface Document {
    /** Path or other identifier telling where the document came from. */
    source: string;
    /** Title of the document. */
    title: string;
    /** The complete text of the document. */
    content: string;
    /** Free-form extra data, if any. */
    metadata?: Record<string, any>;
}
|
|
98
|
+
/**
 * One piece of text produced by the text chunker.
 */
export interface Chunk {
    /** Text contained in this chunk. */
    text: string;
    /** Position of this chunk within its document. */
    chunkIndex: number;
    /** How many tokens this chunk contains. */
    tokenCount: number;
}
|
|
109
|
+
/**
 * Chunk a text document.
 * Delegates to the chunker provided by the text implementation layer.
 *
 * @param document - Text document to split
 * @param config - Optional chunking settings
 * @returns The resulting text chunks
 */
export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
|
|
114
|
+
/**
 * Add the text chunking strategy to the global registry.
 * Intended to be invoked while the application initializes.
 */
export declare function registerTextChunkingStrategy(): Promise<void>;
|
|
119
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Registry for chunking strategies
|
|
7
|
+
*/
|
|
8
|
+
export class ChunkingStrategyRegistry {
|
|
9
|
+
strategies = [];
|
|
10
|
+
/**
|
|
11
|
+
* Register a chunking strategy
|
|
12
|
+
*/
|
|
13
|
+
register(strategy) {
|
|
14
|
+
this.strategies.push(strategy);
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Find the appropriate strategy for a content type
|
|
18
|
+
*/
|
|
19
|
+
findStrategy(contentType) {
|
|
20
|
+
return this.strategies.find(strategy => strategy.appliesTo(contentType));
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Get all registered strategies
|
|
24
|
+
*/
|
|
25
|
+
getStrategies() {
|
|
26
|
+
return [...this.strategies];
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
/**
 * Chunking settings applied when a caller passes no configuration.
 */
export const DEFAULT_CHUNK_CONFIG = {
    // Sits in the middle of the 200-300 token target range.
    chunkSize: 250,
    // Tokens shared between consecutive chunks.
    chunkOverlap: 50
};
|
|
36
|
+
/**
 * Single registry instance shared by every chunking helper in this module.
 */
export const chunkingRegistry = new ChunkingStrategyRegistry();
|
|
40
|
+
/**
 * Chunk a document of any content type through the global strategy registry.
 *
 * The strategy is selected by `document.contentType`. When nothing in the
 * registry accepts that type, the returned promise rejects with an Error.
 *
 * NOTE: the built-in text strategy is registered asynchronously when this
 * module loads, so a call made before that registration has finished can
 * still hit the "No chunking strategy found" error.
 *
 * @param document - Document to split
 * @param config - Chunking settings; defaults to DEFAULT_CHUNK_CONFIG
 * @returns Whatever the selected strategy's `chunk` produces
 */
export async function chunkGenericDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    const { contentType } = document;
    const matched = chunkingRegistry.findStrategy(contentType);
    if (matched) {
        return matched.chunk(document, config);
    }
    throw new Error(`No chunking strategy found for content type: ${contentType}`);
}
|
|
50
|
+
/**
 * Chunk a text document.
 * Forwards the call to `chunkDocument` exported by the text implementation layer.
 *
 * @param document - Text document to split
 * @param config - Chunking settings; defaults to DEFAULT_CHUNK_CONFIG
 * @returns Whatever the text-layer chunker resolves with
 */
export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    // Loaded lazily (not via a static import) to avoid a circular dependency
    // between this core module and the text layer.
    const textModule = await import('../text/chunker.js');
    const chunkTextDocument = textModule.chunkDocument;
    return chunkTextDocument(document, config);
}
|
|
59
|
+
/**
 * Register the text chunking strategy with the global registry.
 *
 * Safe to call more than once: when the registry already holds a
 * `TextChunkingStrategy` instance, the call is a no-op. This matters because
 * this module also registers the strategy automatically on load, so an
 * explicit call during application initialization would otherwise leave two
 * identical entries in the registry.
 *
 * @returns A promise that settles once the strategy is present in the registry
 */
export async function registerTextChunkingStrategy() {
    // Loaded lazily, like the other text-layer imports in this module.
    const { TextChunkingStrategy } = await import('../text/chunker.js');
    // The check and the registration run in the same synchronous stretch after
    // the await, so concurrent callers cannot both pass the check.
    const alreadyRegistered = chunkingRegistry
        .getStrategies()
        .some(existing => existing instanceof TextChunkingStrategy);
    if (alreadyRegistered) {
        return;
    }
    chunkingRegistry.register(new TextChunkingStrategy());
}
|
|
68
|
+
// Kick off registration of the text strategy as soon as this module loads so
// that text chunking is available without any explicit setup. The call is not
// awaited; a failure is reported as a warning rather than thrown.
registerTextChunkingStrategy().then(undefined, reason => {
    console.warn('Failed to register text chunking strategy:', reason);
});
|
|
73
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Database Utilities - Database access helpers for CLI commands
|
|
3
|
+
* Provides database locking detection and retry mechanisms for CLI operations
|
|
4
|
+
* Prevents conflicts between CLI commands and long-running processes like MCP server
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Options accepted by the CLI database access helpers.
 * Every field is optional.
 */
export interface CLIDatabaseOptions {
    /** Upper bound, in milliseconds, on how long to wait for database access. */
    maxWaitMs?: number;
    /** Pause, in milliseconds, between retries. */
    retryIntervalMs?: number;
    /** Whether progress messages are printed for the user. */
    showProgress?: boolean;
    /** Name of the command, used to produce clearer error messages. */
    commandName?: string;
}
|
|
19
|
+
/**
 * Block until the database can be accessed by a CLI operation.
 * Prints user-friendly progress messages and fails with a descriptive error.
 *
 * @param dbPath - Path of the database file
 * @param options - Optional wait/retry/progress settings
 */
export declare function waitForCLIDatabaseAccess(dbPath: string, options?: CLIDatabaseOptions): Promise<void>;
|
|
24
|
+
/**
 * Run a CLI operation once the database is accessible.
 * Deals with database locking and gives the user feedback along the way.
 *
 * @param dbPath - Path of the database file
 * @param operation - Work to perform after access has been obtained
 * @param options - Optional wait/retry/progress settings
 * @returns The value the operation resolves with
 */
export declare function withCLIDatabaseAccess<T>(dbPath: string, operation: () => Promise<T>, options?: CLIDatabaseOptions): Promise<T>;
|
|
29
|
+
/**
 * Probe whether the database is busy right now, without a long wait.
 * Handy for warnings and status output.
 *
 * @param dbPath - Path of the database file
 * @returns `isBusy`, plus an explanatory `reason` and `suggestions` when available
 */
export declare function isDatabaseBusy(dbPath: string): Promise<{
    isBusy: boolean;
    reason?: string;
    suggestions?: string[];
}>;
|
|
38
|
+
/**
 * Print status information about the database for debugging.
 * Helps when troubleshooting CLI problems.
 *
 * @param dbPath - Path of the database file
 */
export declare function showDatabaseStatus(dbPath: string): Promise<void>;
|
|
43
|
+
/**
 * Forcibly clean up connections to the database. Emergency use only:
 * reserve it for recovering from a stuck state.
 *
 * @param dbPath - Path of the database file
 */
export declare function forceCleanupDatabase(dbPath: string): Promise<void>;
|
|
48
|
+
/**
 * Install graceful-shutdown handling for a CLI command so that cleanup runs
 * when the command is interrupted.
 *
 * @param dbPath - Optional path of the database the command is using
 */
export declare function setupCLICleanup(dbPath?: string): void;
|
|
53
|
+
//# sourceMappingURL=cli-database-utils.d.ts.map
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Database Utilities - Database access helpers for CLI commands
|
|
3
|
+
* Provides database locking detection and retry mechanisms for CLI operations
|
|
4
|
+
* Prevents conflicts between CLI commands and long-running processes like MCP server
|
|
5
|
+
*/
|
|
6
|
+
import { DatabaseConnectionManager } from './database-connection-manager.js';
|
|
7
|
+
import { existsSync } from 'fs';
|
|
8
|
+
/**
 * Fallback values for every CLI database option a caller leaves out.
 */
const DEFAULT_CLI_OPTIONS = {
    // Give up after ten seconds of waiting.
    maxWaitMs: 10000,
    // Pause half a second between attempts.
    retryIntervalMs: 500,
    showProgress: true,
    commandName: 'CLI command'
};
|
|
17
|
+
/**
 * Wait for the database to become available for a CLI operation.
 *
 * Repeatedly probes the database through DatabaseConnectionManager. While the
 * failure looks like a lock ("SQLITE_BUSY" / "busy" in the error message), it
 * sleeps `retryIntervalMs` and tries again until `maxWaitMs` has elapsed.
 * Any other failure aborts immediately. The database is always probed at
 * least once, even when `maxWaitMs` is 0.
 *
 * @param dbPath - Path of the database file; must already exist
 * @param options - Optional overrides for the defaults in DEFAULT_CLI_OPTIONS;
 *                  fields that are missing or `undefined` fall back to the default
 * @throws {Error} If the file does not exist, if access fails for a reason
 *                 other than the database being busy, or if the database is
 *                 still busy when the wait budget runs out
 */
export async function waitForCLIDatabaseAccess(dbPath, options = {}) {
    // Drop explicit `undefined` values before merging: a plain spread would let
    // e.g. `{ maxWaitMs: undefined }` erase the default and end the wait
    // immediately without a single attempt.
    const overrides = Object.fromEntries(Object.entries(options ?? {}).filter(([, value]) => value !== undefined));
    const opts = { ...DEFAULT_CLI_OPTIONS, ...overrides };
    // Check if database file exists
    if (!existsSync(dbPath)) {
        throw new Error(`Database file not found: ${dbPath}\n` +
            `Please run 'raglite ingest <path>' first to create the database.`);
    }
    const startTime = Date.now();
    let attempts = 0;
    let lastError = null;
    do {
        attempts++;
        try {
            // Try to get database access.
            // NOTE(review): the second argument is presumably a per-attempt
            // timeout in ms — confirm against DatabaseConnectionManager.
            await DatabaseConnectionManager.waitForDatabaseAccess(dbPath, 1000);
            if (opts.showProgress && attempts > 1) {
                console.log(`✅ Database is now available (after ${attempts} attempts)`);
            }
            return; // Success!
        }
        catch (error) {
            // Normalize so that a thrown string or other non-Error value cannot
            // crash the message inspection below.
            lastError = error instanceof Error ? error : new Error(String(error));
            const isBusy = lastError.message.includes('SQLITE_BUSY') || lastError.message.includes('busy');
            if (!isBusy) {
                // Other error - don't retry
                throw new Error(`Failed to access database: ${lastError.message}\n` +
                    `Please check that the database file is not corrupted and you have proper permissions.`);
            }
            // Database is busy - show progress and retry
            if (opts.showProgress) {
                if (attempts === 1) {
                    console.log(`⏳ Database is busy, waiting for access...`);
                    console.log(` This usually happens when another process is using the database.`);
                    console.log(` Common causes:`);
                    console.log(` • MCP server is running`);
                    console.log(` • Another CLI command is in progress`);
                    console.log(` • Long-running ingestion process`);
                    console.log('');
                }
                else if (attempts % 4 === 0) {
                    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
                    console.log(` Still waiting... (${elapsed}s elapsed, attempt ${attempts})`);
                }
            }
            // Wait before retrying
            await new Promise(resolve => setTimeout(resolve, opts.retryIntervalMs));
        }
    } while (Date.now() - startTime < opts.maxWaitMs);
    // Timeout reached
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    throw new Error(`Database is still busy after ${elapsed} seconds.\n` +
        `\n` +
        `This might be because:\n` +
        `• Another process is using the database (MCP server, long ingestion, etc.)\n` +
        `• The database is locked due to an interrupted operation\n` +
        `\n` +
        `Solutions:\n` +
        `• Wait for other operations to complete\n` +
        `• Stop the MCP server if running\n` +
        `• Restart your terminal/process\n` +
        `• As a last resort, restart your computer\n` +
        `\n` +
        `Last error: ${lastError?.message || 'Unknown error'}`);
}
|
|
87
|
+
/**
 * Execute a CLI operation with database access protection.
 *
 * Waits for the database to become accessible, then runs `operation` and
 * returns its result. When either step fails with an Error, a new Error is
 * thrown whose message adds CLI troubleshooting hints; the original error
 * stays reachable through the new error's `cause` property, so its stack and
 * any extra fields are not lost. Non-Error throwables are re-thrown untouched.
 *
 * @param dbPath - Path of the database file
 * @param operation - Async work to run once access has been obtained
 * @param options - Optional overrides for the defaults in DEFAULT_CLI_OPTIONS;
 *                  fields that are missing or `undefined` fall back to the default
 * @returns The value `operation` resolves with
 * @throws {Error} An enhanced error wrapping the original failure
 */
export async function withCLIDatabaseAccess(dbPath, operation, options = {}) {
    // Ignore explicit `undefined` values so they cannot erase a default
    // (which would otherwise produce e.g. "undefined failed: ...").
    const overrides = Object.fromEntries(Object.entries(options ?? {}).filter(([, value]) => value !== undefined));
    const opts = { ...DEFAULT_CLI_OPTIONS, ...overrides };
    try {
        // Wait for database access
        await waitForCLIDatabaseAccess(dbPath, opts);
        // Execute the operation
        return await operation();
    }
    catch (error) {
        if (!(error instanceof Error)) {
            throw error;
        }
        // Enhance error message with CLI context
        const enhancedMessage = `${opts.commandName} failed: ${error.message}\n` +
            `\n` +
            `If this error persists:\n` +
            `• Check that no other RAG-lite processes are running\n` +
            `• Verify database file permissions\n` +
            `• Try running the command again\n`;
        const enhanced = new Error(enhancedMessage);
        // Attach the original error using the same property attributes the
        // standard `cause` option produces (non-enumerable, writable, configurable).
        Object.defineProperty(enhanced, 'cause', {
            value: error,
            writable: true,
            configurable: true,
            enumerable: false
        });
        throw enhanced;
    }
}
|
|
113
|
+
/**
 * Probe whether the database is currently busy, using a short (100) wait.
 * Useful for warnings and status displays.
 *
 * Any failure of the probe is reported as `isBusy: true`; `reason` and
 * `suggestions` tell a lock ("SQLITE_BUSY" / "busy" in the error message)
 * apart from other access errors.
 *
 * @param dbPath - Path of the database file
 * @returns `{ isBusy: false }` when the probe succeeds, otherwise a busy
 *          report with a reason and suggestions
 */
export async function isDatabaseBusy(dbPath) {
    try {
        await DatabaseConnectionManager.waitForDatabaseAccess(dbPath, 100);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        const lockedByOther = error instanceof Error &&
            ['SQLITE_BUSY', 'busy'].some(marker => detail.includes(marker));
        if (lockedByOther) {
            return {
                isBusy: true,
                reason: 'Database is currently in use by another process',
                suggestions: [
                    'Wait for other operations to complete',
                    'Stop MCP server if running',
                    'Check for other CLI commands in progress'
                ]
            };
        }
        // Not a lock: still reported as busy, but with the underlying message.
        return {
            isBusy: true,
            reason: `Database access error: ${detail}`,
            suggestions: [
                'Check database file permissions',
                'Verify database file is not corrupted',
                'Ensure you have read/write access to the database directory'
            ]
        };
    }
    return { isBusy: false };
}
|
|
146
|
+
/**
 * Print a human-readable status report for the database to the console.
 * Meant for debugging and troubleshooting CLI problems.
 *
 * The report covers, in order: file existence, file size and modification
 * time, busy/available state, and the connection manager's statistics when
 * it reports at least one connection.
 *
 * @param dbPath - Path of the database file
 */
export async function showDatabaseStatus(dbPath) {
    console.log(`📊 Database Status: ${dbPath}`);
    console.log('');
    // Nothing more to report when the file is missing.
    if (!existsSync(dbPath)) {
        console.log('❌ Database file does not exist');
        console.log(' Run "raglite ingest <path>" to create the database');
        return;
    }
    // File size and modification time; a failure here is reported but not fatal.
    try {
        const { statSync } = await import('fs');
        const fileInfo = statSync(dbPath);
        console.log(`📁 File size: ${(fileInfo.size / 1024).toFixed(1)} KB`);
        console.log(`📅 Last modified: ${fileInfo.mtime.toLocaleString()}`);
    }
    catch (error) {
        console.log(`⚠️ Cannot read file stats: ${error}`);
    }
    // Busy / available state.
    const busyStatus = await isDatabaseBusy(dbPath);
    if (!busyStatus.isBusy) {
        console.log(`✅ Status: AVAILABLE`);
    }
    else {
        console.log(`🔒 Status: BUSY`);
        console.log(` Reason: ${busyStatus.reason}`);
        if (busyStatus.suggestions) {
            console.log(' Suggestions:');
            for (const suggestion of busyStatus.suggestions) {
                console.log(` • ${suggestion}`);
            }
        }
    }
    // Connection manager statistics, shown only when something is connected.
    const connectionStats = DatabaseConnectionManager.getConnectionStats();
    if (connectionStats.totalConnections > 0) {
        console.log('');
        console.log(`🔗 Active connections: ${connectionStats.totalConnections}`);
        connectionStats.connections.forEach((conn, index) => {
            console.log(` ${index + 1}. ${conn.path}`);
            console.log(` References: ${conn.refCount}`);
            console.log(` Last accessed: ${conn.lastAccessed.toLocaleString()}`);
            console.log(` Idle time: ${(conn.idleTime / 1000).toFixed(1)}s`);
        });
    }
    console.log('');
}
|
|
198
|
+
/**
 * Forcibly close the connection manager's connection for the given database.
 * Emergency use only: meant for recovering from a stuck state.
 *
 * Never rejects because of the close itself; a failure is printed together
 * with a hint instead of being thrown.
 *
 * @param dbPath - Path of the database file
 */
export async function forceCleanupDatabase(dbPath) {
    console.log(`🚨 Force cleaning up database connections: ${dbPath}`);
    let failed = false;
    let failure;
    try {
        await DatabaseConnectionManager.forceCloseConnection(dbPath);
    }
    catch (error) {
        failed = true;
        failure = error;
    }
    if (failed) {
        console.log(`⚠️ Force cleanup failed: ${failure}`);
        console.log('You may need to restart the process or reboot your system');
    }
    else {
        console.log('✅ Force cleanup completed');
    }
}
|
|
213
|
+
/**
 * Graceful shutdown helper for CLI commands.
 *
 * Installs a SIGINT and a SIGTERM handler, each only when the process has no
 * listener for that signal yet. The handler releases the connection for
 * `dbPath` (when given), closes all managed connections, and then exits the
 * process. The exit code is 0 after a clean shutdown and 1 when closing the
 * connections failed. The process is terminated in both cases, so a failing
 * cleanup can no longer leave an interrupted command running.
 *
 * @param dbPath - Optional path of the database the command is using
 */
export function setupCLICleanup(dbPath) {
    const cleanup = async () => {
        console.log('\n🛑 Shutting down gracefully...');
        let exitCode = 0;
        try {
            if (dbPath) {
                try {
                    await DatabaseConnectionManager.releaseConnection(dbPath);
                }
                catch (error) {
                    // Ignore cleanup errors during shutdown
                }
            }
            await DatabaseConnectionManager.closeAllConnections();
        }
        catch (error) {
            // Report the failure, but still fall through to process.exit below:
            // with this handler installed, the signal no longer terminates the
            // process on its own.
            exitCode = 1;
            console.error('Failed to close database connections during shutdown:', error);
        }
        finally {
            process.exit(exitCode);
        }
    };
    // Only set up handlers if they haven't been set up already
    if (!process.listenerCount('SIGINT')) {
        process.on('SIGINT', cleanup);
    }
    if (!process.listenerCount('SIGTERM')) {
        process.on('SIGTERM', cleanup);
    }
}
|
|
239
|
+
//# sourceMappingURL=cli-database-utils.js.map
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Model-agnostic settings shared by the core modules.
 */
export interface CoreConfig {
    /** Chunk size setting. */
    chunk_size: number;
    /** Chunk overlap setting. */
    chunk_overlap: number;
    /** Batch size setting. */
    batch_size: number;
    /** Top-k setting (presumably the number of search results — confirm against the implementation). */
    top_k: number;
    /** Database file. */
    db_file: string;
    /** Index file. */
    index_file: string;
    /** Optional model cache path. */
    model_cache_path?: string;
    /** Whether paths are stored as absolute or relative. */
    path_storage_strategy: 'absolute' | 'relative';
    /** Name of the embedding model. */
    embedding_model: string;
    /** Whether reranking is enabled. */
    rerank_enabled: boolean;
    /** Optional preprocessing settings (untyped). */
    preprocessing?: any;
}
|
|
21
|
+
/**
 * Core configuration plus an implementation-specific section.
 * `T` is the shape of that section and defaults to `{}`.
 */
export interface ExtensibleConfig<T = {}> extends CoreConfig {
    /** Settings owned by the concrete implementation. */
    implementation: T;
}
|
|
27
|
+
/**
 * Standard exit codes, one per category of error condition
 * (0 means success).
 */
export declare const EXIT_CODES: {
    readonly SUCCESS: 0;
    readonly GENERAL_ERROR: 1;
    readonly INVALID_ARGUMENTS: 2;
    readonly CONFIGURATION_ERROR: 3;
    readonly FILE_NOT_FOUND: 4;
    readonly DATABASE_ERROR: 5;
    readonly MODEL_ERROR: 6;
    readonly INDEX_ERROR: 7;
    readonly PERMISSION_ERROR: 8;
};
|
|
41
|
+
/**
 * Error raised for invalid configuration; carries the exit code to use.
 */
export declare class ConfigurationError extends Error {
    /** Exit code associated with this error. */
    exitCode: number;
    /**
     * @param message - Description of the configuration problem
     * @param exitCode - Optional exit code for this error
     */
    constructor(message: string, exitCode?: number);
}
|
|
48
|
+
/**
 * Return the default location of the model cache, as laid down in the
 * requirements.
 *
 * @returns The default cache path (~/.raglite/models/)
 */
export declare function getDefaultModelCachePath(): string;
|
|
53
|
+
/**
 * Check the core configuration fields of an arbitrary value.
 * After a successful call the compiler treats `config` as a `CoreConfig`.
 *
 * @param config - Configuration object to check
 * @throws {ConfigurationError} When the configuration is invalid
 */
export declare function validateCoreConfig(config: any): asserts config is CoreConfig;
|
|
59
|
+
/**
 * Default settings associated with a particular embedding model.
 */
export interface ModelDefaults {
    /** Dimensions value for the model (presumably the embedding vector size — confirm). */
    dimensions: number;
    /** Default chunk size for the model. */
    chunk_size: number;
    /** Default chunk overlap for the model. */
    chunk_overlap: number;
    /** Default batch size for the model. */
    batch_size: number;
}
|
|
68
|
+
/**
 * Look up the default settings for an embedding model.
 *
 * @param modelName - Name of the embedding model (optional)
 * @returns The defaults specific to that model
 */
export declare function getModelDefaults(modelName?: string): ModelDefaults;
|
|
74
|
+
/**
 * The default core configuration object.
 * Holds the model-agnostic settings available to core modules.
 */
export declare const config: CoreConfig;
|
|
79
|
+
/**
 * Check a preprocessing configuration.
 *
 * Note: the assertion signature targets `any`, so a successful call does not
 * narrow the type of `config` for the compiler.
 *
 * @param config - Preprocessing configuration to check
 */
export declare function validatePreprocessingConfig(config: any): asserts config is any;
|
|
83
|
+
/**
 * Combine a preprocessing configuration with the mode defaults.
 *
 * @param config - Preprocessing configuration to merge
 * @returns The merged configuration (untyped)
 */
export declare function mergePreprocessingConfig(config: any): any;
|
|
87
|
+
/**
 * Deal with an error the program cannot recover from: log it with a
 * descriptive message and exit right away with a suitable exit code.
 * Declared as returning `never`, i.e. control does not come back to the caller.
 *
 * @param error - The error, as an Error object or a plain message
 * @param context - Where the error happened
 * @param exitCode - Exit code to use (documented default: GENERAL_ERROR)
 */
export declare function handleUnrecoverableError(error: Error | string, context: string, exitCode?: number): never;
|
|
95
|
+
/**
 * Log an error safely, together with the context it occurred in.
 *
 * @param error - The error to log, as an Error object or a plain message
 * @param context - Where the error happened
 * @param skipError - Whether this error is skipped so processing continues (documented default: false)
 */
export declare function logError(error: Error | string, context: string, skipError?: boolean): void;
|
|
102
|
+
//# sourceMappingURL=config.d.ts.map
|