rag-lite-ts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +240 -0
- package/dist/api-errors.d.ts +90 -0
- package/dist/api-errors.d.ts.map +1 -0
- package/dist/api-errors.js +320 -0
- package/dist/api-errors.js.map +1 -0
- package/dist/chunker.d.ts +47 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +256 -0
- package/dist/chunker.js.map +1 -0
- package/dist/cli/indexer.d.ts +11 -0
- package/dist/cli/indexer.d.ts.map +1 -0
- package/dist/cli/indexer.js +272 -0
- package/dist/cli/indexer.js.map +1 -0
- package/dist/cli/search.d.ts +7 -0
- package/dist/cli/search.d.ts.map +1 -0
- package/dist/cli/search.js +206 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +362 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +90 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +281 -0
- package/dist/config.js.map +1 -0
- package/dist/db.d.ts +90 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +340 -0
- package/dist/db.js.map +1 -0
- package/dist/embedder.d.ts +101 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +323 -0
- package/dist/embedder.js.map +1 -0
- package/dist/error-handler.d.ts +91 -0
- package/dist/error-handler.d.ts.map +1 -0
- package/dist/error-handler.js +196 -0
- package/dist/error-handler.js.map +1 -0
- package/dist/file-processor.d.ts +59 -0
- package/dist/file-processor.d.ts.map +1 -0
- package/dist/file-processor.js +312 -0
- package/dist/file-processor.js.map +1 -0
- package/dist/index-manager.d.ts +99 -0
- package/dist/index-manager.d.ts.map +1 -0
- package/dist/index-manager.js +444 -0
- package/dist/index-manager.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +7 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +51 -0
- package/dist/indexer.js.map +1 -0
- package/dist/ingestion.d.ts +175 -0
- package/dist/ingestion.d.ts.map +1 -0
- package/dist/ingestion.js +705 -0
- package/dist/ingestion.js.map +1 -0
- package/dist/mcp-server.d.ts +14 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +680 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/path-manager.d.ts +42 -0
- package/dist/path-manager.d.ts.map +1 -0
- package/dist/path-manager.js +66 -0
- package/dist/path-manager.js.map +1 -0
- package/dist/preprocess.d.ts +19 -0
- package/dist/preprocess.d.ts.map +1 -0
- package/dist/preprocess.js +203 -0
- package/dist/preprocess.js.map +1 -0
- package/dist/preprocessors/index.d.ts +17 -0
- package/dist/preprocessors/index.d.ts.map +1 -0
- package/dist/preprocessors/index.js +38 -0
- package/dist/preprocessors/index.js.map +1 -0
- package/dist/preprocessors/mdx.d.ts +25 -0
- package/dist/preprocessors/mdx.d.ts.map +1 -0
- package/dist/preprocessors/mdx.js +101 -0
- package/dist/preprocessors/mdx.js.map +1 -0
- package/dist/preprocessors/mermaid.d.ts +68 -0
- package/dist/preprocessors/mermaid.d.ts.map +1 -0
- package/dist/preprocessors/mermaid.js +329 -0
- package/dist/preprocessors/mermaid.js.map +1 -0
- package/dist/preprocessors/registry.d.ts +56 -0
- package/dist/preprocessors/registry.d.ts.map +1 -0
- package/dist/preprocessors/registry.js +179 -0
- package/dist/preprocessors/registry.js.map +1 -0
- package/dist/reranker.d.ts +40 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +212 -0
- package/dist/reranker.js.map +1 -0
- package/dist/resource-manager-demo.d.ts +7 -0
- package/dist/resource-manager-demo.d.ts.map +1 -0
- package/dist/resource-manager-demo.js +52 -0
- package/dist/resource-manager-demo.js.map +1 -0
- package/dist/resource-manager.d.ts +129 -0
- package/dist/resource-manager.d.ts.map +1 -0
- package/dist/resource-manager.js +389 -0
- package/dist/resource-manager.js.map +1 -0
- package/dist/search-standalone.d.ts +7 -0
- package/dist/search-standalone.d.ts.map +1 -0
- package/dist/search-standalone.js +117 -0
- package/dist/search-standalone.js.map +1 -0
- package/dist/search.d.ts +92 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +454 -0
- package/dist/search.js.map +1 -0
- package/dist/test-utils.d.ts +36 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +27 -0
- package/dist/test-utils.js.map +1 -0
- package/dist/tokenizer.d.ts +21 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +59 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/types.d.ts +44 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/vector-index.d.ts +64 -0
- package/dist/vector-index.d.ts.map +1 -0
- package/dist/vector-index.js +308 -0
- package/dist/vector-index.js.map +1 -0
- package/package.json +80 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { type FileProcessorOptions } from './file-processor.js';
|
|
2
|
+
import { type ChunkConfig } from './chunker.js';
|
|
3
|
+
import { type EmbeddingEngine } from './embedder.js';
|
|
4
|
+
import { Config } from './config.js';
|
|
5
|
+
/**
|
|
6
|
+
* User-friendly error class with actionable suggestions
|
|
7
|
+
*/
|
|
8
|
+
export declare class IngestionError extends Error {
|
|
9
|
+
code: string;
|
|
10
|
+
suggestions: string[];
|
|
11
|
+
constructor(message: string, code: string, suggestions: string[]);
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Options for the ingestion pipeline
|
|
15
|
+
*/
|
|
16
|
+
export interface IngestionOptions {
|
|
17
|
+
/** File processing options */
|
|
18
|
+
fileOptions?: FileProcessorOptions;
|
|
19
|
+
/** Chunking configuration */
|
|
20
|
+
chunkConfig?: ChunkConfig;
|
|
21
|
+
/** Whether to force rebuild the index */
|
|
22
|
+
forceRebuild?: boolean;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Result of the ingestion process
|
|
26
|
+
*/
|
|
27
|
+
export interface IngestionResult {
|
|
28
|
+
/** Total documents processed */
|
|
29
|
+
documentsProcessed: number;
|
|
30
|
+
/** Total chunks created */
|
|
31
|
+
chunksCreated: number;
|
|
32
|
+
/** Total embeddings generated */
|
|
33
|
+
embeddingsGenerated: number;
|
|
34
|
+
/** Number of documents that failed processing */
|
|
35
|
+
documentErrors: number;
|
|
36
|
+
/** Number of chunks that failed embedding */
|
|
37
|
+
embeddingErrors: number;
|
|
38
|
+
/** Processing time in milliseconds */
|
|
39
|
+
processingTimeMs: number;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Main ingestion pipeline class
|
|
43
|
+
* Coordinates the entire process from file discovery to vector storage
|
|
44
|
+
*/
|
|
45
|
+
export declare class IngestionPipeline {
|
|
46
|
+
private static instances;
|
|
47
|
+
private static cleanupHandlersSet;
|
|
48
|
+
private db;
|
|
49
|
+
private indexManager;
|
|
50
|
+
private embeddingEngine;
|
|
51
|
+
private pathManager;
|
|
52
|
+
private isInitialized;
|
|
53
|
+
private dbPath;
|
|
54
|
+
private indexPath;
|
|
55
|
+
private basePath;
|
|
56
|
+
private configOverrides;
|
|
57
|
+
/**
|
|
58
|
+
* Creates a new IngestionPipeline with a simplified constructor
|
|
59
|
+
* Pipeline is ready to use immediately without requiring initialization calls (Requirement 1.5)
|
|
60
|
+
* @param basePath - Base directory path for database and index files (defaults to current directory)
|
|
61
|
+
* @param embedder - Pre-initialized embedding engine (optional; the default engine is used if not provided)
|
|
62
|
+
*/
|
|
63
|
+
constructor(basePath?: string, embedder?: EmbeddingEngine);
|
|
64
|
+
/**
|
|
65
|
+
* Set configuration overrides (for internal use)
|
|
66
|
+
* @param overrides - Configuration overrides to apply
|
|
67
|
+
*/
|
|
68
|
+
setConfigOverrides(overrides: Partial<Config>): void;
|
|
69
|
+
/**
|
|
70
|
+
* Set path storage strategy
|
|
71
|
+
* @param strategy - Path storage strategy ('absolute' or 'relative')
|
|
72
|
+
* @param basePath - Base path for relative paths (optional, defaults to current base path)
|
|
73
|
+
*/
|
|
74
|
+
setPathStorageStrategy(strategy: 'absolute' | 'relative', basePath?: string): void;
|
|
75
|
+
/**
|
|
76
|
+
* Get effective configuration with overrides applied
|
|
77
|
+
*/
|
|
78
|
+
private getEffectiveConfig;
|
|
79
|
+
/**
|
|
80
|
+
* Automatically initialize resources on first use with user-friendly error handling
|
|
81
|
+
* Implements lazy initialization as required by Requirement 5.2
|
|
82
|
+
*/
|
|
83
|
+
private ensureInitialized;
|
|
84
|
+
/**
|
|
85
|
+
* Create user-friendly error messages with actionable suggestions
|
|
86
|
+
* Implements requirement 5.3: Clear, actionable error messages with specific next steps
|
|
87
|
+
*/
|
|
88
|
+
private createUserFriendlyError;
|
|
89
|
+
/**
|
|
90
|
+
* Initialize the ingestion pipeline (public method for backward compatibility)
|
|
91
|
+
* Sets up database, index manager, and embedding engine
|
|
92
|
+
*/
|
|
93
|
+
initialize(): Promise<void>;
|
|
94
|
+
/**
|
|
95
|
+
* Ingest documents from a directory (matches README API)
|
|
96
|
+
* Automatically initializes resources on first use (Requirements 2.1, 2.3, 5.2)
|
|
97
|
+
* @param directoryPath - Path to directory containing documents
|
|
98
|
+
* @param options - Optional ingestion configuration
|
|
99
|
+
* @returns Promise resolving to ingestion results
|
|
100
|
+
*/
|
|
101
|
+
ingestDirectory(directoryPath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
102
|
+
/**
|
|
103
|
+
* Ingest a single file (matches README API)
|
|
104
|
+
* Automatically initializes resources on first use (Requirements 2.2, 2.3, 5.2)
|
|
105
|
+
* @param filePath - Path to the file to ingest
|
|
106
|
+
* @param options - Optional ingestion configuration
|
|
107
|
+
* @returns Promise resolving to ingestion results
|
|
108
|
+
*/
|
|
109
|
+
ingestFile(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
110
|
+
/**
|
|
111
|
+
* Ingest documents from a path (file or directory)
|
|
112
|
+
* Implements the complete pipeline: file processing → chunking → embedding → storage
|
|
113
|
+
*
|
|
114
|
+
* Requirements addressed:
|
|
115
|
+
* - 7.5: Single-threaded write processing to avoid SQLite lock contention
|
|
116
|
+
* - 3.3: Graceful handling of embedding failures without stopping ingestion
|
|
117
|
+
* - 10.1: Progress logging and error reporting during batch ingestion
|
|
118
|
+
* - 2.3: Automatic creation of database and index files in appropriate locations
|
|
119
|
+
*/
|
|
120
|
+
ingestPath(path: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
121
|
+
/**
|
|
122
|
+
* Chunk all documents and organize results
|
|
123
|
+
*/
|
|
124
|
+
private chunkDocuments;
|
|
125
|
+
/**
|
|
126
|
+
* Generate embeddings for all chunks with error handling
|
|
127
|
+
* Requirement 3.3: Graceful handling of embedding failures without stopping ingestion
|
|
128
|
+
*/
|
|
129
|
+
private generateEmbeddings;
|
|
130
|
+
/**
|
|
131
|
+
* Store documents and chunks in database with single-threaded writes
|
|
132
|
+
* Requirement 7.5: Single-threaded write processing to avoid SQLite lock contention
|
|
133
|
+
*/
|
|
134
|
+
private storeDocumentsAndChunks;
|
|
135
|
+
/**
|
|
136
|
+
* Update vector index with new embeddings
|
|
137
|
+
*/
|
|
138
|
+
private updateVectorIndex;
|
|
139
|
+
/**
|
|
140
|
+
* Initialize the pipeline for rebuild (skips the model compatibility check)
|
|
141
|
+
*/
|
|
142
|
+
private initializeForRebuild;
|
|
143
|
+
/**
|
|
144
|
+
* Rebuild the entire index from scratch
|
|
145
|
+
* Useful when the model version changes or for maintenance
|
|
146
|
+
* Automatically initializes resources if needed (Requirement 5.2)
|
|
147
|
+
*/
|
|
148
|
+
rebuildIndex(): Promise<void>;
|
|
149
|
+
/**
|
|
150
|
+
* Get pipeline statistics
|
|
151
|
+
*/
|
|
152
|
+
getStats(): Promise<{
|
|
153
|
+
indexStats: any;
|
|
154
|
+
isInitialized: boolean;
|
|
155
|
+
}>;
|
|
156
|
+
/**
|
|
157
|
+
* Set up automatic cleanup on process exit (Requirement 5.5)
|
|
158
|
+
*/
|
|
159
|
+
private setupAutomaticCleanup;
|
|
160
|
+
/**
|
|
161
|
+
* Clean up resources
|
|
162
|
+
*/
|
|
163
|
+
cleanup(): Promise<void>;
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Convenience function to ingest documents from a path
|
|
167
|
+
* Creates a pipeline instance, runs ingestion, and cleans up
|
|
168
|
+
*/
|
|
169
|
+
export declare function ingestDocuments(path: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
170
|
+
/**
|
|
171
|
+
* Convenience function to rebuild the index
|
|
172
|
+
* Creates a pipeline instance, rebuilds the index, and cleans up
|
|
173
|
+
*/
|
|
174
|
+
export declare function rebuildIndex(): Promise<void>;
|
|
175
|
+
//# sourceMappingURL=ingestion.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingestion.d.ts","sourceRoot":"","sources":["../src/ingestion.ts"],"names":[],"mappings":"AAAA,OAAO,EAA2B,KAAK,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACzF,OAAO,EAAiB,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAC/D,OAAO,EAA6B,KAAK,eAAe,EAAE,MAAM,eAAe,CAAC;AAGhF,OAAO,EAA0B,MAAM,EAAoB,MAAM,aAAa,CAAC;AAO/E;;GAEG;AACH,qBAAa,cAAe,SAAQ,KAAK;IAG9B,IAAI,EAAE,MAAM;IACZ,WAAW,EAAE,MAAM,EAAE;gBAF5B,OAAO,EAAE,MAAM,EACR,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,MAAM,EAAE;CAK/B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,8BAA8B;IAC9B,WAAW,CAAC,EAAE,oBAAoB,CAAC;IACnC,6BAA6B;IAC7B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,yCAAyC;IACzC,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,gCAAgC;IAChC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,2BAA2B;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,iCAAiC;IACjC,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,cAAc,EAAE,MAAM,CAAC;IACvB,6CAA6C;IAC7C,eAAe,EAAE,MAAM,CAAC;IACxB,sCAAsC;IACtC,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AA4BD;;;GAGG;AACH,qBAAa,iBAAiB;IAE5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAgC;IACxD,OAAO,CAAC,MAAM,CAAC,kBAAkB,CAAS;IAE1C,OAAO,CAAC,EAAE,CAAmC;IAC7C,OAAO,CAAC,YAAY,CAA6B;IACjD,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,eAAe,CAAuB;IAE9C;;;;;OAKG;gBACS,QAAQ,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,eAAe;IAgCzD;;;OAGG;IACH,kBAAkB,CAAC,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,IAAI;IAIpD;;;;OAIG;IACH,sBAAsB,CAAC,QAAQ,EAAE,UAAU,GAAG,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI;IAKlF;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAuB1B;;;OAGG;YACW,iBAAiB;IA+C/B;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAuG/B;;;OAGG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAIjC;;;;;;OAMG;IACG,eAAe,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,CAAC;IAetG;;;;;;OAMG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,CAAC;IAe5F;;;;;;;;;OASG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,CAAC;IA+FxF;;OAEG;YACW,cAAc;IA2C5B;;;OAGG;YACW,kBAAkB;IA+BhC;;;OAGG;YACW,uBAAuB;IA4ErC;;OAEG;YACW,iBA
AiB;IAqB/B;;OAEG;YACW,oBAAoB;IA2ClC;;;;OAIG;IACG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBnC;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC;QACxB,UAAU,EAAE,GAAG,CAAC;QAChB,aAAa,EAAE,OAAO,CAAC;KACxB,CAAC;IAiBF;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAyD7B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAuB/B;AAED;;;GAGG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CAU1B;AAED;;;GAGG;AACH,wBAAsB,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAmClD"}
|