rag-lite-ts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +240 -0
- package/dist/api-errors.d.ts +90 -0
- package/dist/api-errors.d.ts.map +1 -0
- package/dist/api-errors.js +320 -0
- package/dist/api-errors.js.map +1 -0
- package/dist/chunker.d.ts +47 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +256 -0
- package/dist/chunker.js.map +1 -0
- package/dist/cli/indexer.d.ts +11 -0
- package/dist/cli/indexer.d.ts.map +1 -0
- package/dist/cli/indexer.js +272 -0
- package/dist/cli/indexer.js.map +1 -0
- package/dist/cli/search.d.ts +7 -0
- package/dist/cli/search.d.ts.map +1 -0
- package/dist/cli/search.js +206 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +362 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +90 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +281 -0
- package/dist/config.js.map +1 -0
- package/dist/db.d.ts +90 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +340 -0
- package/dist/db.js.map +1 -0
- package/dist/embedder.d.ts +101 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +323 -0
- package/dist/embedder.js.map +1 -0
- package/dist/error-handler.d.ts +91 -0
- package/dist/error-handler.d.ts.map +1 -0
- package/dist/error-handler.js +196 -0
- package/dist/error-handler.js.map +1 -0
- package/dist/file-processor.d.ts +59 -0
- package/dist/file-processor.d.ts.map +1 -0
- package/dist/file-processor.js +312 -0
- package/dist/file-processor.js.map +1 -0
- package/dist/index-manager.d.ts +99 -0
- package/dist/index-manager.d.ts.map +1 -0
- package/dist/index-manager.js +444 -0
- package/dist/index-manager.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +7 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +51 -0
- package/dist/indexer.js.map +1 -0
- package/dist/ingestion.d.ts +175 -0
- package/dist/ingestion.d.ts.map +1 -0
- package/dist/ingestion.js +705 -0
- package/dist/ingestion.js.map +1 -0
- package/dist/mcp-server.d.ts +14 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +680 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/path-manager.d.ts +42 -0
- package/dist/path-manager.d.ts.map +1 -0
- package/dist/path-manager.js +66 -0
- package/dist/path-manager.js.map +1 -0
- package/dist/preprocess.d.ts +19 -0
- package/dist/preprocess.d.ts.map +1 -0
- package/dist/preprocess.js +203 -0
- package/dist/preprocess.js.map +1 -0
- package/dist/preprocessors/index.d.ts +17 -0
- package/dist/preprocessors/index.d.ts.map +1 -0
- package/dist/preprocessors/index.js +38 -0
- package/dist/preprocessors/index.js.map +1 -0
- package/dist/preprocessors/mdx.d.ts +25 -0
- package/dist/preprocessors/mdx.d.ts.map +1 -0
- package/dist/preprocessors/mdx.js +101 -0
- package/dist/preprocessors/mdx.js.map +1 -0
- package/dist/preprocessors/mermaid.d.ts +68 -0
- package/dist/preprocessors/mermaid.d.ts.map +1 -0
- package/dist/preprocessors/mermaid.js +329 -0
- package/dist/preprocessors/mermaid.js.map +1 -0
- package/dist/preprocessors/registry.d.ts +56 -0
- package/dist/preprocessors/registry.d.ts.map +1 -0
- package/dist/preprocessors/registry.js +179 -0
- package/dist/preprocessors/registry.js.map +1 -0
- package/dist/reranker.d.ts +40 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +212 -0
- package/dist/reranker.js.map +1 -0
- package/dist/resource-manager-demo.d.ts +7 -0
- package/dist/resource-manager-demo.d.ts.map +1 -0
- package/dist/resource-manager-demo.js +52 -0
- package/dist/resource-manager-demo.js.map +1 -0
- package/dist/resource-manager.d.ts +129 -0
- package/dist/resource-manager.d.ts.map +1 -0
- package/dist/resource-manager.js +389 -0
- package/dist/resource-manager.js.map +1 -0
- package/dist/search-standalone.d.ts +7 -0
- package/dist/search-standalone.d.ts.map +1 -0
- package/dist/search-standalone.js +117 -0
- package/dist/search-standalone.js.map +1 -0
- package/dist/search.d.ts +92 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +454 -0
- package/dist/search.js.map +1 -0
- package/dist/test-utils.d.ts +36 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +27 -0
- package/dist/test-utils.js.map +1 -0
- package/dist/tokenizer.d.ts +21 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +59 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/types.d.ts +44 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/vector-index.d.ts +64 -0
- package/dist/vector-index.d.ts.map +1 -0
- package/dist/vector-index.js +308 -0
- package/dist/vector-index.js.map +1 -0
- package/package.json +80 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { DocumentPathManager } from './path-manager.js';
|
|
2
|
+
import type { Document } from './types.js';
|
|
3
|
+
/**
 * Options for file discovery and processing.
 *
 * Both fields are optional. `DEFAULT_FILE_PROCESSOR_OPTIONS` holds the values
 * that are used when no options object is passed at all.
 */
export interface FileProcessorOptions {
    /** Whether to process files recursively in subdirectories */
    recursive?: boolean;
    /** Maximum file size in bytes (default: 10MB); larger files are reported as skipped */
    maxFileSize?: number;
}
|
|
12
|
+
/**
 * Default options for file processing.
 * This is the default value of the `options` parameter of `discoverFiles`
 * and `discoverAndProcessFiles`.
 */
export declare const DEFAULT_FILE_PROCESSOR_OPTIONS: FileProcessorOptions;
|
|
16
|
+
/**
 * Outcome of a file discovery pass: the files accepted for ingestion and the
 * paths that were rejected, each paired with a human-readable reason.
 */
export interface FileDiscoveryResult {
    /** Paths of the files that were successfully discovered */
    files: string[];
    /** Paths that were skipped, with the reason each one was rejected */
    skipped: { path: string; reason: string }[];
}
|
|
28
|
+
/**
 * Outcome of turning a list of files into documents: the documents that were
 * produced and the files that could not be processed.
 */
export interface DocumentProcessingResult {
    /** Documents that were processed successfully */
    documents: Document[];
    /** Files that failed to process, with the error message for each */
    errors: { path: string; error: string }[];
}
|
|
40
|
+
/**
 * Discover files for ingestion.
 * Supports both single files and directories (with optional recursion).
 *
 * @param path - File or directory to inspect.
 * @param options - Discovery options; `DEFAULT_FILE_PROCESSOR_OPTIONS` is used when omitted.
 * @returns The accepted file paths plus the paths that were skipped and why.
 */
export declare function discoverFiles(path: string, options?: FileProcessorOptions): Promise<FileDiscoveryResult>;
|
|
45
|
+
/**
 * Process multiple files into Documents.
 * Handles errors gracefully by skipping problematic files: a file that fails
 * is recorded in `errors` and the remaining files are still processed.
 *
 * @param filePaths - Paths of the files to process.
 * @param pathManager - Path manager used to derive each document's stored source path.
 * @returns The documents that were produced and the per-file errors.
 */
export declare function processFiles(filePaths: string[], pathManager: DocumentPathManager): Promise<DocumentProcessingResult>;
|
|
50
|
+
/**
 * Complete file discovery and processing pipeline.
 * Discovers files and processes them into Documents.
 *
 * @param path - File or directory to ingest.
 * @param options - Discovery options; `DEFAULT_FILE_PROCESSOR_OPTIONS` is used when omitted.
 * @param pathManager - Optional path manager; a default one is created when omitted.
 * @returns The processed documents together with the raw discovery and processing results.
 */
export declare function discoverAndProcessFiles(path: string, options?: FileProcessorOptions, pathManager?: DocumentPathManager): Promise<{
    documents: Document[];
    discoveryResult: FileDiscoveryResult;
    processingResult: DocumentProcessingResult;
}>;
|
|
59
|
+
//# sourceMappingURL=file-processor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-processor.d.ts","sourceRoot":"","sources":["../src/file-processor.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAyC3C;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,6DAA6D;IAC7D,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,iDAAiD;IACjD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,eAAO,MAAM,8BAA8B,EAAE,oBAG5C,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oCAAoC;IACpC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,4CAA4C;IAC5C,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,uCAAuC;IACvC,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,mCAAmC;IACnC,MAAM,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;CACJ;AAoED;;;GAGG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,oBAAqD,GAC7D,OAAO,CAAC,mBAAmB,CAAC,CAsD9B;AA4GD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,SAAS,EAAE,MAAM,EAAE,EACnB,WAAW,EAAE,mBAAmB,GAC/B,OAAO,CAAC,wBAAwB,CAAC,CAmBnC;AAED;;;GAGG;AACH,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,oBAAqD,EAC9D,WAAW,CAAC,EAAE,mBAAmB,GAChC,OAAO,CAAC;IACT,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,eAAe,EAAE,mBAAmB,CAAC;IACrC,gBAAgB,EAAE,wBAAwB,CAAC;CAC5C,CAAC,CAwCD"}
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
import { promises as fs } from 'fs';
|
|
2
|
+
import { join, extname, basename, resolve } from 'path';
|
|
3
|
+
import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
|
|
4
|
+
import { preprocessDocument } from './preprocess.js';
|
|
5
|
+
import { config } from './config.js';
|
|
6
|
+
import { DocumentPathManager } from './path-manager.js';
|
|
7
|
+
import { createRequire } from 'module';
|
|
8
|
+
import { JSDOM } from 'jsdom';
|
|
9
|
+
const require = createRequire(import.meta.url);
|
|
10
|
+
// Set up DOM polyfills for PDF parsing.
// A jsdom window is created once at module load; the globals below are looked
// up on it before falling back to minimal stubs.
const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
    pretendToBeVisual: true,
    resources: 'usable'
});
// Polyfill global objects needed by pdf-parse.
// Precedence for each global: an implementation that is already installed
// (by the runtime or by another polyfill) is kept, then jsdom's own, then a
// minimal stub. Keeping an existing global avoids replacing a real
// implementation with a no-op one as a side effect of importing this module.
global.DOMMatrix = global.DOMMatrix || dom.window.DOMMatrix || class {
    // Identity 2D transform components
    a = 1;
    b = 0;
    c = 0;
    d = 1;
    e = 0;
    f = 0;
    constructor() { }
};
global.ImageData = global.ImageData || dom.window.ImageData || class {
    width;
    height;
    data;
    constructor(width, height) {
        this.width = width;
        this.height = height;
        // Four bytes (RGBA) per pixel
        this.data = new Uint8ClampedArray(width * height * 4);
    }
};
global.Path2D = global.Path2D || dom.window.Path2D || class {
    constructor() { }
    // Drawing methods are intentionally no-ops
    moveTo() { }
    lineTo() { }
    closePath() { }
};
|
|
41
|
+
const pdfParse = require('pdf-parse');
|
|
42
|
+
import * as mammoth from 'mammoth';
|
|
43
|
+
/**
 * Supported file extensions for document ingestion.
 * Entries are lower-case and include the leading dot, because they are
 * compared against `extname(...).toLowerCase()`.
 */
const SUPPORTED_EXTENSIONS = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
|
|
47
|
+
/**
 * Default options for file processing:
 * recurse into subdirectories and skip files larger than 10MB.
 */
export const DEFAULT_FILE_PROCESSOR_OPTIONS = {
    recursive: true,
    maxFileSize: 10 * 1024 * 1024 // 10MB
};
|
|
54
|
+
/**
 * Tell whether a path ends in one of the ingestible extensions.
 * The comparison is case-insensitive on the extension.
 */
function isSupportedFile(filePath) {
    const normalizedExt = extname(filePath).toLowerCase();
    return SUPPORTED_EXTENSIONS.some((supported) => supported === normalizedExt);
}
|
|
61
|
+
/**
 * Recursively discover supported files below a directory.
 *
 * Never throws for file-system errors: an unreadable directory or a file that
 * cannot be stat'ed is recorded in `skipped` and the walk continues.
 *
 * @param dirPath - Directory to scan.
 * @param options - Reads `recursive` (descend into subdirectories) and
 *   `maxFileSize` (files larger than this many bytes are skipped; a falsy
 *   value disables the check).
 * @returns Accepted file paths and skipped paths with reasons.
 */
async function discoverFilesRecursive(dirPath, options) {
    const result = {
        files: [],
        skipped: []
    };
    let entries;
    // Only the directory read is guarded here, so that "Failed to read directory"
    // is reported for exactly that failure and nothing else.
    try {
        entries = await fs.readdir(dirPath, { withFileTypes: true });
    }
    catch (error) {
        result.skipped.push({
            path: dirPath,
            reason: `Failed to read directory: ${error instanceof Error ? error.message : String(error)}`
        });
        return result;
    }
    for (const entry of entries) {
        const fullPath = join(dirPath, entry.name);
        if (entry.isDirectory()) {
            if (options.recursive) {
                // Recursively process subdirectory
                const subResult = await discoverFilesRecursive(fullPath, options);
                // Merge element by element: push(...array) passes every element as a
                // call argument and can throw RangeError for very large subtrees.
                for (const file of subResult.files) {
                    result.files.push(file);
                }
                for (const skippedEntry of subResult.skipped) {
                    result.skipped.push(skippedEntry);
                }
            }
            continue;
        }
        // Anything that is not a regular file with a supported extension is ignored
        if (!entry.isFile() || !isSupportedFile(fullPath)) {
            continue;
        }
        try {
            // Check file size
            const stats = await fs.stat(fullPath);
            if (options.maxFileSize && stats.size > options.maxFileSize) {
                result.skipped.push({
                    path: fullPath,
                    reason: `File size (${stats.size} bytes) exceeds maximum (${options.maxFileSize} bytes)`
                });
                continue;
            }
            result.files.push(fullPath);
        }
        catch (error) {
            result.skipped.push({
                path: fullPath,
                reason: `Failed to stat file: ${error instanceof Error ? error.message : String(error)}`
            });
        }
    }
    return result;
}
|
|
113
|
+
/**
 * Discover files for ingestion.
 * Supports both single files and directories (with optional recursion).
 *
 * Never rejects for file-system errors: an inaccessible path is returned as a
 * single `skipped` entry.
 *
 * @param path - File or directory to inspect; resolved to an absolute path.
 * @param options - Discovery options. Any field left undefined falls back to
 *   the value in `DEFAULT_FILE_PROCESSOR_OPTIONS`, so a partial object such as
 *   `{ recursive: false }` still gets the default size limit.
 * @returns Accepted file paths and skipped paths with reasons.
 */
export async function discoverFiles(path, options = DEFAULT_FILE_PROCESSOR_OPTIONS) {
    const resolvedPath = resolve(path);
    // Fill in whatever the caller left out. `??` keeps explicit values such as
    // `recursive: false` or `maxFileSize: 0` (which disables the size check).
    const effectiveOptions = {
        ...options,
        recursive: options?.recursive ?? DEFAULT_FILE_PROCESSOR_OPTIONS.recursive,
        maxFileSize: options?.maxFileSize ?? DEFAULT_FILE_PROCESSOR_OPTIONS.maxFileSize
    };
    // Result shape used for every "nothing accepted" outcome
    const skip = (reason) => ({
        files: [],
        skipped: [{ path: resolvedPath, reason }]
    });
    try {
        const stats = await fs.stat(resolvedPath);
        if (stats.isDirectory()) {
            // Directory processing
            return await discoverFilesRecursive(resolvedPath, effectiveOptions);
        }
        if (!stats.isFile()) {
            return skip('Path is neither a file nor a directory');
        }
        // Single file processing
        if (!isSupportedFile(resolvedPath)) {
            return skip(`Unsupported file extension. Supported: ${SUPPORTED_EXTENSIONS.join(', ')}`);
        }
        // Check file size
        if (effectiveOptions.maxFileSize && stats.size > effectiveOptions.maxFileSize) {
            return skip(`File size (${stats.size} bytes) exceeds maximum (${effectiveOptions.maxFileSize} bytes)`);
        }
        return {
            files: [resolvedPath],
            skipped: []
        };
    }
    catch (error) {
        return skip(`Failed to access path: ${error instanceof Error ? error.message : String(error)}`);
    }
}
|
|
171
|
+
/**
 * Read a PDF from disk and return the text that pdf-parse extracts from it.
 */
async function extractPdfContent(filePath) {
    const parsed = await pdfParse(await fs.readFile(filePath));
    return parsed.text;
}
|
|
179
|
+
/**
 * Read a DOCX file from disk and return its raw text as extracted by mammoth.
 */
async function extractDocxContent(filePath) {
    const docxBytes = await fs.readFile(filePath);
    const extraction = await mammoth.extractRawText({ buffer: docxBytes });
    return extraction.value;
}
|
|
187
|
+
/**
 * Extract a document title from its content.
 *
 * Returns the text of the first markdown H1 (`# Title`) found outside fenced
 * code blocks; when there is none, falls back to the file name without its
 * extension.
 *
 * @param content - Document text to scan.
 * @param filePath - Path of the source file, used for the fallback title.
 * @returns The H1 text, or the extension-less file name.
 */
function extractTitle(content, filePath) {
    // Fence character (` or ~) of the code block currently open, '' when outside one
    let openFence = '';
    for (const line of content.split('\n')) {
        const trimmed = line.trim();
        const fence = /^(`{3,}|~{3,})/.exec(trimmed);
        if (fence) {
            const fenceChar = fence[1][0];
            if (openFence === '') {
                openFence = fenceChar;
            }
            else if (openFence === fenceChar) {
                openFence = '';
            }
            continue;
        }
        // Lines inside a fenced code block are code, not headings: a shell or
        // Python comment such as "# install deps" must not become the title.
        if (openFence !== '') {
            continue;
        }
        if (trimmed.startsWith('# ')) {
            const title = trimmed.substring(2).trim();
            if (title) {
                return title;
            }
        }
    }
    // Fallback to filename without extension
    const filename = basename(filePath);
    const ext = extname(filename);
    return ext ? filename.slice(0, -ext.length) : filename;
}
|
|
208
|
+
/**
 * Turn one file on disk into a document object with `source`, `title` and
 * `content` fields.
 *
 * The work runs inside `safeExecute`; when that yields a falsy result this
 * function throws a generic "Failed to process file" error.
 * NOTE(review): safeExecute is defined in error-handler.js and is not visible
 * here — presumably it reports the error and returns a falsy value; confirm.
 */
async function processFile(filePath, pathManager) {
    const loadDocument = async () => {
        const extension = extname(filePath).toLowerCase();
        // PDF and DOCX need dedicated extractors; every other extension is read as UTF-8 text
        let text;
        if (extension === '.pdf') {
            text = await extractPdfContent(filePath);
        }
        else if (extension === '.docx') {
            text = await extractDocxContent(filePath);
        }
        else {
            text = await fs.readFile(filePath, 'utf-8');
        }
        // Reject files with nothing but whitespace before spending time on preprocessing
        if (!text.trim()) {
            throw new Error('File is empty or contains only whitespace');
        }
        // Every content type goes through the preprocessing module
        text = preprocessDocument(text, filePath, config.preprocessing);
        // Preprocessing may strip everything; treat that as a failure as well
        if (!text.trim()) {
            throw new Error('File contains no content after preprocessing');
        }
        const title = extractTitle(text, filePath);
        // The stored source path is derived by the path manager
        const source = pathManager.toStoragePath(filePath);
        return { source, title, content: text.trim() };
    };
    const document = await safeExecute(loadDocument, `File Processing: ${filePath}`, {
        category: ErrorCategory.FILE_SYSTEM,
        severity: ErrorSeverity.ERROR
    });
    if (!document) {
        throw new Error(`Failed to process file: ${filePath}`);
    }
    return document;
}
|
|
255
|
+
/**
 * Convert a list of files into documents, one after another.
 * A file that fails does not abort the run: its path and error message are
 * collected in `errors` and processing moves on to the next file.
 */
export async function processFiles(filePaths, pathManager) {
    const documents = [];
    const errors = [];
    for (const filePath of filePaths) {
        try {
            documents.push(await processFile(filePath, pathManager));
        }
        catch (failure) {
            const message = failure instanceof Error ? failure.message : String(failure);
            errors.push({ path: filePath, error: message });
        }
    }
    return { documents, errors };
}
|
|
278
|
+
/**
 * End-to-end ingestion helper: discover the files under `path`, process them
 * into documents, and log a summary of both stages to the console.
 *
 * When no path manager is supplied, one is created from the configured
 * storage strategy and the resolved input path.
 */
export async function discoverAndProcessFiles(path, options = DEFAULT_FILE_PROCESSOR_OPTIONS, pathManager) {
    console.log(`Discovering files in: ${path}`);
    // Stage 1: discovery
    const discoveryResult = await discoverFiles(path, options);
    const skippedCount = discoveryResult.skipped.length;
    if (skippedCount > 0) {
        console.log(`Skipped ${skippedCount} files:`);
        discoveryResult.skipped.forEach((item) => {
            console.error(` - ${item.path}: ${item.reason}`);
        });
    }
    console.log(`Found ${discoveryResult.files.length} supported files`);
    // Stage 2: processing, with a default path manager if the caller gave none
    const effectivePathManager = pathManager
        ? pathManager
        : new DocumentPathManager(config.path_storage_strategy, resolve(path));
    const processingResult = await processFiles(discoveryResult.files, effectivePathManager);
    const failedCount = processingResult.errors.length;
    if (failedCount > 0) {
        console.log(`Failed to process ${failedCount} files:`);
        processingResult.errors.forEach((item) => {
            console.error(` - ${item.path}: ${item.error}`);
        });
    }
    console.log(`Successfully processed ${processingResult.documents.length} documents`);
    return {
        documents: processingResult.documents,
        discoveryResult,
        processingResult
    };
}
|
|
312
|
+
//# sourceMappingURL=file-processor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-processor.js","sourceRoot":"","sources":["../src/file-processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACxD,OAAO,EAAe,aAAa,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAC5F,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AACvC,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,uCAAuC;AACvC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,2CAA2C,EAAE;IACjE,iBAAiB,EAAE,IAAI;IACvB,SAAS,EAAE,QAAQ;CACpB,CAAC,CAAC;AAEH,8CAA8C;AAC7C,MAAc,CAAC,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI;IAClD,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC,GAAG,CAAC,CAAC;IACzC,gBAAe,CAAC;CACjB,CAAC;AACD,MAAc,CAAC,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI;IAClD,KAAK,CAAS;IACd,MAAM,CAAS;IACf,IAAI,CAAoB;IACxB,YAAY,KAAa,EAAE,MAAc;QACvC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,IAAI,iBAAiB,CAAC,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC;IACxD,CAAC;CACF,CAAC;AACD,MAAc,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI;IAC5C,gBAAe,CAAC;IAChB,MAAM,KAAI,CAAC;IACX,MAAM,KAAI,CAAC;IACX,SAAS,KAAI,CAAC;CACf,CAAC;AAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;AACtC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC;;GAEG;AACH,MAAM,oBAAoB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;AAYtE;;GAEG;AACH,MAAM,CAAC,MAAM,8BAA8B,GAAyB;IAClE,SAAS,EAAE,IAAI;IACf,WAAW,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,OAAO;CACtC,CAAC;AA4BF;;GAEG;AACH,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,oBAAoB,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC5C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CACnC,OAAe,EACf,OAA6B;IAE7B,MAAM,MAAM,GAAwB;QAClC,KAAK,EAAE,EAAE;QACT,OAAO,EAAE,EAAE;KACZ,CAAC;IAEF,IAAI,CAAC;QACH,M
AAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAEnE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAE3C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;oBACtB,mCAAmC;oBACnC,MAAM,SAAS,GAAG,MAAM,sBAAsB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;oBAClE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;oBACtC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;gBAC5C,CAAC;YACH,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBAC1B,IAAI,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC9B,IAAI,CAAC;wBACH,kBAAkB;wBAClB,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;wBACtC,IAAI,OAAO,CAAC,WAAW,IAAI,KAAK,CAAC,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;4BAC5D,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;gCAClB,IAAI,EAAE,QAAQ;gCACd,MAAM,EAAE,cAAc,KAAK,CAAC,IAAI,4BAA4B,OAAO,CAAC,WAAW,SAAS;6BACzF,CAAC,CAAC;4BACH,SAAS;wBACX,CAAC;wBAED,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAC9B,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;4BAClB,IAAI,EAAE,QAAQ;4BACd,MAAM,EAAE,wBAAwB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;yBACzF,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;YAClB,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,6BAA6B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;SAC9F,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAY,EACZ,UAAgC,8BAA8B;IAE9D,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAE1C,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;YACnB,yBAAyB;YACzB,IAAI,CAAC,eAAe,CAAC,YAAY,CAAC,EAAE,CAAC;gBACnC,OAAO;oBACL,KAAK,EAAE,EAAE;oBACT,OAAO,EAAE,CAAC;4BACR,IAAI,EAAE,YAAY;4BAClB,MAAM,EAAE,0CAA0C,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;yBACpF,CAAC;iBACH,CAAC;YACJ,CAAC;YAED,kBAAkB;YAClB,IAAI,OAAO,CAAC,WAAW,IAAI,KAAK,CAAC,
IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;gBAC5D,OAAO;oBACL,KAAK,EAAE,EAAE;oBACT,OAAO,EAAE,CAAC;4BACR,IAAI,EAAE,YAAY;4BAClB,MAAM,EAAE,cAAc,KAAK,CAAC,IAAI,4BAA4B,OAAO,CAAC,WAAW,SAAS;yBACzF,CAAC;iBACH,CAAC;YACJ,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,CAAC,YAAY,CAAC;gBACrB,OAAO,EAAE,EAAE;aACZ,CAAC;QACJ,CAAC;aAAM,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YAC/B,uBAAuB;YACvB,OAAO,MAAM,sBAAsB,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,YAAY;wBAClB,MAAM,EAAE,wCAAwC;qBACjD,CAAC;aACH,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,KAAK,EAAE,EAAE;YACT,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,YAAY;oBAClB,MAAM,EAAE,0BAA0B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;iBAC3F,CAAC;SACH,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,iBAAiB,CAAC,QAAgB;IAC/C,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;IACpC,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IAChD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACxD,OAAO,MAAM,CAAC,KAAK,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,OAAe,EAAE,QAAgB;IACrD,iCAAiC;IACjC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,KAAK,EAAE,CAAC;gBACV,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;IACH,CAAC;IAED,yCAAyC;IACzC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACpC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,OAAO,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AACzD,CAAC;AAID;;GAEG;AACH,KAAK,UAAU,WAAW,CACxB,QAAgB,EAChB,WAAgC;IAEhC,MAAM,MAAM,GAAG,MAAM,WAAW,CAC9B,KAAK,IAAI,EAAE;QACT,IAAI,OAAe,CAAC;QACpB,MAAM,GAAG,GAAG,OAAO
,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QAE5C,qCAAqC;QACrC,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,MAAM;gBACT,OAAO,GAAG,MAAM,iBAAiB,CAAC,QAAQ,CAAC,CAAC;gBAC5C,MAAM;YACR,KAAK,OAAO;gBACV,OAAO,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;gBAC7C,MAAM;YACR,KAAK,KAAK,CAAC;YACX,KAAK,MAAM,CAAC;YACZ,KAAK,MAAM,CAAC;YACZ;gBACE,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM;QACV,CAAC;QAED,gCAAgC;QAChC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,iDAAiD;QACjD,OAAO,GAAG,kBAAkB,CAAC,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;QAEtE,8EAA8E;QAC9E,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;QAED,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE9C,OAAO;YACL,MAAM,EAAE,WAAW,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,mBAAmB;YAChE,KAAK;YACL,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;SACxB,CAAC;IACJ,CAAC,EACD,oBAAoB,QAAQ,EAAE,EAC9B;QACE,QAAQ,EAAE,aAAa,CAAC,WAAW;QACnC,QAAQ,EAAE,aAAa,CAAC,KAAK;KAC9B,CACF,CAAC;IAEF,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,SAAmB,EACnB,WAAgC;IAEhC,MAAM,MAAM,GAA6B;QACvC,SAAS,EAAE,EAAE;QACb,MAAM,EAAE,EAAE;KACX,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;YAC1D,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;gBACjB,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,UAAgC,8BAA8B,EAC9D,WAAiC;IAMjC,OAAO,CAAC,GAAG,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;IAE7C,iBAAiB;IACjB,MAAM,eAAe,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE3D,wBAAwB;IACxB,IAAI,eAAe,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,WAAW,eAAe,CAAC,OAAO,CAAC
,MAAM,SAAS,CAAC,CAAC;QAChE,KAAK,MAAM,OAAO,IAAI,eAAe,CAAC,OAAO,EAAE,CAAC;YAC9C,OAAO,CAAC,KAAK,CAAC,OAAO,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,SAAS,eAAe,CAAC,KAAK,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAErE,8CAA8C;IAC9C,MAAM,oBAAoB,GAAG,WAAW,IAAI,IAAI,mBAAmB,CACjE,MAAM,CAAC,qBAAqB,EAC5B,OAAO,CAAC,IAAI,CAAC,CACd,CAAC;IAEF,6CAA6C;IAC7C,MAAM,gBAAgB,GAAG,MAAM,YAAY,CAAC,eAAe,CAAC,KAAK,EAAE,oBAAoB,CAAC,CAAC;IAEzF,yBAAyB;IACzB,IAAI,gBAAgB,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,qBAAqB,gBAAgB,CAAC,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;QAC1E,KAAK,MAAM,KAAK,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;YAC5C,OAAO,CAAC,KAAK,CAAC,OAAO,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,0BAA0B,gBAAgB,CAAC,SAAS,CAAC,MAAM,YAAY,CAAC,CAAC;IAErF,OAAO;QACL,SAAS,EAAE,gBAAgB,CAAC,SAAS;QACrC,eAAe;QACf,gBAAgB;KACjB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import type { EmbeddingResult } from './types.js';
|
|
2
|
+
/**
 * Statistics reported by `IndexManager.getStats()`.
 * NOTE(review): field meanings below are inferred from the names only; the
 * implementation is not part of this declaration file — confirm.
 */
export interface IndexStats {
    /** Presumably the number of vectors currently held in the index */
    totalVectors: number;
    /** Presumably the embedding model version recorded for the index */
    modelVersion: string;
    /** Presumably the time the index was last changed */
    lastUpdated: Date;
}
|
|
7
|
+
/**
 * Manages a vector index together with a database used for model-version
 * tracking and chunk data.
 * NOTE(review): this summary is based on the member names and doc comments in
 * this declaration file; the implementation is not visible here — confirm.
 */
export declare class IndexManager {
    private modelName?;
    private vectorIndex;
    private db;
    private indexPath;
    private dbPath;
    private isInitialized;
    // The two members below are presumably the two directions of the mapping
    // used by hashEmbeddingId / unhashEmbeddingId — confirm against the implementation.
    private hashToEmbeddingId;
    private embeddingIdToHash;
    constructor(indexPath: string, dbPath: string, dimensions: number, modelName?: string | undefined);
    /**
     * Initialize the index manager and load existing index if available
     * @param skipModelCheck - Skip model compatibility check (used for rebuilds)
     */
    initialize(skipModelCheck?: boolean): Promise<void>;
    /**
     * Check model compatibility between stored and current configuration
     * Requirements: 2.1, 2.2, 2.4, 5.1, 5.2, 5.3, 5.4
     */
    private checkModelCompatibility;
    /**
     * Add vectors to the index with corresponding metadata (incremental addition)
     * Requirements: 5.3 - When new documents are added THEN system SHALL append new chunks and vectors without rebuilding existing index
     */
    addVectors(embeddings: EmbeddingResult[]): Promise<void>;
    /**
     * Rebuild the entire index from scratch
     * Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
     */
    rebuildIndex(newModelVersion?: string): Promise<void>;
    /**
     * Trigger a full rebuild when documents are modified or deleted
     * Requirements: 5.4 - When documents are modified or deleted THEN system SHALL trigger full index rebuild
     */
    triggerRebuildForDocumentChanges(reason: string): Promise<void>;
    /**
     * Complete rebuild workflow with embedding regeneration
     * This method should be called by higher-level components that have access to the EmbeddingEngine
     * Requirements: 5.2, 5.4 - Full index rebuild functionality
     * @param embeddingEngine - Object providing batch embedding of texts and the current model version
     */
    rebuildWithEmbeddings(embeddingEngine: {
        embedDocumentBatch: (texts: string[]) => Promise<EmbeddingResult[]>;
        getModelVersion: () => string;
    }): Promise<void>;
    /**
     * Check if the current model version matches stored version
     * Requirements: 5.1, 5.2 - Compare current embedding model version with stored version
     */
    checkModelVersion(currentVersion: string): Promise<boolean>;
    /**
     * Update the stored model version after successful rebuild
     * Requirements: 5.5 - Save model name and hash in SQLite for version tracking
     */
    updateModelVersion(version: string): Promise<void>;
    /**
     * Validate model version and exit if mismatch detected
     * Requirements: 5.2 - System SHALL exit with error message until full index rebuild is completed
     */
    validateModelVersionOrExit(currentVersion: string): Promise<void>;
    /**
     * Save the vector index to disk
     */
    saveIndex(): Promise<void>;
    /**
     * Search for similar vectors
     * @param queryVector - Query embedding
     * @param k - Optional result count; the default is not visible in this declaration
     * @returns Two arrays, `embeddingIds` and `distances` (presumably parallel — confirm)
     */
    search(queryVector: Float32Array, k?: number): {
        embeddingIds: string[];
        distances: number[];
    };
    /**
     * Get index statistics
     */
    getStats(): Promise<IndexStats>;
    /**
     * Get all chunks from database for rebuild (returns chunk data, not embeddings)
     * Note: Embeddings need to be regenerated during rebuild since we don't store vectors in DB
     */
    private getAllChunksFromDB;
    /**
     * Convert embedding ID string to numeric ID for hnswlib with collision handling
     */
    private hashEmbeddingId;
    /**
     * Convert numeric ID back to embedding ID using the maintained mapping
     */
    private unhashEmbeddingId;
    /**
     * Close database connection
     */
    close(): Promise<void>;
}
|
|
99
|
+
//# sourceMappingURL=index-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-manager.d.ts","sourceRoot":"","sources":["../src/index-manager.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAIlD,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,IAAI,CAAC;CACnB;AAED,qBAAa,YAAY;IAS4C,OAAO,CAAC,SAAS,CAAC;IARrF,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,EAAE,CAAmC;IAC7C,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,iBAAiB,CAAkC;IAC3D,OAAO,CAAC,iBAAiB,CAAkC;gBAE/C,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAU,SAAS,CAAC,EAAE,MAAM,YAAA;IAa7F;;;OAGG;IACG,UAAU,CAAC,cAAc,GAAE,OAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAqChE;;;OAGG;YACW,uBAAuB;IAwDrC;;;OAGG;IACG,UAAU,CAAC,UAAU,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAsC9D;;;OAGG;IACG,YAAY,CAAC,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA2D3D;;;OAGG;IACG,gCAAgC,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKrE;;;;OAIG;IACG,qBAAqB,CACzB,eAAe,EAAE;QAAE,kBAAkB,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;QAAC,eAAe,EAAE,MAAM,MAAM,CAAA;KAAE,GACtH,OAAO,CAAC,IAAI,CAAC;IAwEhB;;;OAGG;IACG,iBAAiB,CAAC,cAAc,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IA8BjE;;;OAGG;IACG,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAaxD;;;OAGG;IACG,0BAA0B,CAAC,cAAc,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBvE;;OAEG;IACG,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAQhC;;OAEG;IACH,MAAM,CAAC,WAAW,EAAE,YAAY,EAAE,CAAC,GAAE,MAAU,GAAG;QAAE,YAAY,EAAE,MAAM,EAAE,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE;IAgBjG;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,UAAU,CAAC;IAoBrC;;;OAGG;YACW,kBAAkB;IAoBhC;;OAEG;IACH,OAAO,CAAC,eAAe;IA2BvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAUzB;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAM7B"}
|