rag-lite-ts 2.2.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +53 -7
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +53 -10
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +80 -7
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +314 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +279 -218
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +11 -1
- package/dist/cjs/index.js +19 -1
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +53 -7
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +53 -10
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +80 -7
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +314 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +279 -218
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +11 -1
- package/dist/esm/index.js +19 -1
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +14 -7
package/dist/cjs/cli.js
CHANGED
|
@@ -6,8 +6,18 @@ import { EXIT_CODES, ConfigurationError } from './core/config.js';
|
|
|
6
6
|
// Get package.json for version info
|
|
7
7
|
const __filename = fileURLToPath(import.meta.url);
|
|
8
8
|
const __dirname = dirname(__filename);
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
// When built, CLI is at dist/esm/cli.js, so go up two levels to root
|
|
10
|
+
// When running from source, CLI is at src/cli.ts, so go up one level to root
|
|
11
|
+
const packageJsonPath = join(__dirname, '..', '..', 'package.json');
|
|
12
|
+
let packageJson;
|
|
13
|
+
try {
|
|
14
|
+
packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
15
|
+
}
|
|
16
|
+
catch {
|
|
17
|
+
// Fallback: try one level up (for source execution)
|
|
18
|
+
const fallbackPath = join(__dirname, '..', 'package.json');
|
|
19
|
+
packageJson = JSON.parse(readFileSync(fallbackPath, 'utf-8'));
|
|
20
|
+
}
|
|
11
21
|
/**
|
|
12
22
|
* Display version information
|
|
13
23
|
*/
|
|
@@ -28,6 +38,7 @@ Usage:
|
|
|
28
38
|
Commands:
|
|
29
39
|
ingest <path> Ingest documents from file or directory
|
|
30
40
|
search <query> Search indexed documents (text or image)
|
|
41
|
+
ui Launch the web interface
|
|
31
42
|
rebuild Rebuild the vector index
|
|
32
43
|
version Show version information
|
|
33
44
|
help Show this help message
|
|
@@ -43,6 +54,8 @@ Examples:
|
|
|
43
54
|
raglite search "red car" --content-type image # Search only image results
|
|
44
55
|
raglite search ./photo.jpg # Search with image (multimodal mode only)
|
|
45
56
|
raglite search ./image.png --top-k 5 # Find similar images
|
|
57
|
+
raglite search "How does auth work?" --generate # [EXPERIMENTAL] Generate AI response
|
|
58
|
+
raglite ui # Launch web interface
|
|
46
59
|
|
|
47
60
|
raglite rebuild # Rebuild the entire index
|
|
48
61
|
|
|
@@ -52,10 +65,17 @@ Options for search:
|
|
|
52
65
|
--no-rerank Disable reranking
|
|
53
66
|
--content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
|
|
54
67
|
|
|
68
|
+
[EXPERIMENTAL] AI Response Generation (text mode only):
|
|
69
|
+
--generate Generate an AI response from search results
|
|
70
|
+
--generator <model> Generator model to use (default: SmolLM2-135M-Instruct)
|
|
71
|
+
--max-tokens <n> Maximum tokens to generate (default: 512)
|
|
72
|
+
--temperature <n> Sampling temperature 0-1 (default: 0.1)
|
|
73
|
+
--max-chunks <n> Maximum chunks for context (default: 3 for 135M, 5 for 360M)
|
|
74
|
+
|
|
55
75
|
Options for ingest:
|
|
56
76
|
--model <name> Use specific embedding model
|
|
57
77
|
--mode <mode> Processing mode: 'text' (default) or 'multimodal'
|
|
58
|
-
--
|
|
78
|
+
--force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)
|
|
59
79
|
--path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
|
|
60
80
|
--path-base <path> Base directory for relative paths (defaults to current directory)
|
|
61
81
|
|
|
@@ -71,6 +91,12 @@ Available reranking strategies (multimodal mode):
|
|
|
71
91
|
text-derived Use image-to-text conversion + cross-encoder (default)
|
|
72
92
|
disabled No reranking, use vector similarity only
|
|
73
93
|
|
|
94
|
+
[EXPERIMENTAL] Available generator models:
|
|
95
|
+
HuggingFaceTB/SmolLM2-135M-Instruct (balanced, recommended default, uses top 3 chunks)
|
|
96
|
+
HuggingFaceTB/SmolLM2-360M-Instruct (higher quality, slower, uses top 5 chunks)
|
|
97
|
+
|
|
98
|
+
Note: Generation requires reranking (--rerank is automatically enabled with --generate)
|
|
99
|
+
|
|
74
100
|
For more information, visit: https://github.com/your-repo/rag-lite-ts
|
|
75
101
|
`);
|
|
76
102
|
}
|
|
@@ -111,8 +137,12 @@ function parseArgs() {
|
|
|
111
137
|
else if (optionName === 'no-rerank') {
|
|
112
138
|
options.rerank = false;
|
|
113
139
|
}
|
|
114
|
-
else if (optionName === 'rebuild
|
|
115
|
-
options.
|
|
140
|
+
else if (optionName === 'force-rebuild') {
|
|
141
|
+
options.forceRebuild = true;
|
|
142
|
+
}
|
|
143
|
+
else if (optionName === 'generate') {
|
|
144
|
+
// Handle --generate flag for experimental response generation
|
|
145
|
+
options.generate = true;
|
|
116
146
|
}
|
|
117
147
|
else if (optionName === 'help') {
|
|
118
148
|
return { command: 'help', args: [], options: {} };
|
|
@@ -124,7 +154,16 @@ function parseArgs() {
|
|
|
124
154
|
// Handle options with values
|
|
125
155
|
const nextArg = args[i + 1];
|
|
126
156
|
if (nextArg && !nextArg.startsWith('--')) {
|
|
127
|
-
|
|
157
|
+
// Parse numeric values for specific options
|
|
158
|
+
if (optionName === 'max-tokens' || optionName === 'top-k' || optionName === 'max-chunks') {
|
|
159
|
+
options[optionName] = parseInt(nextArg, 10);
|
|
160
|
+
}
|
|
161
|
+
else if (optionName === 'temperature') {
|
|
162
|
+
options[optionName] = parseFloat(nextArg);
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
options[optionName] = nextArg;
|
|
166
|
+
}
|
|
128
167
|
i++; // Skip the next argument as it's the value
|
|
129
168
|
}
|
|
130
169
|
else {
|
|
@@ -169,7 +208,7 @@ function validateArgs(command, args, options) {
|
|
|
169
208
|
console.error('Options:');
|
|
170
209
|
console.error(' --model <name> Use specific embedding model');
|
|
171
210
|
console.error(' --mode <mode> Processing mode: text (default) or multimodal');
|
|
172
|
-
console.error(' --rebuild
|
|
211
|
+
console.error(' --force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)');
|
|
173
212
|
console.error('');
|
|
174
213
|
console.error('The path can be either a file (.md or .txt) or a directory.');
|
|
175
214
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
@@ -201,6 +240,9 @@ function validateArgs(command, args, options) {
|
|
|
201
240
|
case 'rebuild':
|
|
202
241
|
// No arguments required
|
|
203
242
|
break;
|
|
243
|
+
case 'ui':
|
|
244
|
+
// No arguments required
|
|
245
|
+
break;
|
|
204
246
|
case 'version':
|
|
205
247
|
// No validation needed
|
|
206
248
|
break;
|
|
@@ -412,6 +454,10 @@ async function main() {
|
|
|
412
454
|
const { runRebuild } = await import('./cli/indexer.js');
|
|
413
455
|
await runRebuild();
|
|
414
456
|
break;
|
|
457
|
+
case 'ui':
|
|
458
|
+
const { runUI } = await import('./cli/ui-server.js');
|
|
459
|
+
await runUI(options);
|
|
460
|
+
break;
|
|
415
461
|
default:
|
|
416
462
|
console.error(`Error: Unknown command '${command}'`);
|
|
417
463
|
process.exit(1);
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Generator
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all generator implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
|
|
9
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
10
|
+
* - Token budget management
|
|
11
|
+
* - Error handling with helpful messages
|
|
12
|
+
* - Common utility methods
|
|
13
|
+
*
|
|
14
|
+
* IMPLEMENTATION LAYERS:
|
|
15
|
+
* - Text: InstructGenerator extends this class (SmolLM2-Instruct)
|
|
16
|
+
* - Text: CausalLMGenerator extends this class (DistilGPT2)
|
|
17
|
+
*
|
|
18
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
19
|
+
*/
|
|
20
|
+
import type { ResponseGenerator, GeneratorModelInfo, GeneratorModelType, GenerationRequest, GenerationResult, GeneratorCreationOptions } from './response-generator.js';
|
|
21
|
+
import { GenerationError } from './response-generator.js';
|
|
22
|
+
/**
|
|
23
|
+
* Abstract base class for response generators
|
|
24
|
+
* Provides common functionality and lifecycle management
|
|
25
|
+
*/
|
|
26
|
+
export declare abstract class BaseResponseGenerator implements ResponseGenerator {
|
|
27
|
+
readonly modelName: string;
|
|
28
|
+
protected _isLoaded: boolean;
|
|
29
|
+
protected _modelInfo: GeneratorModelInfo;
|
|
30
|
+
protected _options: GeneratorCreationOptions;
|
|
31
|
+
constructor(modelName: string, options?: GeneratorCreationOptions);
|
|
32
|
+
get modelType(): GeneratorModelType;
|
|
33
|
+
get maxContextLength(): number;
|
|
34
|
+
get maxOutputLength(): number;
|
|
35
|
+
isLoaded(): boolean;
|
|
36
|
+
getModelInfo(): GeneratorModelInfo;
|
|
37
|
+
/**
|
|
38
|
+
* Load the model - must be implemented by subclasses
|
|
39
|
+
*/
|
|
40
|
+
abstract loadModel(): Promise<void>;
|
|
41
|
+
/**
|
|
42
|
+
* Generate text using the model - must be implemented by subclasses
|
|
43
|
+
* @param prompt - The formatted prompt string
|
|
44
|
+
* @param options - Generation options
|
|
45
|
+
* @returns Generated text
|
|
46
|
+
*/
|
|
47
|
+
protected abstract generateText(prompt: string, options: {
|
|
48
|
+
maxTokens: number;
|
|
49
|
+
temperature: number;
|
|
50
|
+
topP: number;
|
|
51
|
+
topK: number;
|
|
52
|
+
repetitionPenalty: number;
|
|
53
|
+
stopSequences: string[];
|
|
54
|
+
}): Promise<{
|
|
55
|
+
text: string;
|
|
56
|
+
promptTokens: number;
|
|
57
|
+
completionTokens: number;
|
|
58
|
+
finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
|
|
59
|
+
}>;
|
|
60
|
+
/**
|
|
61
|
+
* Clean up resources - must be implemented by subclasses
|
|
62
|
+
*/
|
|
63
|
+
abstract cleanup(): Promise<void>;
|
|
64
|
+
/**
|
|
65
|
+
* Generate a response based on query and retrieved chunks
|
|
66
|
+
* This method orchestrates the generation pipeline
|
|
67
|
+
*/
|
|
68
|
+
generate(request: GenerationRequest): Promise<GenerationResult>;
|
|
69
|
+
/**
|
|
70
|
+
* Validate that the model is loaded before operations
|
|
71
|
+
*/
|
|
72
|
+
protected ensureLoaded(): void;
|
|
73
|
+
/**
|
|
74
|
+
* Clean up response text by removing artifacts
|
|
75
|
+
*/
|
|
76
|
+
protected cleanResponseText(text: string): string;
|
|
77
|
+
/**
|
|
78
|
+
* Log model loading progress
|
|
79
|
+
*/
|
|
80
|
+
protected logModelLoading(stage: string, details?: string): void;
|
|
81
|
+
/**
|
|
82
|
+
* Handle model loading errors with helpful messages
|
|
83
|
+
*/
|
|
84
|
+
protected handleLoadingError(error: Error): GenerationError;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Extended options for generator instances
|
|
88
|
+
*/
|
|
89
|
+
export interface GeneratorOptions extends GeneratorCreationOptions {
|
|
90
|
+
/** Log level for debugging */
|
|
91
|
+
logLevel?: 'debug' | 'info' | 'warn' | 'error' | 'silent';
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Create generator options with defaults
|
|
95
|
+
*/
|
|
96
|
+
export declare function createGeneratorOptions(options?: Partial<GeneratorOptions>): GeneratorOptions;
|
|
97
|
+
//# sourceMappingURL=abstract-generator.d.ts.map
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Generator
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all generator implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
|
|
9
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
10
|
+
* - Token budget management
|
|
11
|
+
* - Error handling with helpful messages
|
|
12
|
+
* - Common utility methods
|
|
13
|
+
*
|
|
14
|
+
* IMPLEMENTATION LAYERS:
|
|
15
|
+
* - Text: InstructGenerator extends this class (SmolLM2-Instruct)
|
|
16
|
+
* - Text: CausalLMGenerator extends this class (DistilGPT2)
|
|
17
|
+
*
|
|
18
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
19
|
+
*/
|
|
20
|
+
import { GenerationError } from './response-generator.js';
|
|
21
|
+
import { GeneratorRegistry } from './generator-registry.js';
|
|
22
|
+
import { buildPrompt, getDefaultStopSequences } from './prompt-templates.js';
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// BASE GENERATOR ABSTRACT CLASS
|
|
25
|
+
// =============================================================================
|
|
26
|
+
/**
|
|
27
|
+
* Abstract base class for response generators
|
|
28
|
+
* Provides common functionality and lifecycle management
|
|
29
|
+
*/
|
|
30
|
+
export class BaseResponseGenerator {
|
|
31
|
+
modelName;
|
|
32
|
+
_isLoaded = false;
|
|
33
|
+
_modelInfo;
|
|
34
|
+
_options;
|
|
35
|
+
constructor(modelName, options = {}) {
|
|
36
|
+
this.modelName = modelName;
|
|
37
|
+
const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
|
|
38
|
+
if (!modelInfo) {
|
|
39
|
+
throw new Error(`Generator model '${modelName}' is not supported. ` +
|
|
40
|
+
`Supported models: ${GeneratorRegistry.getSupportedGenerators().join(', ')}`);
|
|
41
|
+
}
|
|
42
|
+
this._modelInfo = modelInfo;
|
|
43
|
+
this._options = options;
|
|
44
|
+
}
|
|
45
|
+
// =============================================================================
|
|
46
|
+
// PUBLIC INTERFACE IMPLEMENTATION
|
|
47
|
+
// =============================================================================
|
|
48
|
+
get modelType() {
|
|
49
|
+
return this._modelInfo.type;
|
|
50
|
+
}
|
|
51
|
+
get maxContextLength() {
|
|
52
|
+
return this._modelInfo.capabilities.maxContextLength;
|
|
53
|
+
}
|
|
54
|
+
get maxOutputLength() {
|
|
55
|
+
return this._modelInfo.capabilities.defaultMaxOutputTokens;
|
|
56
|
+
}
|
|
57
|
+
isLoaded() {
|
|
58
|
+
return this._isLoaded;
|
|
59
|
+
}
|
|
60
|
+
getModelInfo() {
|
|
61
|
+
return { ...this._modelInfo }; // Return a copy to prevent mutation
|
|
62
|
+
}
|
|
63
|
+
// =============================================================================
|
|
64
|
+
// DEFAULT IMPLEMENTATION
|
|
65
|
+
// =============================================================================
|
|
66
|
+
/**
|
|
67
|
+
* Generate a response based on query and retrieved chunks
|
|
68
|
+
* This method orchestrates the generation pipeline
|
|
69
|
+
*/
|
|
70
|
+
async generate(request) {
|
|
71
|
+
if (!this._isLoaded) {
|
|
72
|
+
await this.loadModel();
|
|
73
|
+
}
|
|
74
|
+
const startTime = Date.now();
|
|
75
|
+
try {
|
|
76
|
+
// Get generation parameters with defaults
|
|
77
|
+
const maxTokens = request.maxTokens ?? this._modelInfo.capabilities.defaultMaxOutputTokens;
|
|
78
|
+
const temperature = request.temperature ?? this._modelInfo.capabilities.recommendedTemperature;
|
|
79
|
+
const topP = request.topP ?? 0.9;
|
|
80
|
+
const topK = request.topK ?? 50;
|
|
81
|
+
const repetitionPenalty = request.repetitionPenalty ?? 1.1;
|
|
82
|
+
const stopSequences = request.stopSequences ?? getDefaultStopSequences(this.modelType);
|
|
83
|
+
// Get max chunks for context (configurable, with model-specific default)
|
|
84
|
+
const maxChunksForContext = request.maxChunksForContext ??
|
|
85
|
+
this._modelInfo.capabilities.defaultMaxChunksForContext;
|
|
86
|
+
// Limit chunks to maxChunksForContext (assumes chunks are already reranked)
|
|
87
|
+
const totalChunks = request.chunks.length;
|
|
88
|
+
const limitedChunks = request.chunks.slice(0, maxChunksForContext);
|
|
89
|
+
if (totalChunks > maxChunksForContext) {
|
|
90
|
+
console.log(`📊 Using top ${maxChunksForContext} of ${totalChunks} reranked chunks for generation`);
|
|
91
|
+
}
|
|
92
|
+
// Build the prompt with context
|
|
93
|
+
const builtPrompt = buildPrompt({
|
|
94
|
+
query: request.query,
|
|
95
|
+
chunks: limitedChunks,
|
|
96
|
+
modelType: this.modelType,
|
|
97
|
+
systemPrompt: request.systemPrompt,
|
|
98
|
+
maxContextLength: this.maxContextLength,
|
|
99
|
+
reservedOutputTokens: maxTokens,
|
|
100
|
+
includeSourceAttribution: request.includeSourceAttribution
|
|
101
|
+
});
|
|
102
|
+
// Log context info
|
|
103
|
+
if (builtPrompt.contextInfo.truncated) {
|
|
104
|
+
console.warn(`⚠️ Context truncated: Only ${builtPrompt.contextInfo.chunksIncluded} of ` +
|
|
105
|
+
`${builtPrompt.contextInfo.totalChunks} chunks fit in context window`);
|
|
106
|
+
}
|
|
107
|
+
// Generate response
|
|
108
|
+
const result = await this.generateText(builtPrompt.prompt, {
|
|
109
|
+
maxTokens,
|
|
110
|
+
temperature,
|
|
111
|
+
topP,
|
|
112
|
+
topK,
|
|
113
|
+
repetitionPenalty,
|
|
114
|
+
stopSequences
|
|
115
|
+
});
|
|
116
|
+
const generationTimeMs = Date.now() - startTime;
|
|
117
|
+
// Clean up the response text
|
|
118
|
+
const cleanedResponse = this.cleanResponseText(result.text);
|
|
119
|
+
return {
|
|
120
|
+
response: cleanedResponse,
|
|
121
|
+
tokensUsed: result.promptTokens + result.completionTokens,
|
|
122
|
+
truncated: builtPrompt.contextInfo.truncated,
|
|
123
|
+
modelName: this.modelName,
|
|
124
|
+
generationTimeMs,
|
|
125
|
+
metadata: {
|
|
126
|
+
promptTokens: result.promptTokens,
|
|
127
|
+
completionTokens: result.completionTokens,
|
|
128
|
+
chunksIncluded: builtPrompt.contextInfo.chunksIncluded,
|
|
129
|
+
totalChunks: totalChunks, // Report original total, not limited
|
|
130
|
+
finishReason: result.finishReason
|
|
131
|
+
}
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
catch (error) {
|
|
135
|
+
const generationTimeMs = Date.now() - startTime;
|
|
136
|
+
if (error instanceof GenerationError) {
|
|
137
|
+
throw error;
|
|
138
|
+
}
|
|
139
|
+
throw new GenerationError(this.modelName, 'generation', `Generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
// =============================================================================
|
|
143
|
+
// PROTECTED HELPER METHODS
|
|
144
|
+
// =============================================================================
|
|
145
|
+
/**
|
|
146
|
+
* Validate that the model is loaded before operations
|
|
147
|
+
*/
|
|
148
|
+
ensureLoaded() {
|
|
149
|
+
if (!this._isLoaded) {
|
|
150
|
+
throw new GenerationError(this.modelName, 'generation', `Model '${this.modelName}' is not loaded. Call loadModel() first.`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Clean up response text by removing artifacts
|
|
155
|
+
*/
|
|
156
|
+
cleanResponseText(text) {
|
|
157
|
+
let cleaned = text.trim();
|
|
158
|
+
// Remove common artifacts
|
|
159
|
+
const artifactsToRemove = [
|
|
160
|
+
'<|im_end|>',
|
|
161
|
+
'<|im_start|>',
|
|
162
|
+
'<|endoftext|>',
|
|
163
|
+
'<|assistant|>',
|
|
164
|
+
'<|user|>',
|
|
165
|
+
'<|system|>'
|
|
166
|
+
];
|
|
167
|
+
for (const artifact of artifactsToRemove) {
|
|
168
|
+
cleaned = cleaned.split(artifact)[0];
|
|
169
|
+
}
|
|
170
|
+
// Remove trailing incomplete sentences (if cut off at max tokens)
|
|
171
|
+
if (cleaned.length > 0 && !cleaned.match(/[.!?]$/)) {
|
|
172
|
+
const lastSentenceEnd = Math.max(cleaned.lastIndexOf('.'), cleaned.lastIndexOf('!'), cleaned.lastIndexOf('?'));
|
|
173
|
+
if (lastSentenceEnd > cleaned.length * 0.5) {
|
|
174
|
+
cleaned = cleaned.substring(0, lastSentenceEnd + 1);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return cleaned.trim();
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Log model loading progress
|
|
181
|
+
*/
|
|
182
|
+
logModelLoading(stage, details) {
|
|
183
|
+
const message = `[${this.modelName}] ${stage}`;
|
|
184
|
+
if (details) {
|
|
185
|
+
console.log(`${message}: ${details}`);
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
console.log(message);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Handle model loading errors with helpful messages
|
|
193
|
+
*/
|
|
194
|
+
handleLoadingError(error) {
|
|
195
|
+
const baseMessage = `Failed to load generator model '${this.modelName}': ${error.message}`;
|
|
196
|
+
// Provide specific guidance based on error type
|
|
197
|
+
if (error.message.includes('network') || error.message.includes('fetch')) {
|
|
198
|
+
return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
|
|
199
|
+
`This appears to be a network error. Please check your internet connection ` +
|
|
200
|
+
`and ensure the model repository is accessible.`, error);
|
|
201
|
+
}
|
|
202
|
+
if (error.message.includes('memory') || error.message.includes('OOM')) {
|
|
203
|
+
return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
|
|
204
|
+
`This appears to be a memory error. The model requires ` +
|
|
205
|
+
`${this._modelInfo.requirements.minimumMemory}MB. Try closing other applications ` +
|
|
206
|
+
`or using a smaller model like 'Xenova/distilgpt2'.`, error);
|
|
207
|
+
}
|
|
208
|
+
return new GenerationError(this.modelName, 'loading', baseMessage, error);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
/**
|
|
212
|
+
* Create generator options with defaults
|
|
213
|
+
*/
|
|
214
|
+
export function createGeneratorOptions(options = {}) {
|
|
215
|
+
return {
|
|
216
|
+
timeout: 60000, // 60 seconds
|
|
217
|
+
enableGPU: false,
|
|
218
|
+
logLevel: 'info',
|
|
219
|
+
...options
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
//# sourceMappingURL=abstract-generator.js.map
|
|
@@ -27,10 +27,12 @@ export class BinaryIndexFormat {
|
|
|
27
27
|
* @param data Index data to serialize
|
|
28
28
|
*/
|
|
29
29
|
static async save(indexPath, data) {
|
|
30
|
-
//
|
|
30
|
+
// Use actual vector count to ensure accurate file size
|
|
31
|
+
const actualVectorCount = data.vectors.length;
|
|
32
|
+
// Calculate total size based on actual vectors
|
|
31
33
|
const headerSize = 24; // 6 uint32 fields
|
|
32
34
|
const vectorSize = 4 + (data.dimensions * 4); // id + vector
|
|
33
|
-
const totalSize = headerSize + (
|
|
35
|
+
const totalSize = headerSize + (actualVectorCount * vectorSize);
|
|
34
36
|
const buffer = new ArrayBuffer(totalSize);
|
|
35
37
|
const view = new DataView(buffer);
|
|
36
38
|
let offset = 0;
|
|
@@ -45,7 +47,8 @@ export class BinaryIndexFormat {
|
|
|
45
47
|
offset += 4;
|
|
46
48
|
view.setUint32(offset, data.seed, true);
|
|
47
49
|
offset += 4;
|
|
48
|
-
|
|
50
|
+
// Write actual vector count in header
|
|
51
|
+
view.setUint32(offset, actualVectorCount, true);
|
|
49
52
|
offset += 4;
|
|
50
53
|
// Write vectors
|
|
51
54
|
for (const item of data.vectors) {
|
|
@@ -187,6 +190,9 @@ export class BinaryIndexFormat {
|
|
|
187
190
|
const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
|
|
188
191
|
let offset = 0;
|
|
189
192
|
// Read basic header (24 bytes, all little-endian)
|
|
193
|
+
if (buffer.byteLength < 24) {
|
|
194
|
+
throw new Error(`Index file too small: expected at least 24 bytes, got ${buffer.byteLength}`);
|
|
195
|
+
}
|
|
190
196
|
const dimensions = view.getUint32(offset, true);
|
|
191
197
|
offset += 4;
|
|
192
198
|
const maxElements = view.getUint32(offset, true);
|
|
@@ -199,10 +205,20 @@ export class BinaryIndexFormat {
|
|
|
199
205
|
offset += 4;
|
|
200
206
|
const currentSize = view.getUint32(offset, true);
|
|
201
207
|
offset += 4;
|
|
202
|
-
//
|
|
203
|
-
const
|
|
204
|
-
|
|
205
|
-
|
|
208
|
+
// Calculate expected size for original format
|
|
209
|
+
const vectorSize = 4 + (dimensions * 4); // id + vector
|
|
210
|
+
const expectedOriginalSize = 24 + (currentSize * vectorSize);
|
|
211
|
+
// Check if this is the extended grouped format (44 bytes header)
|
|
212
|
+
// Extended header has: 24 bytes basic + 4 bytes hasGroups + 16 bytes for offsets/counts = 44 bytes
|
|
213
|
+
// Only check for grouped format if file is larger than expected original format size
|
|
214
|
+
const hasGroups = buffer.byteLength > expectedOriginalSize && buffer.byteLength >= 44 && offset + 4 <= buffer.byteLength
|
|
215
|
+
? view.getUint32(offset, true)
|
|
216
|
+
: 0;
|
|
217
|
+
if (hasGroups === 1 && buffer.byteLength >= 44) {
|
|
218
|
+
// Load grouped format - ensure we have enough bytes for extended header
|
|
219
|
+
if (offset + 20 > buffer.byteLength) {
|
|
220
|
+
throw new Error(`Index file too small for grouped format: expected at least ${offset + 20} bytes, got ${buffer.byteLength}`);
|
|
221
|
+
}
|
|
206
222
|
const textOffset = view.getUint32(offset + 4, true);
|
|
207
223
|
const textCount = view.getUint32(offset + 8, true);
|
|
208
224
|
const imageOffset = view.getUint32(offset + 12, true);
|
|
@@ -215,14 +231,23 @@ export class BinaryIndexFormat {
|
|
|
215
231
|
if (offset % 4 !== 0) {
|
|
216
232
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
217
233
|
}
|
|
234
|
+
// Check bounds before reading vector ID
|
|
235
|
+
if (offset + 4 > buffer.byteLength) {
|
|
236
|
+
throw new Error(`Text vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
237
|
+
}
|
|
218
238
|
// Read vector ID
|
|
219
239
|
const id = view.getUint32(offset, true);
|
|
220
240
|
offset += 4;
|
|
241
|
+
// Check bounds before reading vector data
|
|
242
|
+
const vectorDataSize = dimensions * 4;
|
|
243
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
244
|
+
throw new Error(`Text vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
245
|
+
}
|
|
221
246
|
// Zero-copy Float32Array view
|
|
222
247
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
223
248
|
// Copy to avoid buffer lifecycle issues
|
|
224
249
|
const vector = new Float32Array(vectorView);
|
|
225
|
-
offset +=
|
|
250
|
+
offset += vectorDataSize;
|
|
226
251
|
textVectors.push({ id, vector });
|
|
227
252
|
}
|
|
228
253
|
// Load image vectors
|
|
@@ -233,14 +258,23 @@ export class BinaryIndexFormat {
|
|
|
233
258
|
if (offset % 4 !== 0) {
|
|
234
259
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
235
260
|
}
|
|
261
|
+
// Check bounds before reading vector ID
|
|
262
|
+
if (offset + 4 > buffer.byteLength) {
|
|
263
|
+
throw new Error(`Image vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
264
|
+
}
|
|
236
265
|
// Read vector ID
|
|
237
266
|
const id = view.getUint32(offset, true);
|
|
238
267
|
offset += 4;
|
|
268
|
+
// Check bounds before reading vector data
|
|
269
|
+
const vectorDataSize = dimensions * 4;
|
|
270
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
271
|
+
throw new Error(`Image vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
272
|
+
}
|
|
239
273
|
// Zero-copy Float32Array view
|
|
240
274
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
241
275
|
// Copy to avoid buffer lifecycle issues
|
|
242
276
|
const vector = new Float32Array(vectorView);
|
|
243
|
-
offset +=
|
|
277
|
+
offset += vectorDataSize;
|
|
244
278
|
imageVectors.push({ id, vector });
|
|
245
279
|
}
|
|
246
280
|
// Combine all vectors for backward compatibility
|
|
@@ -266,14 +300,23 @@ export class BinaryIndexFormat {
|
|
|
266
300
|
if (offset % 4 !== 0) {
|
|
267
301
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
268
302
|
}
|
|
303
|
+
// Check bounds before reading vector ID
|
|
304
|
+
if (offset + 4 > buffer.byteLength) {
|
|
305
|
+
throw new Error(`Offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
306
|
+
}
|
|
269
307
|
// Read vector ID
|
|
270
308
|
const id = view.getUint32(offset, true);
|
|
271
309
|
offset += 4;
|
|
310
|
+
// Check bounds before reading vector data
|
|
311
|
+
const vectorDataSize = dimensions * 4;
|
|
312
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
313
|
+
throw new Error(`Vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
314
|
+
}
|
|
272
315
|
// Zero-copy Float32Array view (fast!)
|
|
273
316
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
274
317
|
// Copy to avoid buffer lifecycle issues
|
|
275
318
|
const vector = new Float32Array(vectorView);
|
|
276
|
-
offset +=
|
|
319
|
+
offset += vectorDataSize;
|
|
277
320
|
vectors.push({ id, vector });
|
|
278
321
|
}
|
|
279
322
|
return {
|
package/dist/cjs/core/db.d.ts
CHANGED
|
@@ -210,4 +210,60 @@ export declare function updateStorageStats(connection: DatabaseConnection, stats
|
|
|
210
210
|
filesystemRefs?: number;
|
|
211
211
|
lastCleanup?: Date;
|
|
212
212
|
}): Promise<void>;
|
|
213
|
+
/**
|
|
214
|
+
* Result of a database reset operation
|
|
215
|
+
*/
|
|
216
|
+
export interface DatabaseResetResult {
|
|
217
|
+
/** Whether the reset was successful */
|
|
218
|
+
success: boolean;
|
|
219
|
+
/** Number of documents deleted */
|
|
220
|
+
documentsDeleted: number;
|
|
221
|
+
/** Number of chunks deleted */
|
|
222
|
+
chunksDeleted: number;
|
|
223
|
+
/** Number of content metadata entries deleted */
|
|
224
|
+
contentMetadataDeleted: number;
|
|
225
|
+
/** True if system_info was cleared; false if it was preserved */
|
|
226
|
+
systemInfoCleared: boolean;
|
|
227
|
+
/** Time taken for the reset operation in milliseconds */
|
|
228
|
+
resetTimeMs: number;
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Options for database reset operation
|
|
232
|
+
*/
|
|
233
|
+
export interface DatabaseResetOptions {
|
|
234
|
+
/** Whether to preserve system_info (mode, model configuration) - default: false */
|
|
235
|
+
preserveSystemInfo?: boolean;
|
|
236
|
+
/** Whether to run VACUUM after deletion to reclaim space - default: true */
|
|
237
|
+
runVacuum?: boolean;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Reset the database by deleting all data while keeping the schema intact.
|
|
241
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
242
|
+
*
|
|
243
|
+
* This function:
|
|
244
|
+
* 1. Deletes all rows from chunks, documents, content_metadata tables
|
|
245
|
+
* 2. Optionally clears system_info (mode/model configuration)
|
|
246
|
+
* 3. Resets storage_stats counters
|
|
247
|
+
* 4. Optionally runs VACUUM to reclaim disk space
|
|
248
|
+
*
|
|
249
|
+
* @param connection - Database connection object
|
|
250
|
+
* @param options - Reset options
|
|
251
|
+
* @returns Promise resolving to reset result statistics
|
|
252
|
+
*
|
|
253
|
+
* @example
|
|
254
|
+
* ```typescript
|
|
255
|
+
* const db = await openDatabase('./db.sqlite');
|
|
256
|
+
* const result = await resetDatabase(db, { preserveSystemInfo: false });
|
|
257
|
+
* console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
|
|
258
|
+
* ```
|
|
259
|
+
*/
|
|
260
|
+
export declare function resetDatabase(connection: DatabaseConnection, options?: DatabaseResetOptions): Promise<DatabaseResetResult>;
|
|
261
|
+
/**
|
|
262
|
+
* Check if the database has any data (documents, chunks, or content)
|
|
263
|
+
* Useful for determining whether a reset is needed.
|
|
264
|
+
*
|
|
265
|
+
* @param connection - Database connection object
|
|
266
|
+
* @returns Promise resolving to true if database has data, false if empty
|
|
267
|
+
*/
|
|
268
|
+
export declare function hasDatabaseData(connection: DatabaseConnection): Promise<boolean>;
|
|
213
269
|
//# sourceMappingURL=db.d.ts.map
|