rag-lite-ts 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/search.js +77 -2
  4. package/dist/cjs/cli/ui-server.d.ts +5 -0
  5. package/dist/cjs/cli/ui-server.js +152 -0
  6. package/dist/cjs/cli.js +53 -7
  7. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  8. package/dist/cjs/core/abstract-generator.js +222 -0
  9. package/dist/cjs/core/binary-index-format.js +53 -10
  10. package/dist/cjs/core/db.d.ts +56 -0
  11. package/dist/cjs/core/db.js +105 -0
  12. package/dist/cjs/core/generator-registry.d.ts +114 -0
  13. package/dist/cjs/core/generator-registry.js +280 -0
  14. package/dist/cjs/core/index.d.ts +4 -0
  15. package/dist/cjs/core/index.js +11 -0
  16. package/dist/cjs/core/ingestion.js +3 -0
  17. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  18. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  19. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  20. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  21. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  22. package/dist/cjs/core/prompt-templates.js +225 -0
  23. package/dist/cjs/core/response-generator.d.ts +132 -0
  24. package/dist/cjs/core/response-generator.js +69 -0
  25. package/dist/cjs/core/search-pipeline.js +1 -1
  26. package/dist/cjs/core/search.d.ts +72 -1
  27. package/dist/cjs/core/search.js +80 -7
  28. package/dist/cjs/core/types.d.ts +1 -0
  29. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  30. package/dist/cjs/core/vector-index-messages.js +5 -0
  31. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  32. package/dist/cjs/core/vector-index-worker.js +314 -0
  33. package/dist/cjs/core/vector-index.d.ts +45 -10
  34. package/dist/cjs/core/vector-index.js +279 -218
  35. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  36. package/dist/cjs/factories/generator-factory.js +151 -0
  37. package/dist/cjs/factories/index.d.ts +1 -0
  38. package/dist/cjs/factories/index.js +5 -0
  39. package/dist/cjs/factories/ingestion-factory.js +3 -7
  40. package/dist/cjs/factories/search-factory.js +11 -0
  41. package/dist/cjs/index-manager.d.ts +23 -3
  42. package/dist/cjs/index-manager.js +84 -15
  43. package/dist/cjs/index.d.ts +11 -1
  44. package/dist/cjs/index.js +19 -1
  45. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  46. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  47. package/dist/cjs/text/generators/index.d.ts +10 -0
  48. package/dist/cjs/text/generators/index.js +10 -0
  49. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  50. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  51. package/dist/esm/cli/indexer.js +73 -15
  52. package/dist/esm/cli/search.js +77 -2
  53. package/dist/esm/cli/ui-server.d.ts +5 -0
  54. package/dist/esm/cli/ui-server.js +152 -0
  55. package/dist/esm/cli.js +53 -7
  56. package/dist/esm/core/abstract-generator.d.ts +97 -0
  57. package/dist/esm/core/abstract-generator.js +222 -0
  58. package/dist/esm/core/binary-index-format.js +53 -10
  59. package/dist/esm/core/db.d.ts +56 -0
  60. package/dist/esm/core/db.js +105 -0
  61. package/dist/esm/core/generator-registry.d.ts +114 -0
  62. package/dist/esm/core/generator-registry.js +280 -0
  63. package/dist/esm/core/index.d.ts +4 -0
  64. package/dist/esm/core/index.js +11 -0
  65. package/dist/esm/core/ingestion.js +3 -0
  66. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  67. package/dist/esm/core/knowledge-base-manager.js +256 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  70. package/dist/esm/core/prompt-templates.d.ts +138 -0
  71. package/dist/esm/core/prompt-templates.js +225 -0
  72. package/dist/esm/core/response-generator.d.ts +132 -0
  73. package/dist/esm/core/response-generator.js +69 -0
  74. package/dist/esm/core/search-pipeline.js +1 -1
  75. package/dist/esm/core/search.d.ts +72 -1
  76. package/dist/esm/core/search.js +80 -7
  77. package/dist/esm/core/types.d.ts +1 -0
  78. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  79. package/dist/esm/core/vector-index-messages.js +5 -0
  80. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  81. package/dist/esm/core/vector-index-worker.js +314 -0
  82. package/dist/esm/core/vector-index.d.ts +45 -10
  83. package/dist/esm/core/vector-index.js +279 -218
  84. package/dist/esm/factories/generator-factory.d.ts +88 -0
  85. package/dist/esm/factories/generator-factory.js +151 -0
  86. package/dist/esm/factories/index.d.ts +1 -0
  87. package/dist/esm/factories/index.js +5 -0
  88. package/dist/esm/factories/ingestion-factory.js +3 -7
  89. package/dist/esm/factories/search-factory.js +11 -0
  90. package/dist/esm/index-manager.d.ts +23 -3
  91. package/dist/esm/index-manager.js +84 -15
  92. package/dist/esm/index.d.ts +11 -1
  93. package/dist/esm/index.js +19 -1
  94. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  95. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  96. package/dist/esm/text/generators/index.d.ts +10 -0
  97. package/dist/esm/text/generators/index.js +10 -0
  98. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  99. package/dist/esm/text/generators/instruct-generator.js +192 -0
  100. package/package.json +14 -7
@@ -0,0 +1,280 @@
1
+ /**
2
+ * CORE MODULE — Generator Registry for RAG Response Generation
3
+ *
4
+ * Centralized registry of supported generator models with validation and
5
+ * compatibility checking. Follows the same patterns as model-registry.ts.
6
+ *
7
+ * SUPPORTED MODELS:
8
+ * - HuggingFaceTB/SmolLM2-135M-Instruct: Balanced instruct model (DEFAULT, 3 chunks)
9
+ * - HuggingFaceTB/SmolLM2-360M-Instruct: Higher quality instruct model (5 chunks)
10
+ *
11
+ * PREREQUISITES:
12
+ * - Reranking must be enabled for response generation to ensure quality context
13
+ *
14
+ * @experimental This feature is experimental and may change in future versions.
15
+ */
// =============================================================================
// GENERATOR REGISTRY
// =============================================================================
/**
 * Registry of supported generator models, keyed by model name.
 *
 * Each entry describes the model's type, its generation capabilities
 * (streaming, system-prompt support, context window, output defaults) and the
 * runtime requirements (transformers.js version, memory in MB, platforms).
 */
export const SUPPORTED_GENERATORS = {
    // SmolLM2-135M-Instruct - Balanced instruction-tuned model (RECOMMENDED DEFAULT)
    'HuggingFaceTB/SmolLM2-135M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Balanced instruction-tuned model with good quality and speed (uses top 3 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true, // Instruct models support system prompts
            instructionTuned: true,
            maxContextLength: 2048,
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1,
            defaultMaxChunksForContext: 3 // Use top 3 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 768,
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: true // Recommended default model
    },
    // SmolLM2-360M-Instruct - Higher quality instruction-tuned model
    'HuggingFaceTB/SmolLM2-360M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-360M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Higher quality instruction-tuned model, slower but more accurate (uses top 5 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true,
            instructionTuned: true,
            maxContextLength: 2048,
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1,
            defaultMaxChunksForContext: 5 // Use top 5 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 1024,
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: false
    }
};
// =============================================================================
// DEFAULT MODEL
// =============================================================================
/** Default generator model name; must match the registry entry flagged `isDefault: true`. */
export const DEFAULT_GENERATOR_MODEL = 'HuggingFaceTB/SmolLM2-135M-Instruct';
// =============================================================================
// GENERATOR REGISTRY CLASS
// =============================================================================
/**
 * Generator registry class providing validation and model information services.
 *
 * All methods are static and refer to the class by name rather than `this`,
 * so they keep working when passed around as detached callbacks.
 */
export class GeneratorRegistry {
    /**
     * Gets generator model information for a given model name
     * @param modelName - Name of the generator model
     * @returns Generator model information or null if not supported
     */
    static getGeneratorInfo(modelName) {
        // Only own keys count as models: a plain lookup would otherwise hand back
        // inherited members such as Object.prototype.constructor / toString.
        if (!Object.prototype.hasOwnProperty.call(SUPPORTED_GENERATORS, modelName)) {
            return null;
        }
        return SUPPORTED_GENERATORS[modelName] || null;
    }
    /**
     * Validates a generator model name and returns compatibility information
     * @param modelName - Name of the model to validate
     * @returns Validation result with errors, warnings, and suggestions
     */
    static validateGenerator(modelName) {
        const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
        if (!modelInfo) {
            const similar = GeneratorRegistry.getSimilarGenerators(modelName);
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: similar.length > 0
                    ? [`Did you mean: ${similar.join(', ')}?`]
                    : [`Available generators: ${GeneratorRegistry.getSupportedGenerators().join(', ')}`]
            };
        }
        const warnings = [];
        const suggestions = [];
        // Memory warnings: only models needing more than 768MB are called out
        if (modelInfo.requirements.minimumMemory > 768) {
            warnings.push(`Model requires ${modelInfo.requirements.minimumMemory}MB memory`);
        }
        return {
            isValid: true,
            errors: [],
            warnings,
            suggestions
        };
    }
    /**
     * Lists all supported generator models
     * @param modelType - Optional filter by model type; a falsy value returns every model
     * @returns Array of supported generator model names
     */
    static getSupportedGenerators(modelType) {
        const allModels = Object.keys(SUPPORTED_GENERATORS);
        if (!modelType) {
            return allModels;
        }
        return allModels.filter(modelName => SUPPORTED_GENERATORS[modelName].type === modelType);
    }
    /**
     * Gets the default generator model name
     * @returns Default generator model name
     */
    static getDefaultGenerator() {
        return DEFAULT_GENERATOR_MODEL;
    }
    /**
     * Gets generators by type
     * @param type - Generator type ('causal-lm' or 'instruct')
     * @returns Array of model names matching the type (empty when no type matches)
     */
    static getGeneratorsByType(type) {
        return Object.keys(SUPPORTED_GENERATORS).filter(modelName => SUPPORTED_GENERATORS[modelName].type === type);
    }
    /**
     * Checks if a generator model supports a specific capability
     * @param modelName - Name of the model
     * @param capability - Capability to check
     * @returns The flag itself for boolean capabilities; for non-boolean
     *          capabilities, true when the capability is defined. False for
     *          unsupported models.
     */
    static supportsCapability(modelName, capability) {
        const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
        if (!modelInfo) {
            return false;
        }
        const value = modelInfo.capabilities[capability];
        return typeof value === 'boolean' ? value : value !== undefined;
    }
    /**
     * Gets generators similar to the given model name (for suggestions).
     * Similarity is a keyword overlap check; at most three names are returned.
     * @private
     */
    static getSimilarGenerators(modelName) {
        // Callers reach this with whatever the user typed, including undefined/null
        // or non-strings; normalise instead of throwing on .toLowerCase().
        const lowerModelName = (modelName == null ? '' : String(modelName)).toLowerCase();
        // Simple similarity check based on common substrings
        const keywords = ['gpt', 'smol', 'lm', 'instruct', 'distil'];
        const modelKeywords = keywords.filter(keyword => lowerModelName.includes(keyword));
        return Object.keys(SUPPORTED_GENERATORS).filter(supportedModel => {
            const lowerSupported = supportedModel.toLowerCase();
            return modelKeywords.some(keyword => lowerSupported.includes(keyword));
        }).slice(0, 3);
    }
    /**
     * Validates system compatibility for a generator model
     * @param modelName - Name of the model
     * @param systemCapabilities - System capabilities to check against
     *        (`availableMemory` in MB, `platform`); omitted fields are not checked
     * @returns Validation result with compatibility information
     */
    static validateSystemCompatibility(modelName, systemCapabilities) {
        const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
        if (!modelInfo) {
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: []
            };
        }
        // Treat a missing capabilities object as "nothing to check"
        const capabilities = systemCapabilities || {};
        const errors = [];
        const warnings = [];
        const suggestions = [];
        // Check memory requirements
        const availableMemory = capabilities.availableMemory;
        if (availableMemory != null && availableMemory < modelInfo.requirements.minimumMemory) {
            errors.push(`Insufficient memory: ${availableMemory}MB available, ` +
                `${modelInfo.requirements.minimumMemory}MB required`);
            // Suggest lighter models that fit into the available memory
            const lighterModels = GeneratorRegistry.getSupportedGenerators().filter(name => {
                const info = GeneratorRegistry.getGeneratorInfo(name);
                return info && info.requirements.minimumMemory <= availableMemory;
            });
            if (lighterModels.length > 0) {
                suggestions.push(`Consider lighter models: ${lighterModels.join(', ')}`);
            }
        }
        // Check platform compatibility
        if (capabilities.platform &&
            !modelInfo.requirements.platformSupport.includes(capabilities.platform)) {
            errors.push(`Platform '${capabilities.platform}' not supported. ` +
                `Supported platforms: ${modelInfo.requirements.platformSupport.join(', ')}`);
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
            suggestions
        };
    }
}
// =============================================================================
// UTILITY FUNCTIONS
// =============================================================================
/**
 * Gets the generator type for a given model name
 * @param modelName - Name of the model
 * @returns Generator type or null if model not supported
 */
export function getGeneratorType(modelName) {
    const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
    return modelInfo ? modelInfo.type : null;
}
/**
 * Checks if a model is an instruction-tuned model
 * @param modelName - Name of the model
 * @returns True if the model is instruction-tuned; false for unsupported
 *          models or entries that do not declare the flag
 */
export function isInstructionTunedModel(modelName) {
    const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
    // Coerce so a registry entry without the flag yields false, not undefined
    return modelInfo ? Boolean(modelInfo.capabilities.instructionTuned) : false;
}
/**
 * Gets the maximum context length for a generator model
 * @param modelName - Name of the model
 * @returns Maximum context length or null if model not supported
 */
export function getMaxContextLength(modelName) {
    const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
    return modelInfo ? modelInfo.capabilities.maxContextLength : null;
}
/**
 * Gets recommended generation settings for a model
 * @param modelName - Name of the model
 * @returns Recommended settings (`temperature`, `maxTokens`,
 *          `maxChunksForContext`) or null if model not supported
 */
export function getRecommendedSettings(modelName) {
    const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
    if (!modelInfo) {
        return null;
    }
    const { recommendedTemperature, defaultMaxOutputTokens, defaultMaxChunksForContext } = modelInfo.capabilities;
    return {
        temperature: recommendedTemperature,
        maxTokens: defaultMaxOutputTokens,
        maxChunksForContext: defaultMaxChunksForContext
    };
}
/**
 * Gets the default maximum chunks for context for a generator model
 * @param modelName - Name of the model
 * @returns Default max chunks for context or null if model not supported
 */
export function getDefaultMaxChunksForContext(modelName) {
    const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
    return modelInfo ? modelInfo.capabilities.defaultMaxChunksForContext : null;
}
280
+ //# sourceMappingURL=generator-registry.js.map
@@ -56,4 +56,8 @@ export * from './path-manager.js';
56
56
  export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
57
57
  export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
58
58
  export * from './error-handler.js';
59
+ export { type ResponseGenerator, type GeneratorModelType, type GenerationRequest, type GenerationResult, type GeneratorCapabilities, type GeneratorRequirements, type GeneratorModelInfo, type GeneratorValidationResult, type GeneratorCreationOptions, type GenerateFunction, type CreateGeneratorFunction, GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
60
+ export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
61
+ export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences, type ContextFormattingOptions, type FormattedContext, type PromptBuildOptions, type BuiltPrompt } from './prompt-templates.js';
62
+ export { BaseResponseGenerator, createGeneratorOptions, type GeneratorOptions } from './abstract-generator.js';
59
63
  //# sourceMappingURL=index.d.ts.map
@@ -66,4 +66,15 @@ export { ContentManager } from './content-manager.js';
66
66
  export { ContentResolver } from './content-resolver.js';
67
67
  // Error handling framework - supports implementation-specific error contexts
68
68
  export * from './error-handler.js';
69
+ // =============================================================================
70
+ // EXPERIMENTAL: Response Generation
71
+ // =============================================================================
72
+ // Response generation types and interfaces (experimental)
73
+ export { GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
74
+ // Generator registry (experimental)
75
+ export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
76
+ // Prompt templates for generation (experimental)
77
+ export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences } from './prompt-templates.js';
78
+ // Abstract base generator (experimental)
79
+ export { BaseResponseGenerator, createGeneratorOptions } from './abstract-generator.js';
69
80
  //# sourceMappingURL=index.js.map
@@ -340,6 +340,9 @@ export class IngestionPipeline {
340
340
  // Phase 5: Vector Index Updates
341
341
  console.log('\n--- Phase 5: Vector Index Updates ---');
342
342
  await this.updateVectorIndex(embeddingResult.embeddings);
343
+ // Final save to ensure all vectors are persisted
344
+ console.log('Performing final index save...');
345
+ await this.indexManager.saveIndex();
343
346
  const endTime = Date.now();
344
347
  const processingTimeMs = endTime - startTime;
345
348
  const result = {
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Knowledge Base Manager
3
+ *
4
+ * Provides a unified API for managing the knowledge base (database + vector index).
5
+ * This module is designed to solve file locking issues on Windows by using
6
+ * in-place reset operations instead of file deletion.
7
+ *
8
+ * Key Features:
9
+ * - Reset database and index without file deletion (avoids EBUSY/EACCES errors)
10
+ * - Coordinated reset of both database and index in a single operation
11
+ * - Connection management to prevent orphaned handles
12
+ * - Cross-platform compatibility (especially Windows)
13
+ *
14
+ * @module knowledge-base-manager
15
+ */
16
+ import { type DatabaseResetResult } from './db.js';
/**
 * Result of a knowledge base reset operation
 */
export interface KnowledgeBaseResetResult {
    /** Whether the overall reset was successful */
    success: boolean;
    /** Database reset result */
    database: DatabaseResetResult;
    /** Index reset statistics */
    index: {
        /**
         * Number of vectors cleared. `0` when no index file existed;
         * `-1` when the count is unknown because the old index could not be
         * loaded and was recreated (e.g. after switching embedding models).
         */
        vectorsCleared: number;
        /** Time taken for index reset in milliseconds */
        resetTimeMs: number;
    };
    /** Total time for the complete reset operation in milliseconds */
    totalTimeMs: number;
    /** Any warnings that occurred during reset */
    warnings: string[];
}
/**
 * Options for knowledge base reset operation
 */
export interface KnowledgeBaseResetOptions {
    /** Whether to preserve system_info (mode, model configuration) - default: false */
    preserveSystemInfo?: boolean;
    /** Whether to run VACUUM after database reset - default: true */
    runVacuum?: boolean;
    /** Model name to use for index recreation - default: the embedding model from config */
    modelName?: string;
}
/**
 * Knowledge Base Manager
 *
 * Manages the complete knowledge base lifecycle including database and vector index.
 * Provides safe reset operations that avoid file locking issues on Windows.
 *
 * @example
 * ```typescript
 * // Reset knowledge base for force rebuild
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin');
 * console.log(`Reset ${result.database.documentsDeleted} documents and ${result.index.vectorsCleared} vectors`);
 *
 * // Reset with options
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin', {
 *   preserveSystemInfo: true,  // Keep mode/model configuration
 *   modelName: 'all-MiniLM-L6-v2'  // Specify model for index
 * });
 * ```
 */
export declare class KnowledgeBaseManager {
    /**
     * Reset the knowledge base by clearing all data while keeping files intact.
     * This is a safer alternative to file deletion that avoids file locking issues on Windows.
     *
     * The reset operation:
     * 1. Closes any existing connections via DatabaseConnectionManager
     * 2. Opens a fresh connection to the database
     * 3. Deletes all rows from documents, chunks, content_metadata tables
     * 4. Optionally runs VACUUM to reclaim disk space
     * 5. Reinitializes the vector index (clears all vectors)
     * 6. Saves the empty index to disk (overwrites existing file content)
     *
     * This approach works because:
     * - We don't delete files, so no EBUSY/EACCES errors
     * - The same file handles can be reused or replaced safely
     * - SQLite transactions ensure data integrity
     * - Index overwrite uses standard file write operations
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Reset options
     * @returns Promise resolving to reset result statistics
     *
     * @throws Error if database or index reset fails
     */
    static reset(dbPath: string, indexPath: string, options?: KnowledgeBaseResetOptions): Promise<KnowledgeBaseResetResult>;
    /**
     * Check if the knowledge base has any data
     *
     * @param dbPath - Path to the SQLite database file
     * @returns Promise resolving to true if database has data, false if it is
     *          empty or cannot be opened
     */
    static hasData(dbPath: string): Promise<boolean>;
    /**
     * Close all connections to the knowledge base
     * Useful before operations that might conflict with open handles
     *
     * @param dbPath - Path to the SQLite database file
     */
    static closeAllConnections(dbPath: string): Promise<void>;
}
109
+ //# sourceMappingURL=knowledge-base-manager.d.ts.map
@@ -0,0 +1,256 @@
1
+ /**
2
+ * Knowledge Base Manager
3
+ *
4
+ * Provides a unified API for managing the knowledge base (database + vector index).
5
+ * This module is designed to solve file locking issues on Windows by using
6
+ * in-place reset operations instead of file deletion.
7
+ *
8
+ * Key Features:
9
+ * - Reset database and index without file deletion (avoids EBUSY/EACCES errors)
10
+ * - Coordinated reset of both database and index in a single operation
11
+ * - Connection management to prevent orphaned handles
12
+ * - Cross-platform compatibility (especially Windows)
13
+ *
14
+ * @module knowledge-base-manager
15
+ */
16
+ import { openDatabase, resetDatabase, hasDatabaseData } from './db.js';
17
+ import { IndexManager } from '../index-manager.js';
18
+ import { DatabaseConnectionManager } from './database-connection-manager.js';
19
+ import { getModelDefaults, config } from './config.js';
20
+ import { existsSync } from 'fs';
/**
 * Knowledge Base Manager
 *
 * Manages the complete knowledge base lifecycle including database and vector index.
 * Provides safe reset operations that avoid file locking issues on Windows.
 *
 * @example
 * ```typescript
 * // Reset knowledge base for force rebuild
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin');
 * console.log(`Reset ${result.database.documentsDeleted} documents and ${result.index.vectorsCleared} vectors`);
 *
 * // Reset with options
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin', {
 *   preserveSystemInfo: true,  // Keep mode/model configuration
 *   modelName: 'all-MiniLM-L6-v2'  // Specify model for index
 * });
 * ```
 */
export class KnowledgeBaseManager {
    /**
     * Reset the knowledge base by clearing all data while keeping files intact.
     * This is a safer alternative to file deletion that avoids file locking issues on Windows.
     *
     * The reset operation:
     * 1. Closes any existing connections via DatabaseConnectionManager
     * 2. Opens a fresh connection to the database
     * 3. Resets the database contents via resetDatabase
     * 4. Reinitializes the vector index (clears all vectors), recreating it
     *    from scratch when the stored index has a different vector dimension
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Reset options (preserveSystemInfo, runVacuum, modelName)
     * @returns Promise resolving to reset result statistics; `index.vectorsCleared`
     *          is -1 when the previous count is unknown because the index was recreated
     *
     * @throws Error if database or index reset fails (original error attached as `cause`)
     */
    static async reset(dbPath, indexPath, options = {}) {
        const startTime = Date.now();
        const warnings = [];
        // Tolerate an explicit null as well as an omitted options argument
        const resetOptions = options || {};
        console.log('🔄 Starting knowledge base reset...');
        console.log(` Database: ${dbPath}`);
        console.log(` Index: ${indexPath}`);
        // Step 1: Close any existing managed connections to prevent conflicts.
        // Failures here are downgraded to warnings; the reset still proceeds.
        console.log('\n📡 Step 1: Closing existing connections...');
        try {
            if (DatabaseConnectionManager.hasConnection(dbPath)) {
                await DatabaseConnectionManager.forceCloseConnection(dbPath);
                console.log(' ✓ Closed existing database connection');
            }
            else {
                console.log(' ✓ No existing connection to close');
            }
        }
        catch (error) {
            const warning = `Warning: Error closing existing connection: ${describeError(error)}`;
            warnings.push(warning);
            console.warn(` ⚠️ ${warning}`);
        }
        // Small delay to ensure handles are fully released
        await new Promise(resolve => setTimeout(resolve, 50));
        // Step 2: Reset the database
        console.log('\n💾 Step 2: Resetting database...');
        const dbResetResult = await clearDatabaseTables(dbPath, resetOptions, warnings);
        // Step 3: Reset the vector index
        console.log('\n📇 Step 3: Resetting vector index...');
        const indexResetResult = await clearVectorIndex(dbPath, indexPath, resetOptions);
        const totalTimeMs = Date.now() - startTime;
        // Summary
        console.log('\n✅ Knowledge base reset complete!');
        console.log(` Total time: ${totalTimeMs}ms`);
        console.log(` Documents deleted: ${dbResetResult.documentsDeleted}`);
        console.log(` Chunks deleted: ${dbResetResult.chunksDeleted}`);
        console.log(` Vectors cleared: ${indexResetResult.vectorsCleared === -1 ? '(unknown - index recreated due to model change)' : indexResetResult.vectorsCleared}`);
        if (warnings.length > 0) {
            console.log(` Warnings: ${warnings.length}`);
        }
        return {
            success: true,
            database: dbResetResult,
            index: indexResetResult,
            totalTimeMs,
            warnings
        };
    }
    /**
     * Check if the knowledge base has any data
     *
     * @param dbPath - Path to the SQLite database file
     * @returns Promise resolving to true if database has data, false if it is
     *          empty or cannot be opened/queried
     */
    static async hasData(dbPath) {
        let db = null;
        try {
            db = await openDatabase(dbPath);
            return await hasDatabaseData(db);
        }
        catch (error) {
            // Deliberate best-effort: if we can't open the database, assume no data
            return false;
        }
        finally {
            if (db) {
                try {
                    await db.close();
                }
                catch {
                    // Ignore close errors
                }
            }
        }
    }
    /**
     * Close all connections to the knowledge base
     * Useful before operations that might conflict with open handles.
     * Errors are logged as warnings and never thrown.
     *
     * @param dbPath - Path to the SQLite database file
     */
    static async closeAllConnections(dbPath) {
        console.log('🔒 Closing all knowledge base connections...');
        try {
            // The main database path plus the WAL/SHM sidecar paths, in case
            // connections were registered under those keys
            const candidates = [dbPath, `${dbPath}-wal`, `${dbPath}-shm`];
            for (const candidate of candidates) {
                if (DatabaseConnectionManager.hasConnection(candidate)) {
                    await DatabaseConnectionManager.forceCloseConnection(candidate);
                }
            }
            console.log('✓ All connections closed');
        }
        catch (error) {
            console.warn('⚠️ Error closing connections:', error);
        }
    }
}
/** Extracts a printable message from an unknown thrown value. */
function describeError(error) {
    return error instanceof Error ? error.message : 'Unknown error';
}
/**
 * Runs `work` with an index manager and always closes the manager afterwards.
 * A close failure is only surfaced when `work` itself succeeded, so it can
 * never mask the error that actually aborted the operation.
 */
async function useIndexManager(manager, work) {
    let outcome;
    try {
        outcome = await work(manager);
    }
    catch (workError) {
        try {
            await manager.close();
        }
        catch {
            // Secondary failure; the original error below is the one that matters
        }
        throw workError;
    }
    await manager.close();
    return outcome;
}
/**
 * Opens a fresh database connection, resets its contents and closes it again.
 * Close failures are recorded in `warnings`; reset failures are rethrown wrapped.
 */
async function clearDatabaseTables(dbPath, options, warnings) {
    let db = null;
    try {
        // Open a fresh connection
        db = await openDatabase(dbPath);
        // Perform the reset
        const dbResetResult = await resetDatabase(db, {
            preserveSystemInfo: options.preserveSystemInfo,
            runVacuum: options.runVacuum
        });
        console.log(' ✓ Database reset complete');
        return dbResetResult;
    }
    catch (error) {
        console.error(' ❌ Database reset failed:', error);
        throw new Error(`Failed to reset database: ${describeError(error)}`, { cause: error });
    }
    finally {
        // Close the database connection
        if (db) {
            try {
                await db.close();
            }
            catch (closeError) {
                warnings.push(`Warning: Error closing database after reset: ${closeError}`);
            }
        }
    }
}
/**
 * Clears the vector index at `indexPath` and reports how many vectors were removed.
 * Returns `{ vectorsCleared, resetTimeMs }`; `vectorsCleared` is 0 when no index
 * file exists and -1 when the old index had to be recreated without being loaded.
 */
async function clearVectorIndex(dbPath, indexPath, options) {
    const indexStartTime = Date.now();
    try {
        // Determine model and dimensions
        const modelName = options.modelName || config.embedding_model;
        const modelDefaults = getModelDefaults(modelName);
        // Check if index file exists
        if (!existsSync(indexPath)) {
            console.log(' Index file does not exist, will be created during ingestion');
            return {
                vectorsCleared: 0,
                resetTimeMs: Date.now() - indexStartTime
            };
        }
        const createManager = () => new IndexManager(indexPath, dbPath, modelDefaults.dimensions, modelName);
        // We need to handle dimension mismatch gracefully since the user might be
        // switching models (e.g., from MPNet 768D to MiniLM 384D)
        const vectorsCleared = await useIndexManager(createManager(), async (indexManager) => {
            try {
                // skipModelCheck=true since we're resetting anyway; forceRecreate=false
                // so the existing index is loaded and its vector count can be read
                await indexManager.initialize(true, false);
                const previousVectorCount = (await indexManager.hasVectors())
                    ? (await indexManager.getStats()).totalVectors
                    : 0;
                await indexManager.reset();
                return previousVectorCount;
            }
            catch (initError) {
                const errorMessage = initError?.message || String(initError);
                const isDimensionMismatch = errorMessage.includes('dimension mismatch') ||
                    errorMessage.includes('Vector dimension');
                if (!isDimensionMismatch) {
                    // Re-throw other errors
                    throw initError;
                }
                console.log(' ⚠️ Dimension mismatch detected - forcing index recreation');
                console.log(' (This is expected when switching embedding models)');
                // Create a fresh IndexManager and force recreate
                await useIndexManager(createManager(), async (freshIndexManager) => {
                    await freshIndexManager.initialize(true, true); // skipModelCheck=true, forceRecreate=true
                    await freshIndexManager.saveIndex();
                });
                // The old index could not be loaded, so its vector count is unknown
                return -1;
            }
        });
        console.log(' ✓ Index reset complete');
        return {
            vectorsCleared,
            resetTimeMs: Date.now() - indexStartTime
        };
    }
    catch (error) {
        console.error(' ❌ Index reset failed:', error);
        throw new Error(`Failed to reset index: ${describeError(error)}`, { cause: error });
    }
}
256
+ //# sourceMappingURL=knowledge-base-manager.js.map