codecritique 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1145 -0
- package/package.json +98 -0
- package/src/content-retrieval.js +747 -0
- package/src/custom-documents.js +597 -0
- package/src/embeddings/cache-manager.js +364 -0
- package/src/embeddings/constants.js +40 -0
- package/src/embeddings/database.js +921 -0
- package/src/embeddings/errors.js +208 -0
- package/src/embeddings/factory.js +447 -0
- package/src/embeddings/file-processor.js +851 -0
- package/src/embeddings/model-manager.js +337 -0
- package/src/embeddings/similarity-calculator.js +97 -0
- package/src/embeddings/types.js +113 -0
- package/src/feedback-loader.js +384 -0
- package/src/index.js +1418 -0
- package/src/llm.js +123 -0
- package/src/pr-history/analyzer.js +579 -0
- package/src/pr-history/bot-detector.js +123 -0
- package/src/pr-history/cli-utils.js +204 -0
- package/src/pr-history/comment-processor.js +549 -0
- package/src/pr-history/database.js +819 -0
- package/src/pr-history/github-client.js +629 -0
- package/src/project-analyzer.js +955 -0
- package/src/rag-analyzer.js +2764 -0
- package/src/rag-review.js +566 -0
- package/src/technology-keywords.json +753 -0
- package/src/utils/command.js +48 -0
- package/src/utils/constants.js +263 -0
- package/src/utils/context-inference.js +364 -0
- package/src/utils/document-detection.js +105 -0
- package/src/utils/file-validation.js +271 -0
- package/src/utils/git.js +232 -0
- package/src/utils/language-detection.js +170 -0
- package/src/utils/logging.js +24 -0
- package/src/utils/markdown.js +132 -0
- package/src/utils/mobilebert-tokenizer.js +141 -0
- package/src/utils/pr-chunking.js +276 -0
- package/src/utils/string-utils.js +28 -0
- package/src/zero-shot-classifier-open.js +392 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Error Handling
|
|
3
|
+
*
|
|
4
|
+
* This module provides standardized error handling for the embeddings system.
|
|
5
|
+
* It includes custom error classes and error codes for different failure scenarios.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Error codes for different embedding failure scenarios
|
|
10
|
+
*/
|
|
11
|
+
export const ERROR_CODES = {
|
|
12
|
+
// Model initialization errors
|
|
13
|
+
MODEL_INITIALIZATION_FAILED: 'MODEL_INITIALIZATION_FAILED',
|
|
14
|
+
MODEL_NOT_INITIALIZED: 'MODEL_NOT_INITIALIZED',
|
|
15
|
+
MODEL_LOADING_FAILED: 'MODEL_LOADING_FAILED',
|
|
16
|
+
|
|
17
|
+
// Database errors
|
|
18
|
+
DB_CONNECTION_FAILED: 'DB_CONNECTION_FAILED',
|
|
19
|
+
DB_QUERY_FAILED: 'DB_QUERY_FAILED',
|
|
20
|
+
DB_INSERTION_FAILED: 'DB_INSERTION_FAILED',
|
|
21
|
+
DB_TABLE_CREATION_FAILED: 'DB_TABLE_CREATION_FAILED',
|
|
22
|
+
DB_SCHEMA_VALIDATION_FAILED: 'DB_SCHEMA_VALIDATION_FAILED',
|
|
23
|
+
|
|
24
|
+
// Embedding generation errors
|
|
25
|
+
EMBEDDING_GENERATION_FAILED: 'EMBEDDING_GENERATION_FAILED',
|
|
26
|
+
EMBEDDING_DIMENSION_MISMATCH: 'EMBEDDING_DIMENSION_MISMATCH',
|
|
27
|
+
EMBEDDING_INVALID_INPUT: 'EMBEDDING_INVALID_INPUT',
|
|
28
|
+
EMBEDDING_TIMEOUT: 'EMBEDDING_TIMEOUT',
|
|
29
|
+
|
|
30
|
+
// File processing errors
|
|
31
|
+
FILE_NOT_FOUND: 'FILE_NOT_FOUND',
|
|
32
|
+
FILE_READ_FAILED: 'FILE_READ_FAILED',
|
|
33
|
+
FILE_TOO_LARGE: 'FILE_TOO_LARGE',
|
|
34
|
+
FILE_INVALID_FORMAT: 'FILE_INVALID_FORMAT',
|
|
35
|
+
FILE_PROCESSING_FAILED: 'FILE_PROCESSING_FAILED',
|
|
36
|
+
|
|
37
|
+
// Search and similarity errors
|
|
38
|
+
SEARCH_FAILED: 'SEARCH_FAILED',
|
|
39
|
+
SIMILARITY_CALCULATION_FAILED: 'SIMILARITY_CALCULATION_FAILED',
|
|
40
|
+
INVALID_SEARCH_QUERY: 'INVALID_SEARCH_QUERY',
|
|
41
|
+
SEARCH_TIMEOUT: 'SEARCH_TIMEOUT',
|
|
42
|
+
|
|
43
|
+
// Cache errors
|
|
44
|
+
CACHE_WRITE_FAILED: 'CACHE_WRITE_FAILED',
|
|
45
|
+
CACHE_READ_FAILED: 'CACHE_READ_FAILED',
|
|
46
|
+
CACHE_INVALIDATION_FAILED: 'CACHE_INVALIDATION_FAILED',
|
|
47
|
+
|
|
48
|
+
// Configuration errors
|
|
49
|
+
CONFIG_VALIDATION_FAILED: 'CONFIG_VALIDATION_FAILED',
|
|
50
|
+
CONFIG_MISSING_REQUIRED: 'CONFIG_MISSING_REQUIRED',
|
|
51
|
+
CONFIG_INVALID_VALUE: 'CONFIG_INVALID_VALUE',
|
|
52
|
+
|
|
53
|
+
// Network and external service errors
|
|
54
|
+
NETWORK_ERROR: 'NETWORK_ERROR',
|
|
55
|
+
SERVICE_UNAVAILABLE: 'SERVICE_UNAVAILABLE',
|
|
56
|
+
API_RATE_LIMITED: 'API_RATE_LIMITED',
|
|
57
|
+
|
|
58
|
+
// Generic errors
|
|
59
|
+
UNKNOWN_ERROR: 'UNKNOWN_ERROR',
|
|
60
|
+
VALIDATION_ERROR: 'VALIDATION_ERROR',
|
|
61
|
+
TIMEOUT_ERROR: 'TIMEOUT_ERROR',
|
|
62
|
+
MEMORY_ERROR: 'MEMORY_ERROR',
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
/**
 * Custom error class for embeddings-related errors.
 *
 * Carries a machine-readable `code`, the error that triggered it, free-form
 * `context` and an ISO-8601 creation `timestamp` in addition to the usual
 * `message` and `stack`.
 */
export class EmbeddingError extends Error {
  /**
   * Create a new EmbeddingError
   *
   * @param {string} message - Error message
   * @param {string} code - Error code from ERROR_CODES
   * @param {Error} [originalError] - Original error that caused this error
   * @param {Object} [context] - Additional context information
   */
  constructor(message, code = ERROR_CODES.UNKNOWN_ERROR, originalError = null, context = {}) {
    // Also expose the wrapped error through the standard `cause` property so
    // generic tooling can follow the chain. Runtimes without support for the
    // options argument simply ignore it.
    super(message, originalError ? { cause: originalError } : undefined);

    this.name = 'EmbeddingError';
    this.code = code;
    this.originalError = originalError;
    this.context = context;
    this.timestamp = new Date().toISOString();

    // Maintain proper stack trace for where our error was thrown (only available on V8).
    // Anchoring on `new.target` rather than this class also strips subclass
    // constructor frames, so the trace starts at the code that created the error.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, new.target);
    }
  }

  /**
   * Convert error to JSON for logging
   *
   * @returns {Object} JSON representation of the error; `originalError` is
   *   `null` when no (truthy) original error was supplied
   */
  toJSON() {
    const source = this.originalError;
    let originalError = null;

    if (source) {
      const isObjectLike = typeof source === 'object' || typeof source === 'function';
      originalError = isObjectLike
        ? {
            name: source.name,
            message: source.message,
            stack: source.stack,
          }
        : {
            // A primitive (e.g. a thrown string) has no name/message/stack of
            // its own; keep its textual value instead of emitting undefined fields.
            name: 'NonError',
            message: String(source),
            stack: undefined,
          };
    }

    return {
      name: this.name,
      message: this.message,
      code: this.code,
      timestamp: this.timestamp,
      context: this.context,
      stack: this.stack,
      originalError,
    };
  }

  /**
   * Check if this error is of a specific type
   *
   * @param {string} code - Error code to check
   * @returns {boolean} True if the error matches the code
   */
  is(code) {
    return this.code === code;
  }

  /**
   * Check if this error is retryable
   *
   * @returns {boolean} True if the error's code is one of the network,
   *   service-availability, timeout, database-connection or cache codes listed below
   */
  isRetryable() {
    const retryableCodes = [
      ERROR_CODES.NETWORK_ERROR,
      ERROR_CODES.SERVICE_UNAVAILABLE,
      ERROR_CODES.EMBEDDING_TIMEOUT,
      ERROR_CODES.SEARCH_TIMEOUT,
      ERROR_CODES.DB_CONNECTION_FAILED,
      ERROR_CODES.CACHE_WRITE_FAILED,
      ERROR_CODES.CACHE_READ_FAILED,
    ];

    return retryableCodes.includes(this.code);
  }
}
|
|
144
|
+
|
|
145
|
+
/**
 * Error raised for validation failures.
 *
 * Behaves like an EmbeddingError whose code is always
 * ERROR_CODES.VALIDATION_ERROR and whose name is 'ValidationError'.
 */
export class ValidationError extends EmbeddingError {
  /**
   * Build a ValidationError.
   *
   * @param {string} message - Human-readable description of the failure
   * @param {Error} [originalError] - Underlying error, if any
   * @param {Object} [context] - Extra details to attach to the error
   */
  constructor(message, originalError = null, context = {}) {
    const validationCode = ERROR_CODES.VALIDATION_ERROR;
    super(message, validationCode, originalError, context);

    // Override the name assigned by the parent constructor.
    this.name = 'ValidationError';
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Build an EmbeddingError tagged with ERROR_CODES.MODEL_INITIALIZATION_FAILED.
 *
 * @param {string} message - Error message
 * @param {Error} [originalError] - Error that triggered the failure
 * @param {Object} [context] - Extra details to attach to the error
 * @returns {EmbeddingError} Newly created error (not thrown)
 */
export function createModelInitializationError(message, originalError = null, context = {}) {
  const { MODEL_INITIALIZATION_FAILED: code } = ERROR_CODES;
  return new EmbeddingError(message, code, originalError, context);
}
|
|
173
|
+
|
|
174
|
+
/**
 * Build an EmbeddingError for a database failure.
 *
 * The error is always tagged with ERROR_CODES.DB_QUERY_FAILED.
 *
 * @param {string} message - Error message
 * @param {Error} [originalError] - Error that triggered the failure
 * @param {Object} [context] - Extra details to attach to the error
 * @returns {EmbeddingError} Newly created error (not thrown)
 */
export function createDatabaseError(message, originalError = null, context = {}) {
  const { DB_QUERY_FAILED: code } = ERROR_CODES;
  return new EmbeddingError(message, code, originalError, context);
}
|
|
185
|
+
|
|
186
|
+
/**
 * Build an EmbeddingError tagged with ERROR_CODES.EMBEDDING_GENERATION_FAILED.
 *
 * @param {string} message - Error message
 * @param {Error} [originalError] - Error that triggered the failure
 * @param {Object} [context] - Extra details to attach to the error
 * @returns {EmbeddingError} Newly created error (not thrown)
 */
export function createEmbeddingGenerationError(message, originalError = null, context = {}) {
  const { EMBEDDING_GENERATION_FAILED: code } = ERROR_CODES;
  return new EmbeddingError(message, code, originalError, context);
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build an EmbeddingError tagged with ERROR_CODES.FILE_PROCESSING_FAILED.
 *
 * @param {string} message - Error message
 * @param {Error} [originalError] - Error that triggered the failure
 * @param {Object} [context] - Extra details to attach to the error
 * @returns {EmbeddingError} Newly created error (not thrown)
 */
export function createFileProcessingError(message, originalError = null, context = {}) {
  const { FILE_PROCESSING_FAILED: code } = ERROR_CODES;
  return new EmbeddingError(message, code, originalError, context);
}
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings System Factory
|
|
3
|
+
*
|
|
4
|
+
* This module provides a factory pattern for creating and wiring together
|
|
5
|
+
* all components of the embeddings system. It implements dependency injection
|
|
6
|
+
* and provides both singleton and instance-based usage patterns.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Dependency injection for all modules
|
|
10
|
+
* - System-wide initialization and cleanup
|
|
11
|
+
* - Configuration management
|
|
12
|
+
* - Environment setup
|
|
13
|
+
* - Module lifecycle management
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* @typedef {import('./types.js').EmbeddingConfig} EmbeddingConfig
|
|
18
|
+
* @typedef {import('./types.js').SearchOptions} SearchOptions
|
|
19
|
+
* @typedef {import('./types.js').SearchResult} SearchResult
|
|
20
|
+
* @typedef {import('./types.js').ProcessingProgress} ProcessingProgress
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import chalk from 'chalk';
|
|
24
|
+
import { ContentRetriever } from '../content-retrieval.js';
|
|
25
|
+
import { CustomDocumentProcessor } from '../custom-documents.js';
|
|
26
|
+
import { CacheManager } from './cache-manager.js';
|
|
27
|
+
import { EMBEDDING_DIMENSIONS, MODEL_NAME_STRING, MAX_RETRIES, LANCEDB_PATH, FASTEMBED_CACHE_DIR } from './constants.js';
|
|
28
|
+
import { DatabaseManager } from './database.js';
|
|
29
|
+
import { EmbeddingError } from './errors.js';
|
|
30
|
+
import { FileProcessor } from './file-processor.js';
|
|
31
|
+
import { ModelManager } from './model-manager.js';
|
|
32
|
+
|
|
33
|
+
// ============================================================================
|
|
34
|
+
// EMBEDDINGS SYSTEM CLASS
|
|
35
|
+
// ============================================================================
|
|
36
|
+
|
|
37
|
+
/**
 * EmbeddingsSystem class that encapsulates all embedding functionality
 * with proper dependency injection and lifecycle management.
 *
 * Every component (cache, database, model, file processor, content retriever,
 * custom document processor) can be injected through `options`; any component
 * that is not injected is constructed here with the resolved configuration.
 */
class EmbeddingsSystem {
  /**
   * @param {EmbeddingConfig} [options] - Scalar settings and/or pre-built
   *   components. Falsy scalar settings fall back to the module defaults.
   */
  constructor(options = {}) {
    this.options = options;
    this.initialized = false;
    this.initializing = false;
    this.initializationPromise = null;
    this.cleaningUp = false;

    // Resolved scalar settings. Exposed through getProjectEmbeddings() and
    // getSystemMetrics(). Note that an injected component may have been
    // configured differently by whoever built it.
    this.config = {
      maxCacheSize: options.maxCacheSize || 1000,
      maxEmbeddingCacheSize: options.maxEmbeddingCacheSize || 1000,
      dbPath: options.dbPath || LANCEDB_PATH,
      embeddingDimensions: options.embeddingDimensions || EMBEDDING_DIMENSIONS,
      modelNameString: options.modelNameString || MODEL_NAME_STRING,
      maxRetries: options.maxRetries || MAX_RETRIES,
      cacheDir: options.cacheDir || FASTEMBED_CACHE_DIR,
    };

    // Initialize core components with dependency injection
    this.cacheManager =
      options.cacheManager ||
      new CacheManager({
        maxCacheSize: this.config.maxCacheSize,
        maxEmbeddingCacheSize: this.config.maxEmbeddingCacheSize,
      });

    this.databaseManager =
      options.databaseManager ||
      new DatabaseManager({
        dbPath: this.config.dbPath,
        embeddingDimensions: this.config.embeddingDimensions,
      });

    this.modelManager =
      options.modelManager ||
      new ModelManager({
        embeddingDimensions: this.config.embeddingDimensions,
        modelNameString: this.config.modelNameString,
        maxRetries: this.config.maxRetries,
        cacheDir: this.config.cacheDir,
        cacheManager: this.cacheManager,
      });

    this.fileProcessor =
      options.fileProcessor ||
      new FileProcessor({
        modelManager: this.modelManager,
        databaseManager: this.databaseManager,
        cacheManager: this.cacheManager,
      });

    this.contentRetriever =
      options.contentRetriever ||
      new ContentRetriever({
        modelManager: this.modelManager,
        database: this.databaseManager,
        cacheManager: this.cacheManager,
      });

    this.customDocumentProcessor =
      options.customDocumentProcessor ||
      new CustomDocumentProcessor({
        modelManager: this.modelManager,
        cacheManager: this.cacheManager,
      });

    // Track initialization status
    this.components = {
      cacheManager: this.cacheManager,
      databaseManager: this.databaseManager,
      modelManager: this.modelManager,
      fileProcessor: this.fileProcessor,
      contentRetriever: this.contentRetriever,
      customDocumentProcessor: this.customDocumentProcessor,
    };

    console.log(chalk.green('[EmbeddingsSystem] System created with dependency injection'));
  }

  // ============================================================================
  // INITIALIZATION
  // ============================================================================

  /**
   * Initialize the embeddings system.
   *
   * Returns immediately when already initialized; while an initialization is
   * in flight, later callers receive the same in-flight promise. On failure
   * the in-flight state is cleared so a later call can retry.
   *
   * @returns {Promise<void>}
   * @throws {EmbeddingError} When a component fails to initialize
   */
  async initialize() {
    if (this.initialized) {
      return;
    }

    if (this.initializing) {
      return this.initializationPromise;
    }

    this.initializing = true;
    this.initializationPromise = this._performInitialization();

    try {
      await this.initializationPromise;
      this.initialized = true;
      this.initializing = false;
      console.log(chalk.green('[EmbeddingsSystem] System initialized successfully'));
    } catch (error) {
      this.initializing = false;
      this.initializationPromise = null;
      throw error;
    }
  }

  /**
   * Perform the actual initialization: database tables first, then the model.
   * Any failure is logged and rethrown wrapped in an EmbeddingError.
   *
   * @private
   * @returns {Promise<void>}
   */
  async _performInitialization() {
    console.log(chalk.blue('[EmbeddingsSystem] Initializing embeddings system...'));

    try {
      // Initialize database and tables
      await this.databaseManager.initializeTables();

      // Initialize the model
      await this.modelManager.initialize();

      console.log(chalk.green('[EmbeddingsSystem] All components initialized successfully'));
    } catch (error) {
      console.error(chalk.red(`[EmbeddingsSystem] Initialization failed: ${error.message}`));
      throw new EmbeddingError(`System initialization failed: ${error.message}`, 'SYSTEM_INITIALIZATION_FAILED', error);
    }
  }

  /**
   * Check if the system is initialized
   * @returns {boolean}
   */
  isInitialized() {
    return this.initialized;
  }

  // ============================================================================
  // PUBLIC API METHODS
  // ============================================================================

  /**
   * Calculate embedding for text
   * @param {string} text - Text to embed
   * @returns {Promise<import('./types.js').EmbeddingVector|null>}
   */
  async calculateEmbedding(text) {
    await this.initialize();
    return this.modelManager.calculateEmbedding(text);
  }

  /**
   * Calculate query embedding for text
   * @param {string} text - Query text to embed
   * @returns {Promise<import('./types.js').EmbeddingVector|null>}
   */
  async calculateQueryEmbedding(text) {
    await this.initialize();
    return this.modelManager.calculateQueryEmbedding(text);
  }

  /**
   * Find relevant documentation
   * @param {string} queryText - Query text
   * @param {SearchOptions} options - Search options
   * @returns {Promise<SearchResult[]>}
   */
  async findRelevantDocs(queryText, options = {}) {
    await this.initialize();
    return this.contentRetriever.findRelevantDocs(queryText, options);
  }

  /**
   * Find similar code
   * @param {string} queryText - Query text
   * @param {SearchOptions} options - Search options
   * @returns {Promise<SearchResult[]>}
   */
  async findSimilarCode(queryText, options = {}) {
    await this.initialize();
    return this.contentRetriever.findSimilarCode(queryText, options);
  }

  /**
   * Process custom documents in memory
   * @param {import('./types.js').CustomDocument[]} customDocs - Array of custom documents
   * @param {string} projectPath - Project path
   * @returns {Promise<import('./types.js').DocumentChunk[]>}
   */
  async processCustomDocumentsInMemory(customDocs, projectPath) {
    await this.initialize();
    return this.customDocumentProcessor.processDocumentsInMemory(customDocs, projectPath);
  }

  /**
   * Find relevant custom document chunks
   * @param {string} queryText - Query text
   * @param {import('./types.js').DocumentChunk[]} chunks - Document chunks
   * @param {SearchOptions} options - Search options
   * @returns {Promise<SearchResult[]>}
   */
  async findRelevantCustomDocChunks(queryText, chunks = [], options = {}) {
    await this.initialize();
    return this.customDocumentProcessor.findRelevantChunks(queryText, chunks, options);
  }

  /**
   * Get existing custom document chunks
   * @param {string} projectPath - Project path
   * @returns {Promise<import('./types.js').DocumentChunk[]>}
   */
  async getExistingCustomDocumentChunks(projectPath) {
    await this.initialize();
    return this.customDocumentProcessor.getExistingChunks(projectPath);
  }

  /**
   * Process batch embeddings
   * @param {string[]} filePaths - Array of file paths
   * @param {import('./types.js').BatchProcessingOptions} options - Processing options
   * @returns {Promise<ProcessingProgress>}
   */
  async processBatchEmbeddings(filePaths, options = {}) {
    await this.initialize();
    return this.fileProcessor.processBatchEmbeddings(filePaths, options);
  }

  /**
   * Get project embeddings (compatibility method).
   *
   * Synchronous: it only bundles references already held by this instance
   * and does not trigger initialization.
   *
   * @param {string} projectPath - Project path
   * @returns {Object} `{ system, projectPath, components, initialized, config }`
   */
  getProjectEmbeddings(projectPath = process.cwd()) {
    return {
      system: this,
      projectPath,
      components: this.components,
      initialized: this.initialized,
      config: this.config,
    };
  }

  /**
   * Clear embeddings for a project
   * For deletion operations, we only need database connection - no need for
   * full system initialization (models, indexes, etc.)
   * @param {string} projectPath - Project path
   * @returns {Promise<boolean>}
   */
  async clearEmbeddings(projectPath = process.cwd()) {
    await this.databaseManager.getDBConnection();
    return this.databaseManager.clearProjectEmbeddings(projectPath);
  }

  /**
   * Clear all embeddings
   * @returns {Promise<boolean>}
   */
  async clearAllEmbeddings() {
    // Only ensure database connection exists, skip full initialization
    await this.databaseManager.getDBConnection();
    return this.databaseManager.clearAllEmbeddings();
  }

  /**
   * Store project summary for later retrieval during reviews
   * @param {string} projectPath - Project path
   * @param {Object} projectSummary - Project analysis summary
   * @returns {Promise<boolean>}
   */
  async storeProjectSummary(projectPath, projectSummary) {
    await this.databaseManager.getDBConnection();
    return this.databaseManager.storeProjectSummary(projectPath, projectSummary);
  }

  /**
   * Get stored project summary
   * @param {string} projectPath - Project path
   * @returns {Promise<Object|null>}
   */
  async getProjectSummary(projectPath) {
    await this.databaseManager.getDBConnection();
    return this.databaseManager.getProjectSummary(projectPath);
  }

  // ============================================================================
  // PR COMMENTS TABLE METHODS
  // ============================================================================

  /**
   * Get PR comments table
   * @returns {Promise<import('@lancedb/lancedb').Table|null>} PR comments table or null on error
   */
  async getPRCommentsTable() {
    await this.initialize();
    return this.databaseManager.getTable(this.databaseManager.prCommentsTable);
  }

  /**
   * Update the vector index for the PR comments table
   * @returns {Promise<void>}
   */
  async updatePRCommentsIndex() {
    await this.initialize();
    return this.databaseManager.updatePRCommentsIndex();
  }

  // ============================================================================
  // UTILITY METHODS
  // ============================================================================

  /**
   * Get system metrics
   * @returns {Object} Lifecycle flags, resolved config and the metrics reported
   *   by the cache manager, content retriever and custom document processor
   */
  getSystemMetrics() {
    return {
      initialized: this.initialized,
      initializing: this.initializing,
      config: this.config,
      cacheMetrics: this.cacheManager.getCacheMetrics(),
      contentRetrieverMetrics: this.contentRetriever.getPerformanceMetrics(),
      customDocumentMetrics: this.customDocumentProcessor.getPerformanceMetrics(),
    };
  }

  /**
   * Get system status
   * @returns {Object} Lifecycle flags plus model, database and cache readiness
   */
  getSystemStatus() {
    return {
      initialized: this.initialized,
      initializing: this.initializing,
      modelReady: this.modelManager.isInitialized(),
      databaseReady: this.databaseManager.tablesInitialized,
      cacheStatus: this.cacheManager.getCacheStatus(),
    };
  }

  // ============================================================================
  // CLEANUP
  // ============================================================================

  /**
   * Cleanup system resources.
   *
   * Every component's cleanup is attempted and awaited even if another one
   * fails, and the lifecycle flags are reset in either case so that a later
   * initialize() starts from scratch. If any component failed, the first
   * failure (in component order) is logged and rethrown.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.cleaningUp) {
      return; // Already cleaning up, prevent duplicate calls
    }

    this.cleaningUp = true;

    try {
      console.log(chalk.yellow('[EmbeddingsSystem] Cleaning up system resources...'));

      // Cleanup all components. Each call is wrapped in an async function so
      // that a synchronous throw becomes a rejection instead of preventing
      // the remaining components from being cleaned up.
      const targets = [
        this.modelManager,
        this.databaseManager,
        this.fileProcessor,
        this.contentRetriever,
        this.customDocumentProcessor,
        this.cacheManager,
      ];
      const outcomes = await Promise.allSettled(targets.map(async (component) => component.cleanup()));

      // Reset state, even after a partial failure: some components have been
      // torn down, so the system must not keep reporting itself as initialized.
      this.initialized = false;
      this.initializing = false;
      this.initializationPromise = null;

      const failure = outcomes.find((outcome) => outcome.status === 'rejected');
      if (failure) {
        throw failure.reason;
      }

      console.log(chalk.green('[EmbeddingsSystem] System cleanup completed'));
    } catch (error) {
      console.error(chalk.red(`[EmbeddingsSystem] Error during cleanup: ${error.message}`));
      throw error;
    } finally {
      this.cleaningUp = false;
    }
  }
}
|
|
417
|
+
|
|
418
|
+
// ============================================================================
|
|
419
|
+
// FACTORY FUNCTIONS
|
|
420
|
+
// ============================================================================
|
|
421
|
+
|
|
422
|
+
/**
 * Factory for an independent EmbeddingsSystem.
 *
 * Each call constructs a fresh instance from the given options.
 *
 * @param {EmbeddingConfig} options - Configuration options
 * @returns {EmbeddingsSystem} The newly constructed system
 */
function createEmbeddingsSystem(options = {}) {
  const system = new EmbeddingsSystem(options);
  return system;
}
|
|
430
|
+
|
|
431
|
+
// ============================================================================
|
|
432
|
+
// SINGLETON INSTANCE
|
|
433
|
+
// ============================================================================
|
|
434
|
+
|
|
435
|
+
// Lazily created module-level singleton, kept for backward compatibility
let defaultSystem = null;

/**
 * Return the shared default EmbeddingsSystem.
 *
 * The instance is created with default options on the first call and the
 * same instance is returned on every later call.
 *
 * @returns {EmbeddingsSystem}
 */
export function getDefaultEmbeddingsSystem() {
  if (defaultSystem) {
    return defaultSystem;
  }

  defaultSystem = createEmbeddingsSystem();
  return defaultSystem;
}
|