codecritique 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1145 -0
- package/package.json +98 -0
- package/src/content-retrieval.js +747 -0
- package/src/custom-documents.js +597 -0
- package/src/embeddings/cache-manager.js +364 -0
- package/src/embeddings/constants.js +40 -0
- package/src/embeddings/database.js +921 -0
- package/src/embeddings/errors.js +208 -0
- package/src/embeddings/factory.js +447 -0
- package/src/embeddings/file-processor.js +851 -0
- package/src/embeddings/model-manager.js +337 -0
- package/src/embeddings/similarity-calculator.js +97 -0
- package/src/embeddings/types.js +113 -0
- package/src/feedback-loader.js +384 -0
- package/src/index.js +1418 -0
- package/src/llm.js +123 -0
- package/src/pr-history/analyzer.js +579 -0
- package/src/pr-history/bot-detector.js +123 -0
- package/src/pr-history/cli-utils.js +204 -0
- package/src/pr-history/comment-processor.js +549 -0
- package/src/pr-history/database.js +819 -0
- package/src/pr-history/github-client.js +629 -0
- package/src/project-analyzer.js +955 -0
- package/src/rag-analyzer.js +2764 -0
- package/src/rag-review.js +566 -0
- package/src/technology-keywords.json +753 -0
- package/src/utils/command.js +48 -0
- package/src/utils/constants.js +263 -0
- package/src/utils/context-inference.js +364 -0
- package/src/utils/document-detection.js +105 -0
- package/src/utils/file-validation.js +271 -0
- package/src/utils/git.js +232 -0
- package/src/utils/language-detection.js +170 -0
- package/src/utils/logging.js +24 -0
- package/src/utils/markdown.js +132 -0
- package/src/utils/mobilebert-tokenizer.js +141 -0
- package/src/utils/pr-chunking.js +276 -0
- package/src/utils/string-utils.js +28 -0
- package/src/zero-shot-classifier-open.js +392 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache Manager Module
|
|
3
|
+
*
|
|
4
|
+
* This module provides centralized cache management for embeddings,
|
|
5
|
+
* document contexts, and other cached data structures.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Document context caching
|
|
9
|
+
* - H1 embedding caching
|
|
10
|
+
* - General embedding caching with size limits
|
|
11
|
+
* - Custom document chunks caching
|
|
12
|
+
* - Cache metrics and monitoring
|
|
13
|
+
* - Cache eviction policies
|
|
14
|
+
*/
|
|
15
|
+
/**
|
|
16
|
+
* @typedef {import('./types.js').CacheMetrics} CacheMetrics
|
|
17
|
+
* @typedef {import('./types.js').EmbeddingVector} EmbeddingVector
|
|
18
|
+
* @typedef {import('./types.js').DocumentChunk} DocumentChunk
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import chalk from 'chalk';
|
|
22
|
+
import { MAX_EMBEDDING_CACHE_SIZE } from './constants.js';
|
|
23
|
+
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// CACHE CONFIGURATION
|
|
26
|
+
// ============================================================================
|
|
27
|
+
|
|
28
|
+
const DEFAULT_MAX_CACHE_SIZE = 1000;
|
|
29
|
+
const DEFAULT_MAX_EMBEDDING_CACHE_SIZE = MAX_EMBEDDING_CACHE_SIZE;
|
|
30
|
+
|
|
31
|
+
// ============================================================================
|
|
32
|
+
// CACHE MANAGER CLASS
|
|
33
|
+
// ============================================================================
|
|
34
|
+
|
|
35
|
+
/**
 * In-memory cache manager holding five independent Map-backed caches:
 * document contexts, in-flight document context promises, H1 embeddings,
 * general embeddings, and custom document chunks.
 *
 * Each cache is bounded. When a NEW key is inserted into a cache that has
 * reached its limit, the entry that was inserted first is evicted (Map
 * iteration order is insertion order). Overwriting an existing key never
 * evicts anything, because it does not grow the cache.
 *
 * Hit, miss and eviction counters are shared across all five caches.
 */
export class CacheManager {
  /**
   * @param {Object} [options] - Optional configuration
   * @param {number} [options.maxCacheSize] - Entry limit for every cache except the
   *   general embedding cache; falsy values (including 0) fall back to the default
   * @param {number} [options.maxEmbeddingCacheSize] - Entry limit for the general
   *   embedding cache; falsy values (including 0) fall back to the default
   */
  constructor(options = {}) {
    this.maxCacheSize = options.maxCacheSize || DEFAULT_MAX_CACHE_SIZE;
    this.maxEmbeddingCacheSize = options.maxEmbeddingCacheSize || DEFAULT_MAX_EMBEDDING_CACHE_SIZE;

    // Initialize cache Maps
    this.documentContextCache = new Map();
    this.documentContextPromiseCache = new Map();
    this.h1EmbeddingCache = new Map();
    this.embeddingCache = new Map();
    this.customDocumentChunks = new Map();

    // Cache statistics (shared by all caches)
    this.stats = {
      hits: 0,
      misses: 0,
      evictions: 0,
      created: Date.now(),
    };

    // Cleanup guard
    this.cleaningUp = false;
  }

  // ============================================================================
  // DOCUMENT CONTEXT CACHE
  // ============================================================================

  /**
   * Get document context from cache and record a hit or a miss.
   * @param {string} key - Cache key
   * @returns {*} Cached document context or undefined
   */
  getDocumentContext(key) {
    if (this.documentContextCache.has(key)) {
      this.stats.hits++;
      return this.documentContextCache.get(key);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Set document context in cache (bounded by maxCacheSize).
   * @param {string} key - Cache key
   * @param {*} context - Document context to cache
   */
  setDocumentContext(key, context) {
    this._store(this.documentContextCache, key, context, this.maxCacheSize);
  }

  /**
   * Get document context promise from cache and record a hit or a miss.
   * @param {string} key - Cache key
   * @returns {Promise|undefined} Cached promise or undefined
   */
  getDocumentContextPromise(key) {
    if (this.documentContextPromiseCache.has(key)) {
      this.stats.hits++;
      return this.documentContextPromiseCache.get(key);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Set document context promise in cache (bounded by maxCacheSize).
   * @param {string} key - Cache key
   * @param {Promise} promise - Promise to cache
   */
  setDocumentContextPromise(key, promise) {
    this._store(this.documentContextPromiseCache, key, promise, this.maxCacheSize);
  }

  /**
   * Remove document context promise from cache. Does not touch the statistics.
   * @param {string} key - Cache key
   */
  removeDocumentContextPromise(key) {
    this.documentContextPromiseCache.delete(key);
  }

  // ============================================================================
  // H1 EMBEDDING CACHE
  // ============================================================================

  /**
   * Get H1 embedding from cache and record a hit or a miss.
   * @param {string} key - Cache key
   * @returns {EmbeddingVector|undefined} Cached H1 embedding or undefined
   */
  getH1Embedding(key) {
    if (this.h1EmbeddingCache.has(key)) {
      this.stats.hits++;
      return this.h1EmbeddingCache.get(key);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Set H1 embedding in cache (bounded by maxCacheSize).
   * @param {string} key - Cache key
   * @param {EmbeddingVector} embedding - H1 embedding to cache
   */
  setH1Embedding(key, embedding) {
    this._store(this.h1EmbeddingCache, key, embedding, this.maxCacheSize);
  }

  // ============================================================================
  // GENERAL EMBEDDING CACHE
  // ============================================================================

  /**
   * Get embedding from cache and record a hit or a miss.
   * @param {string} key - Cache key
   * @returns {EmbeddingVector|undefined} Cached embedding or undefined
   */
  getEmbedding(key) {
    if (this.embeddingCache.has(key)) {
      this.stats.hits++;
      return this.embeddingCache.get(key);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Set embedding in cache (bounded by maxEmbeddingCacheSize).
   * @param {string} key - Cache key
   * @param {EmbeddingVector} embedding - Embedding to cache
   */
  setEmbedding(key, embedding) {
    this._store(this.embeddingCache, key, embedding, this.maxEmbeddingCacheSize);
  }

  // ============================================================================
  // CUSTOM DOCUMENT CHUNKS CACHE
  // ============================================================================

  /**
   * Get custom document chunks from cache and record a hit or a miss.
   * @param {string} projectPath - Project path key
   * @returns {DocumentChunk[]|undefined} Cached chunks or undefined
   */
  getCustomDocumentChunks(projectPath) {
    if (this.customDocumentChunks.has(projectPath)) {
      this.stats.hits++;
      return this.customDocumentChunks.get(projectPath);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Set custom document chunks in cache (bounded by maxCacheSize).
   * @param {string} projectPath - Project path key
   * @param {DocumentChunk[]} chunks - Chunks to cache
   */
  setCustomDocumentChunks(projectPath, chunks) {
    this._store(this.customDocumentChunks, projectPath, chunks, this.maxCacheSize);
  }

  /**
   * Store custom documents (alias for setCustomDocumentChunks).
   * Declared async, so it returns a promise even though the work is synchronous.
   * @param {string} projectPath - Project path
   * @param {Array} chunks - Document chunks to store
   * @returns {Promise<void>}
   */
  async storeCustomDocuments(projectPath, chunks) {
    this.setCustomDocumentChunks(projectPath, chunks);
  }

  // ============================================================================
  // CACHE MANAGEMENT
  // ============================================================================

  /**
   * Clear all caches, reset the hit/miss/eviction counters, and log how many
   * entries each cache held. The creation timestamp is left untouched, so the
   * reported uptime keeps counting from construction.
   */
  clearAllCaches() {
    // Capture sizes first so the log line reports what was actually dropped.
    const docCacheSize = this.documentContextCache.size;
    const h1CacheSize = this.h1EmbeddingCache.size;
    const embeddingCacheSize = this.embeddingCache.size;
    const promiseCacheSize = this.documentContextPromiseCache.size;
    const customDocCacheSize = this.customDocumentChunks.size;

    this.documentContextCache.clear();
    this.documentContextPromiseCache.clear();
    this.h1EmbeddingCache.clear();
    this.embeddingCache.clear();
    this.customDocumentChunks.clear();

    // Reset stats
    this.stats.hits = 0;
    this.stats.misses = 0;
    this.stats.evictions = 0;

    console.log(
      chalk.yellow(
        `[CACHE] Cleared all caches - Document contexts: ${docCacheSize}, Promise: ${promiseCacheSize}, H1 embeddings: ${h1CacheSize}, Embeddings: ${embeddingCacheSize}, Custom docs: ${customDocCacheSize}`
      )
    );
  }

  /**
   * Clear specific cache type. Statistics are not reset. An unrecognised type
   * only produces a warning.
   * @param {string} cacheType - One of 'documentContext', 'documentContextPromise',
   *   'h1Embedding', 'embedding', 'customDocumentChunks'
   */
  clearCache(cacheType) {
    const cacheMap = this._getCacheMap(cacheType);
    if (cacheMap) {
      const size = cacheMap.size;
      cacheMap.clear();
      console.log(chalk.yellow(`[CACHE] Cleared ${cacheType} cache - ${size} items`));
    } else {
      console.warn(chalk.yellow(`[CACHE] Unknown cache type: ${cacheType}`));
    }
  }

  /**
   * Get cache metrics: current sizes, configured limits, counters, and uptime
   * in milliseconds since construction.
   * @returns {CacheMetrics} Cache metrics object
   */
  getCacheMetrics() {
    const lookups = this.stats.hits + this.stats.misses;
    // Two-decimal percentage once at least one lookup happened; plain 0 before that.
    const hitRate = lookups > 0 ? ((this.stats.hits / lookups) * 100).toFixed(2) : 0;

    return {
      sizes: {
        documentContext: this.documentContextCache.size,
        documentContextPromise: this.documentContextPromiseCache.size,
        h1Embedding: this.h1EmbeddingCache.size,
        embedding: this.embeddingCache.size,
        customDocumentChunks: this.customDocumentChunks.size,
      },
      limits: {
        maxCacheSize: this.maxCacheSize,
        maxEmbeddingCacheSize: this.maxEmbeddingCacheSize,
      },
      statistics: {
        hits: this.stats.hits,
        misses: this.stats.misses,
        evictions: this.stats.evictions,
        hitRate: `${hitRate}%`,
      },
      uptime: Date.now() - this.stats.created,
    };
  }

  /**
   * Get cache status summary derived from getCacheMetrics().
   * @returns {Object} Summary with totalCachedItems (sum over all caches),
   *   hitRate, memoryEfficiency ('active' when anything is cached, else 'idle'),
   *   and uptime in whole seconds with an 's' suffix
   */
  getCacheStatus() {
    const metrics = this.getCacheMetrics();
    const totalSize = Object.values(metrics.sizes).reduce((sum, size) => sum + size, 0);

    return {
      totalCachedItems: totalSize,
      hitRate: metrics.statistics.hitRate,
      memoryEfficiency: totalSize > 0 ? 'active' : 'idle',
      uptime: `${Math.floor(metrics.uptime / 1000)}s`,
    };
  }

  /**
   * Cleanup method for compatibility with factory cleanup pattern.
   * Clears every cache; a call made while another cleanup is running is a no-op.
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.cleaningUp) {
      return; // Already cleaning up, prevent duplicate calls
    }

    this.cleaningUp = true;

    try {
      this.clearAllCaches();
      console.log(chalk.green('[CACHE] Cache cleanup completed'));
    } finally {
      // Always release the guard, even if clearing or logging throws.
      this.cleaningUp = false;
    }
  }

  // ============================================================================
  // PRIVATE METHODS
  // ============================================================================

  /**
   * Insert or overwrite an entry while respecting the cache's size limit.
   * Room is made only when the key is new: overwriting an existing key does
   * not grow the Map, so evicting for it would drop an unrelated entry for
   * no reason.
   * @param {Map} cacheMap - Cache map to write to
   * @param {*} key - Cache key
   * @param {*} value - Value to store
   * @param {number} maxSize - Maximum allowed size for this cache
   * @private
   */
  _store(cacheMap, key, value, maxSize) {
    if (!cacheMap.has(key)) {
      this._enforceMaxSize(cacheMap, maxSize);
    }
    cacheMap.set(key, value);
  }

  /**
   * Enforce maximum cache size by evicting oldest entries until the map holds
   * fewer than maxSize entries, i.e. until there is room for one more.
   * Every eviction increments the shared eviction counter.
   * @param {Map} cacheMap - Cache map to enforce size limit on
   * @param {number} maxSize - Maximum allowed size
   * @private
   */
  _enforceMaxSize(cacheMap, maxSize) {
    // The emptiness guard keeps a non-positive limit from looping forever:
    // without it `0 >= maxSize` stays true on an empty map.
    while (cacheMap.size > 0 && cacheMap.size >= maxSize) {
      // Map iterates in insertion order, so the first key is the oldest entry.
      const firstKey = cacheMap.keys().next().value;
      cacheMap.delete(firstKey);
      this.stats.evictions++;
    }
  }

  /**
   * Get cache map by type
   * @param {string} cacheType - Cache type
   * @returns {Map|null} Cache map or null if not found
   * @private
   */
  _getCacheMap(cacheType) {
    switch (cacheType) {
      case 'documentContext':
        return this.documentContextCache;
      case 'documentContextPromise':
        return this.documentContextPromiseCache;
      case 'h1Embedding':
        return this.h1EmbeddingCache;
      case 'embedding':
        return this.embeddingCache;
      case 'customDocumentChunks':
        return this.customDocumentChunks;
      default:
        return null;
    }
  }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Constants
|
|
3
|
+
*
|
|
4
|
+
* This module contains all shared constants used across the embeddings system.
|
|
5
|
+
* These constants are extracted from the original embeddings.js for better modularity.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
|
|
10
|
+
// FastEmbed Model Configuration
export const EMBEDDING_DIMENSIONS = 384; // Dimension for bge-small-en-v1.5
export const MODEL_NAME_STRING = 'bge-small-en-v1.5';

// System Constants
export const MAX_RETRIES = 3;

// Directory Names
export const LANCEDB_DIR_NAME = '.ai-review-lancedb';
export const FASTEMBED_CACHE_DIR_NAME = '.ai-review-fastembed-cache';

// Directory Paths
// Use workspace-relative paths in CI environments, HOME-based paths locally.
// Resolved once, when this module is first loaded:
//   - CI set (any non-empty value): GITHUB_WORKSPACE_PATH, else the current working directory
//   - otherwise: HOME, else USERPROFILE, else the current working directory
const BASE_DIR = process.env.CI
  ? // Prioritize the explicitly passed workspace path
    process.env.GITHUB_WORKSPACE_PATH || process.cwd()
  : process.env.HOME || process.env.USERPROFILE || process.cwd();

export const LANCEDB_PATH = path.join(BASE_DIR, LANCEDB_DIR_NAME);
export const FASTEMBED_CACHE_DIR = path.join(BASE_DIR, FASTEMBED_CACHE_DIR_NAME);

// Database Table Names
// Frozen so that no importer can accidentally rename a table for every other module.
export const TABLE_NAMES = Object.freeze({
  FILE_EMBEDDINGS: 'file_embeddings',
  DOCUMENT_CHUNK: 'document_chunk_embeddings',
  PR_COMMENTS: 'pr_comments',
  PROJECT_SUMMARIES: 'project_summaries',
});

// Cache Configuration
export const MAX_EMBEDDING_CACHE_SIZE = 1000;
|