codecritique 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +1145 -0
  3. package/package.json +98 -0
  4. package/src/content-retrieval.js +747 -0
  5. package/src/custom-documents.js +597 -0
  6. package/src/embeddings/cache-manager.js +364 -0
  7. package/src/embeddings/constants.js +40 -0
  8. package/src/embeddings/database.js +921 -0
  9. package/src/embeddings/errors.js +208 -0
  10. package/src/embeddings/factory.js +447 -0
  11. package/src/embeddings/file-processor.js +851 -0
  12. package/src/embeddings/model-manager.js +337 -0
  13. package/src/embeddings/similarity-calculator.js +97 -0
  14. package/src/embeddings/types.js +113 -0
  15. package/src/feedback-loader.js +384 -0
  16. package/src/index.js +1418 -0
  17. package/src/llm.js +123 -0
  18. package/src/pr-history/analyzer.js +579 -0
  19. package/src/pr-history/bot-detector.js +123 -0
  20. package/src/pr-history/cli-utils.js +204 -0
  21. package/src/pr-history/comment-processor.js +549 -0
  22. package/src/pr-history/database.js +819 -0
  23. package/src/pr-history/github-client.js +629 -0
  24. package/src/project-analyzer.js +955 -0
  25. package/src/rag-analyzer.js +2764 -0
  26. package/src/rag-review.js +566 -0
  27. package/src/technology-keywords.json +753 -0
  28. package/src/utils/command.js +48 -0
  29. package/src/utils/constants.js +263 -0
  30. package/src/utils/context-inference.js +364 -0
  31. package/src/utils/document-detection.js +105 -0
  32. package/src/utils/file-validation.js +271 -0
  33. package/src/utils/git.js +232 -0
  34. package/src/utils/language-detection.js +170 -0
  35. package/src/utils/logging.js +24 -0
  36. package/src/utils/markdown.js +132 -0
  37. package/src/utils/mobilebert-tokenizer.js +141 -0
  38. package/src/utils/pr-chunking.js +276 -0
  39. package/src/utils/string-utils.js +28 -0
  40. package/src/zero-shot-classifier-open.js +392 -0
@@ -0,0 +1,364 @@
1
+ /**
2
+ * Cache Manager Module
3
+ *
4
+ * This module provides centralized cache management for embeddings,
5
+ * document contexts, and other cached data structures.
6
+ *
7
+ * Features:
8
+ * - Document context caching
9
+ * - H1 embedding caching
10
+ * - General embedding caching with size limits
11
+ * - Custom document chunks caching
12
+ * - Cache metrics and monitoring
13
+ * - Cache eviction policies
14
+ */
15
+ /**
16
+ * @typedef {import('./types.js').CacheMetrics} CacheMetrics
17
+ * @typedef {import('./types.js').EmbeddingVector} EmbeddingVector
18
+ * @typedef {import('./types.js').DocumentChunk} DocumentChunk
19
+ */
20
+
21
+ import chalk from 'chalk';
22
+ import { MAX_EMBEDDING_CACHE_SIZE } from './constants.js';
23
+
24
+ // ============================================================================
25
+ // CACHE CONFIGURATION
26
+ // ============================================================================
27
+
28
// Fallback entry limit applied when the caller does not supply `maxCacheSize`.
const DEFAULT_MAX_CACHE_SIZE = 1000;

// Fallback entry limit applied when the caller does not supply
// `maxEmbeddingCacheSize`; the value comes from the shared constants module.
const DEFAULT_MAX_EMBEDDING_CACHE_SIZE = MAX_EMBEDDING_CACHE_SIZE;
30
+
31
+ // ============================================================================
32
+ // CACHE MANAGER CLASS
33
+ // ============================================================================
34
+
35
/**
 * In-memory cache manager holding five independent Map-backed caches:
 * document contexts, in-flight document-context promises, H1 embeddings,
 * general embeddings, and custom document chunks.
 *
 * Each cache is bounded. When a NEW key is inserted into a full cache, the
 * entry that was inserted first is evicted (Map iteration order is insertion
 * order). Overwriting an existing key never evicts anything, because the
 * cache does not grow in that case.
 *
 * Every `get*` call is counted as a hit or a miss in shared statistics.
 */
export class CacheManager {
  /**
   * @param {Object} [options] - Optional configuration
   * @param {number} [options.maxCacheSize] - Entry limit for the document context,
   *   promise, H1 embedding and custom document chunk caches
   * @param {number} [options.maxEmbeddingCacheSize] - Entry limit for the general embedding cache
   */
  constructor(options = {}) {
    // `||` (not `??`) is deliberate: a limit of 0 falls back to the default.
    this.maxCacheSize = options.maxCacheSize || DEFAULT_MAX_CACHE_SIZE;
    this.maxEmbeddingCacheSize = options.maxEmbeddingCacheSize || DEFAULT_MAX_EMBEDDING_CACHE_SIZE;

    // Initialize cache Maps
    this.documentContextCache = new Map();
    this.documentContextPromiseCache = new Map();
    this.h1EmbeddingCache = new Map();
    this.embeddingCache = new Map();
    this.customDocumentChunks = new Map();

    // Cache statistics (shared by all five caches)
    this.stats = {
      hits: 0,
      misses: 0,
      evictions: 0,
      created: Date.now(),
    };

    // Cleanup guard
    this.cleaningUp = false;
  }

  // ============================================================================
  // DOCUMENT CONTEXT CACHE
  // ============================================================================

  /**
   * Get document context from cache
   * @param {string} key - Cache key
   * @returns {*} Cached document context or undefined
   */
  getDocumentContext(key) {
    return this._lookup(this.documentContextCache, key);
  }

  /**
   * Set document context in cache
   * @param {string} key - Cache key
   * @param {*} context - Document context to cache
   */
  setDocumentContext(key, context) {
    this._store(this.documentContextCache, this.maxCacheSize, key, context);
  }

  /**
   * Get document context promise from cache
   * @param {string} key - Cache key
   * @returns {Promise|undefined} Cached promise or undefined
   */
  getDocumentContextPromise(key) {
    return this._lookup(this.documentContextPromiseCache, key);
  }

  /**
   * Set document context promise in cache
   * @param {string} key - Cache key
   * @param {Promise} promise - Promise to cache
   */
  setDocumentContextPromise(key, promise) {
    this._store(this.documentContextPromiseCache, this.maxCacheSize, key, promise);
  }

  /**
   * Remove document context promise from cache
   * @param {string} key - Cache key
   */
  removeDocumentContextPromise(key) {
    this.documentContextPromiseCache.delete(key);
  }

  // ============================================================================
  // H1 EMBEDDING CACHE
  // ============================================================================

  /**
   * Get H1 embedding from cache
   * @param {string} key - Cache key
   * @returns {EmbeddingVector|undefined} Cached H1 embedding or undefined
   */
  getH1Embedding(key) {
    return this._lookup(this.h1EmbeddingCache, key);
  }

  /**
   * Set H1 embedding in cache
   * @param {string} key - Cache key
   * @param {EmbeddingVector} embedding - H1 embedding to cache
   */
  setH1Embedding(key, embedding) {
    this._store(this.h1EmbeddingCache, this.maxCacheSize, key, embedding);
  }

  // ============================================================================
  // GENERAL EMBEDDING CACHE
  // ============================================================================

  /**
   * Get embedding from cache
   * @param {string} key - Cache key
   * @returns {EmbeddingVector|undefined} Cached embedding or undefined
   */
  getEmbedding(key) {
    return this._lookup(this.embeddingCache, key);
  }

  /**
   * Set embedding in cache
   * @param {string} key - Cache key
   * @param {EmbeddingVector} embedding - Embedding to cache
   */
  setEmbedding(key, embedding) {
    this._store(this.embeddingCache, this.maxEmbeddingCacheSize, key, embedding);
  }

  // ============================================================================
  // CUSTOM DOCUMENT CHUNKS CACHE
  // ============================================================================

  /**
   * Get custom document chunks from cache
   * @param {string} projectPath - Project path key
   * @returns {DocumentChunk[]|undefined} Cached chunks or undefined
   */
  getCustomDocumentChunks(projectPath) {
    return this._lookup(this.customDocumentChunks, projectPath);
  }

  /**
   * Set custom document chunks in cache
   * @param {string} projectPath - Project path key
   * @param {DocumentChunk[]} chunks - Chunks to cache
   */
  setCustomDocumentChunks(projectPath, chunks) {
    this._store(this.customDocumentChunks, this.maxCacheSize, projectPath, chunks);
  }

  /**
   * Store custom documents (alias for setCustomDocumentChunks)
   *
   * Declared `async` so callers can await it; the store itself is synchronous.
   * @param {string} projectPath - Project path
   * @param {Array} chunks - Document chunks to store
   * @returns {Promise<void>}
   */
  async storeCustomDocuments(projectPath, chunks) {
    this.setCustomDocumentChunks(projectPath, chunks);
  }

  // ============================================================================
  // CACHE MANAGEMENT
  // ============================================================================

  /**
   * Clear all caches, reset hit/miss/eviction counters, and log the sizes
   * each cache had before it was cleared. The `created` timestamp is kept.
   */
  clearAllCaches() {
    // Capture sizes first so the log reports what was actually dropped.
    const docCacheSize = this.documentContextCache.size;
    const h1CacheSize = this.h1EmbeddingCache.size;
    const embeddingCacheSize = this.embeddingCache.size;
    const promiseCacheSize = this.documentContextPromiseCache.size;
    const customDocCacheSize = this.customDocumentChunks.size;

    this.documentContextCache.clear();
    this.documentContextPromiseCache.clear();
    this.h1EmbeddingCache.clear();
    this.embeddingCache.clear();
    this.customDocumentChunks.clear();

    // Reset stats
    this.stats.hits = 0;
    this.stats.misses = 0;
    this.stats.evictions = 0;

    console.log(
      chalk.yellow(
        `[CACHE] Cleared all caches - Document contexts: ${docCacheSize}, Promise: ${promiseCacheSize}, H1 embeddings: ${h1CacheSize}, Embeddings: ${embeddingCacheSize}, Custom docs: ${customDocCacheSize}`
      )
    );
  }

  /**
   * Clear specific cache type
   *
   * Statistics are left untouched. An unrecognized type only logs a warning.
   * @param {string} cacheType - One of 'documentContext', 'documentContextPromise',
   *   'h1Embedding', 'embedding', 'customDocumentChunks'
   */
  clearCache(cacheType) {
    const cacheMap = this._getCacheMap(cacheType);
    if (!cacheMap) {
      console.warn(chalk.yellow(`[CACHE] Unknown cache type: ${cacheType}`));
      return;
    }

    const size = cacheMap.size;
    cacheMap.clear();
    console.log(chalk.yellow(`[CACHE] Cleared ${cacheType} cache - ${size} items`));
  }

  /**
   * Get cache metrics
   * @returns {CacheMetrics} Cache metrics object
   */
  getCacheMetrics() {
    const { hits, misses, evictions, created } = this.stats;
    const lookups = hits + misses;
    // Two-decimal percentage once there has been at least one lookup; plain 0
    // otherwise (rendered as "0%"), matching the established output format.
    const hitRate = lookups > 0 ? ((hits / lookups) * 100).toFixed(2) : 0;

    return {
      sizes: {
        documentContext: this.documentContextCache.size,
        documentContextPromise: this.documentContextPromiseCache.size,
        h1Embedding: this.h1EmbeddingCache.size,
        embedding: this.embeddingCache.size,
        customDocumentChunks: this.customDocumentChunks.size,
      },
      limits: {
        maxCacheSize: this.maxCacheSize,
        maxEmbeddingCacheSize: this.maxEmbeddingCacheSize,
      },
      statistics: {
        hits,
        misses,
        evictions,
        hitRate: `${hitRate}%`,
      },
      uptime: Date.now() - created,
    };
  }

  /**
   * Get cache status summary
   * @returns {Object} Summary with `totalCachedItems`, `hitRate`,
   *   `memoryEfficiency` ('active' when anything is cached, else 'idle')
   *   and `uptime` in whole seconds (e.g. "12s")
   */
  getCacheStatus() {
    const metrics = this.getCacheMetrics();
    const totalSize = Object.values(metrics.sizes).reduce((sum, size) => sum + size, 0);

    return {
      totalCachedItems: totalSize,
      hitRate: metrics.statistics.hitRate,
      memoryEfficiency: totalSize > 0 ? 'active' : 'idle',
      uptime: `${Math.floor(metrics.uptime / 1000)}s`,
    };
  }

  /**
   * Cleanup method for compatibility with factory cleanup pattern
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.cleaningUp) {
      return; // Already cleaning up, prevent duplicate calls
    }

    this.cleaningUp = true;

    try {
      this.clearAllCaches();
      console.log(chalk.green('[CACHE] Cache cleanup completed'));
    } finally {
      // Always release the guard, even if clearing/logging throws.
      this.cleaningUp = false;
    }
  }

  // ============================================================================
  // PRIVATE METHODS
  // ============================================================================

  /**
   * Look up a key in one cache and record the hit or miss.
   * @param {Map} cacheMap - Cache map to read from
   * @param {*} key - Cache key
   * @returns {*} Cached value, or undefined when the key is absent
   * @private
   */
  _lookup(cacheMap, key) {
    if (cacheMap.has(key)) {
      this.stats.hits++;
      return cacheMap.get(key);
    }
    this.stats.misses++;
    return undefined;
  }

  /**
   * Store a value in one cache, making room first only when the key is new.
   * @param {Map} cacheMap - Cache map to write to
   * @param {number} maxSize - Maximum allowed size for that cache
   * @param {*} key - Cache key
   * @param {*} value - Value to cache
   * @private
   */
  _store(cacheMap, maxSize, key, value) {
    // Overwriting an existing key does not grow the map, so evicting here
    // would throw away an unrelated entry for nothing.
    if (!cacheMap.has(key)) {
      this._enforceMaxSize(cacheMap, maxSize);
    }
    cacheMap.set(key, value);
  }

  /**
   * Enforce maximum cache size by evicting oldest entries
   *
   * Evicts until the map is strictly below `maxSize`, leaving room for one
   * insertion. Stops once the map is empty so a non-positive limit cannot
   * cause an endless loop.
   * @param {Map} cacheMap - Cache map to enforce size limit on
   * @param {number} maxSize - Maximum allowed size
   * @private
   */
  _enforceMaxSize(cacheMap, maxSize) {
    while (cacheMap.size > 0 && cacheMap.size >= maxSize) {
      // Map iterates in insertion order, so the first key is the oldest.
      const oldestKey = cacheMap.keys().next().value;
      cacheMap.delete(oldestKey);
      this.stats.evictions++;
    }
  }

  /**
   * Get cache map by type
   * @param {string} cacheType - Cache type
   * @returns {Map|null} Cache map or null if not found
   * @private
   */
  _getCacheMap(cacheType) {
    switch (cacheType) {
      case 'documentContext':
        return this.documentContextCache;
      case 'documentContextPromise':
        return this.documentContextPromiseCache;
      case 'h1Embedding':
        return this.h1EmbeddingCache;
      case 'embedding':
        return this.embeddingCache;
      case 'customDocumentChunks':
        return this.customDocumentChunks;
      default:
        return null;
    }
  }
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Embeddings Constants
3
+ *
4
+ * This module contains all shared constants used across the embeddings system.
5
+ * These constants are extracted from the original embeddings.js for better modularity.
6
+ */
7
+
8
+ import path from 'node:path';
9
+
10
+ // FastEmbed Model Configuration
11
/** Vector dimension for the bge-small-en-v1.5 model. */
export const EMBEDDING_DIMENSIONS = 384;
/** Model name string for the FastEmbed model. */
export const MODEL_NAME_STRING = 'bge-small-en-v1.5';

// ---- System constants ----
export const MAX_RETRIES = 3;

// ---- Directory names ----
export const LANCEDB_DIR_NAME = '.ai-review-lancedb';
export const FASTEMBED_CACHE_DIR_NAME = '.ai-review-fastembed-cache';

// ---- Directory paths ----
// Root under which both data directories are placed:
//  - when the CI environment variable is set: GITHUB_WORKSPACE_PATH, else the working directory
//  - otherwise: HOME, then USERPROFILE, then the working directory
const BASE_DIR = (() => {
  const { env } = process;
  if (env.CI) {
    // Prioritize the explicitly passed workspace path
    return env.GITHUB_WORKSPACE_PATH || process.cwd();
  }
  return env.HOME || env.USERPROFILE || process.cwd();
})();

export const LANCEDB_PATH = path.join(BASE_DIR, LANCEDB_DIR_NAME);
export const FASTEMBED_CACHE_DIR = path.join(BASE_DIR, FASTEMBED_CACHE_DIR_NAME);

// ---- Database table names ----
export const TABLE_NAMES = {
  FILE_EMBEDDINGS: 'file_embeddings',
  DOCUMENT_CHUNK: 'document_chunk_embeddings',
  PR_COMMENTS: 'pr_comments',
  PROJECT_SUMMARIES: 'project_summaries',
};

// ---- Cache configuration ----
export const MAX_EMBEDDING_CACHE_SIZE = 1000;