codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -0,0 +1,529 @@
1
+ import fs from 'node:fs';
2
+ import * as factory from './embeddings/factory.js';
3
+ import {
4
+ loadFeedbackData,
5
+ shouldSkipSimilarIssue,
6
+ calculateWordSimilarity,
7
+ calculateIssueSimilarity,
8
+ extractDismissedPatterns,
9
+ generateFeedbackContext,
10
+ isSemanticSimilarityAvailable,
11
+ initializeSemanticSimilarity,
12
+ } from './feedback-loader.js';
13
+
14
// Stub the three filesystem calls these tests configure, keeping every other
// `node:fs` export untouched.
vi.mock('node:fs', async (importOriginal) => {
  const original = await importOriginal();

  // One mock per function, shared by the named exports and the default export.
  // With separate vi.fn() instances, `fs.existsSync.mockReturnValue(...)` in a
  // test would configure only the default export, and code that does
  // `import { existsSync } from 'node:fs'` would see an unconfigured mock.
  const sharedMocks = {
    existsSync: vi.fn(),
    readdirSync: vi.fn(),
    readFileSync: vi.fn(),
  };

  return {
    ...original,
    ...sharedMocks,
    default: {
      ...original,
      ...sharedMocks,
    },
  };
});
29
+
30
// Replace the embeddings factory module with a single mocked export; tests that
// need an embeddings system set the return value themselves.
vi.mock('./embeddings/factory.js', () => {
  return { getDefaultEmbeddingsSystem: vi.fn() };
});
33
+
34
// Expectations for the word-overlap score: identical and disjoint inputs,
// partial overlap, empty/nullish arguments, and input normalisation
// (case, punctuation, short words).
describe('calculateWordSimilarity', () => {
  it('should return 1.0 for identical texts', () => {
    const sentence = 'The quick brown fox jumps over the lazy dog';
    expect(calculateWordSimilarity(sentence, sentence)).toBe(1);
  });

  it('should return 0 for completely different texts', () => {
    const score = calculateWordSimilarity('apple banana cherry date', 'elephant fox giraffe hippo');
    expect(score).toBe(0);
  });

  it('should return value between 0 and 1 for partially similar texts', () => {
    const score = calculateWordSimilarity('missing null check in error handler', 'add null check before accessing property');
    expect(score).toBeGreaterThan(0);
    expect(score).toBeLessThan(1);
  });

  it('should return 0 for empty texts', () => {
    const pairs = [
      ['', 'some text'],
      ['some text', ''],
      ['', ''],
    ];
    for (const [left, right] of pairs) {
      expect(calculateWordSimilarity(left, right)).toBe(0);
    }
  });

  it('should return 0 for null/undefined texts', () => {
    const fromNull = calculateWordSimilarity(null, 'text');
    expect(fromNull).toBe(0);
    const fromUndefined = calculateWordSimilarity('text', undefined);
    expect(fromUndefined).toBe(0);
  });

  it('should be case insensitive', () => {
    const score = calculateWordSimilarity('Hello World', 'hello world');
    expect(score).toBe(1);
  });

  it('should ignore punctuation', () => {
    const score = calculateWordSimilarity('hello, world!', 'hello world');
    expect(score).toBe(1);
  });

  it('should filter short words (length <= 2)', () => {
    // Only "cat" is compared; "a" and "an" are expected to be dropped as too short.
    const score = calculateWordSimilarity('a cat', 'an cat');
    expect(score).toBe(1);
  });
});
80
+
81
// Tests for loading feedback-*.json files from a directory, driven entirely by
// the mocked `node:fs` functions (existsSync / readdirSync / readFileSync).
describe('loadFeedbackData', () => {
  beforeEach(() => {
    mockConsoleSelective('log');
  });

  it('should return empty object for null path', async () => {
    const result = await loadFeedbackData(null);
    expect(result).toEqual({});
  });

  it('should return empty object for non-existent directory', async () => {
    fs.existsSync.mockReturnValue(false);

    const result = await loadFeedbackData('/nonexistent');

    expect(result).toEqual({});
  });

  it('should return empty object when no feedback files found', async () => {
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['other-file.txt']);

    const result = await loadFeedbackData('/feedback');

    expect(result).toEqual({});
  });

  it('should load and merge feedback files', async () => {
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['feedback-1.json', 'feedback-2.json']);
    // Serve different content per file so the merge of both is observable.
    fs.readFileSync.mockImplementation((path) => {
      if (path.includes('feedback-1')) {
        return JSON.stringify({ feedback: { issue1: { id: 1 } } });
      }
      return JSON.stringify({ feedback: { issue2: { id: 2 } } });
    });

    const result = await loadFeedbackData('/feedback');

    expect(result.issue1).toBeDefined();
    expect(result.issue2).toBeDefined();
  });

  it('should handle parsing errors gracefully', async () => {
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['feedback-bad.json']);
    fs.readFileSync.mockReturnValue('invalid json');

    const result = await loadFeedbackData('/feedback');

    expect(result).toEqual({});
  });

  it('should only read files matching feedback-*.json pattern', async () => {
    // Drop calls recorded by earlier tests so the count below reflects this
    // test alone, whether or not the runner is configured to clear mocks.
    fs.readFileSync.mockClear();
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['feedback-1.json', 'other.json', 'feedback-2.txt', 'config.json']);
    fs.readFileSync.mockReturnValue(JSON.stringify({ feedback: { item: {} } }));

    await loadFeedbackData('/feedback');

    expect(fs.readFileSync).toHaveBeenCalledTimes(1);
    // The single read must be the one file that matches the pattern.
    expect(fs.readFileSync.mock.calls[0][0]).toContain('feedback-1.json');
  });
});
144
+
145
// Checks when a new issue is skipped because the recorded feedback holds a
// similar issue that was dismissed (negative sentiment or a dismissing reply).
describe('shouldSkipSimilarIssue', () => {
  it('should return false for empty feedback data', async () => {
    const skip = await shouldSkipSimilarIssue('some issue', {});
    expect(skip).toBe(false);
  });

  it('should return false for null feedback data', async () => {
    const skip = await shouldSkipSimilarIssue('some issue', null);
    expect(skip).toBe(false);
  });

  it('should return true for similar dismissed issue', async () => {
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'Missing null check in handler function' },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Missing null check in handler function', recorded, options);

    expect(skip).toBe(true);
  });

  it('should return false for dissimilar issues', async () => {
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'CSS styling issue in button component' },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Database connection timeout handling', recorded, options);

    expect(skip).toBe(false);
  });

  it('should detect dismissed issues from user replies', async () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Consider adding error handling',
        userReplies: [{ body: 'This is a false positive' }],
      },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Consider adding error handling', recorded, options);

    expect(skip).toBe(true);
  });

  it('should detect resolved issues from user replies', async () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Fix memory leak',
        userReplies: [{ body: 'This has been resolved in another PR' }],
      },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Fix memory leak', recorded, options);

    expect(skip).toBe(true);
  });

  it('should respect similarity threshold', async () => {
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'Add logging to error handler' },
    };
    // The texts differ by one word; a 0.95 threshold is expected to reject the match.
    const strict = { similarityThreshold: 0.95 };

    const skip = await shouldSkipSimilarIssue('Add logging to success handler', recorded, strict);

    expect(skip).toBe(false);
  });
});
224
+
225
// Checks the { similarity, method } result reported for a pair of issue texts.
describe('calculateIssueSimilarity', () => {
  it('should return zero similarity for empty texts', async () => {
    const { similarity, method } = await calculateIssueSimilarity('', 'text');
    expect(similarity).toBe(0);
    expect(method).toBe('none');
  });

  it('should use word-based similarity when embeddings not available', async () => {
    // Semantic similarity is switched off explicitly for this call.
    const options = { useSemanticSimilarity: false };

    const { method, similarity } = await calculateIssueSimilarity('hello world', 'hello world', options);

    expect(method).toBe('word-based');
    expect(similarity).toBe(1);
  });

  it('should return similarity between 0 and 1', async () => {
    const { similarity } = await calculateIssueSimilarity('missing error handling', 'add error handling to function');

    expect(similarity).toBeGreaterThanOrEqual(0);
    expect(similarity).toBeLessThanOrEqual(1);
  });
});
246
+
247
// Checks which feedback entries are turned into "dismissed" patterns and that
// the number of returned patterns can be capped.
describe('extractDismissedPatterns', () => {
  it('should return empty array for empty feedback', () => {
    for (const emptyInput of [{}, null]) {
      expect(extractDismissedPatterns(emptyInput)).toEqual([]);
    }
  });

  it('should extract negative sentiment issues', () => {
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'Add type annotations' },
    };

    const extracted = extractDismissedPatterns(recorded);

    expect(extracted).toHaveLength(1);
    const [first] = extracted;
    expect(first.issue).toBe('Add type annotations');
    expect(first.sentiment).toBe('negative');
  });

  it('should extract issues dismissed as false positive', () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Missing semicolon',
        userReplies: [{ body: 'This is a false positive, we use ASI' }],
      },
    };

    const extracted = extractDismissedPatterns(recorded);

    expect(extracted).toHaveLength(1);
    expect(extracted[0].reason).toContain('false positive');
  });

  it('should limit patterns to maxPatterns', () => {
    // Twenty dismissed entries, keyed issue0..issue19 in insertion order.
    const recorded = Object.fromEntries(
      Array.from({ length: 20 }, (_, index) => [
        `issue${index}`,
        { overallSentiment: 'negative', originalIssue: `Issue ${index}` },
      ]),
    );

    const extracted = extractDismissedPatterns(recorded, { maxPatterns: 5 });

    expect(extracted).toHaveLength(5);
  });
});
297
+
298
// Checks the text generated from a list of dismissed patterns.
describe('generateFeedbackContext', () => {
  it('should return empty string for empty patterns', () => {
    for (const emptyInput of [[], null]) {
      expect(generateFeedbackContext(emptyInput)).toBe('');
    }
  });

  it('should generate context text from patterns', () => {
    const text = generateFeedbackContext([
      { issue: 'Add error handling', reason: 'Not applicable here' },
      { issue: 'Missing tests', reason: 'Tests are in another file' },
    ]);

    for (const fragment of ['Add error handling', 'Missing tests', 'previously dismissed']) {
      expect(text).toContain(fragment);
    }
  });

  it('should include numbered list', () => {
    const text = generateFeedbackContext([
      { issue: 'Issue 1', reason: 'Reason 1' },
      { issue: 'Issue 2', reason: 'Reason 2' },
    ]);

    expect(text).toContain('1.');
    expect(text).toContain('2.');
  });
});
329
+
330
// Smoke tests for the semantic-similarity switch, run against a stubbed
// embeddings system returned by the mocked factory.
describe('semantic similarity', () => {
  let embeddingsStub;

  beforeEach(() => {
    mockConsoleSelective('log', 'warn', 'error');

    const initialize = vi.fn().mockResolvedValue(undefined);
    const calculateEmbedding = vi.fn().mockResolvedValue(createMockEmbedding());
    embeddingsStub = { initialize, calculateEmbedding };

    factory.getDefaultEmbeddingsSystem.mockReturnValue(embeddingsStub);
  });

  it('should report semantic similarity as unavailable initially', () => {
    // Nothing has initialised semantic similarity at this point in the file.
    const available = isSemanticSimilarityAvailable();
    expect(available).toBe(false);
  });

  it('should attempt to initialize semantic similarity', async () => {
    // Only verifies that the call settles without rejecting.
    const pending = initializeSemanticSimilarity();
    await expect(pending).resolves.not.toThrow();
  });
});
354
+
355
// Edge cases for skip detection: incomplete feedback entries, individual
// dismissal phrases in user replies, and verbose logging.
describe('shouldSkipSimilarIssue edge cases', () => {
  beforeEach(() => {
    mockConsoleSelective('log', 'warn');
  });

  it('should handle feedback entries without originalIssue', async () => {
    // The entry deliberately has no originalIssue field.
    const recorded = {
      issue1: { overallSentiment: 'negative' },
    };

    const skip = await shouldSkipSimilarIssue('some issue', recorded);

    expect(skip).toBe(false);
  });

  it('should handle feedback entries with empty userReplies', async () => {
    const recorded = {
      issue1: { overallSentiment: 'neutral', originalIssue: 'Some issue', userReplies: [] },
    };

    const skip = await shouldSkipSimilarIssue('Some issue', recorded, { similarityThreshold: 0.9 });

    // Neutral sentiment and no replies: nothing marks the entry as dismissed.
    expect(skip).toBe(false);
  });

  it('should detect "ignore" dismissal phrase', async () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Missing validation',
        userReplies: [{ body: 'Please ignore this issue' }],
      },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Missing validation', recorded, options);

    expect(skip).toBe(true);
  });

  it('should detect "not relevant" dismissal phrase', async () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Unusual pattern',
        userReplies: [{ body: 'Not relevant for this codebase' }],
      },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Unusual pattern', recorded, options);

    expect(skip).toBe(true);
  });

  it('should detect "resolved" dismissal phrase', async () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Different approach',
        userReplies: [{ body: 'This has been resolved in another PR' }],
      },
    };
    const options = { similarityThreshold: 0.5 };

    const skip = await shouldSkipSimilarIssue('Different approach', recorded, options);

    expect(skip).toBe(true);
  });

  it('should use verbose logging when enabled', async () => {
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'Some issue' },
    };
    const options = { similarityThreshold: 0.5, verbose: true };

    await shouldSkipSimilarIssue('Some issue', recorded, options);

    expect(console.log).toHaveBeenCalled();
  });
});
443
+
444
// Edge cases for pattern extraction: individual dismissal phrases, positive
// feedback that must be left out, and verbose logging.
describe('extractDismissedPatterns edge cases', () => {
  it('should detect "not relevant" dismissal in replies', () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Old code style',
        userReplies: [{ body: 'Not relevant for this codebase' }],
      },
    };

    const extracted = extractDismissedPatterns(recorded);

    expect(extracted).toHaveLength(1);
    expect(extracted[0].reason).toContain('Not relevant');
  });

  it('should detect "ignore" dismissal in replies', () => {
    const recorded = {
      issue1: {
        overallSentiment: 'neutral',
        originalIssue: 'Already fixed',
        userReplies: [{ body: 'Please ignore this issue' }],
      },
    };

    const extracted = extractDismissedPatterns(recorded);

    expect(extracted).toHaveLength(1);
    expect(extracted[0].reason).toContain('ignore');
  });

  it('should not extract positive sentiment issues without dismissal phrases', () => {
    const recorded = {
      issue1: {
        overallSentiment: 'positive',
        originalIssue: 'Good suggestion',
        userReplies: [{ body: 'Great catch, fixed!' }],
      },
    };

    const extracted = extractDismissedPatterns(recorded);

    expect(extracted).toHaveLength(0);
  });

  it('should use verbose logging when enabled', () => {
    mockConsoleSelective('log');
    const recorded = {
      issue1: { overallSentiment: 'negative', originalIssue: 'Some issue' },
    };

    extractDismissedPatterns(recorded, { verbose: true });

    expect(console.log).toHaveBeenCalled();
  });
});
504
+
505
// Edge cases for feedback loading: a file without a `feedback` key, and a load
// performed with the verbose option set.
describe('loadFeedbackData edge cases', () => {
  beforeEach(() => {
    mockConsoleSelective('log', 'warn');
  });

  it('should handle files with no feedback property', async () => {
    const fileContent = JSON.stringify({ other: 'data' });
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['feedback-1.json']);
    fs.readFileSync.mockReturnValue(fileContent);

    const loaded = await loadFeedbackData('/feedback');

    expect(loaded).toEqual({});
  });

  // NOTE(review): the title says "default" but the call passes verbose: true
  // explicitly — confirm which of the two is intended.
  it('should use default verbose option', async () => {
    const fileContent = JSON.stringify({ feedback: { item: { id: 1 } } });
    fs.existsSync.mockReturnValue(true);
    fs.readdirSync.mockReturnValue(['feedback-1.json']);
    fs.readFileSync.mockReturnValue(fileContent);

    const loaded = await loadFeedbackData('/feedback', { verbose: true });

    expect(loaded).toHaveProperty('item');
  });
});