coderev-cli 1.0.26 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,385 @@
1
+ const { describe, it, before, after } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const os = require('os');
6
+
7
+ const {
8
+ extractSymbols,
9
+ buildTfIdfIndex,
10
+ searchIndex,
11
+ buildIndex,
12
+ loadIndex,
13
+ retrieveContext,
14
+ buildReviewContext,
15
+ isIndexStale,
16
+ INDEX_DIR,
17
+ } = require('./rag-indexer');
18
+
/**
 * Create a uniquely named temporary directory populated with fixture files.
 *
 * @param {Object<string, string>} [files={}] - Map of relative file path to
 *   file content. Intermediate directories are created as needed.
 * @returns {string} Absolute path of the created directory. The caller is
 *   responsible for removing it (e.g. with `fs.rmSync(dir, { recursive: true })`).
 */
function createTestRepo(files = {}) {
  // mkdtempSync appends random characters to the prefix, so two fixtures
  // created within the same millisecond never share a directory. A
  // Date.now()-based name allowed that, and one test's cleanup could then
  // delete another test's files.
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'coderev-rag-test-'));

  for (const [filePath, content] of Object.entries(files)) {
    const fullPath = path.join(dir, filePath);
    fs.mkdirSync(path.dirname(fullPath), { recursive: true });
    fs.writeFileSync(fullPath, content, 'utf-8');
  }

  return dir;
}
32
+
// Tests for extractSymbols(source, fileName): each case feeds a small source
// snippet plus a file name (whose extension selects the language) and checks
// the names, types, line numbers, or language tags of the returned symbols.
describe('extractSymbols', () => {
  it('should extract function declarations from JavaScript', () => {
    const source = `
function hello(name) {
  return "Hello " + name;
}

const arrow = (x) => x * 2;

class MyClass {
  constructor() { }
  getValue() { return 42; }
}

import { foo } from './bar';
export default hello;
`;
    const symbols = extractSymbols(source, 'test.js');

    const names = symbols.map(s => s.name);
    assert.ok(names.includes('hello'), 'should find hello function');
    assert.ok(names.includes('arrow'), 'should find arrow function');
    assert.ok(names.includes('MyClass'), 'should find MyClass');
    assert.ok(names.includes('getValue'), 'should find getValue method');
  });

  it('should extract import paths from JavaScript', () => {
    const source = `import React from 'react';
import { useState } from './hooks';`;
    const symbols = extractSymbols(source, 'test.js');
    const imports = symbols.filter(s => s.type === 'import');
    assert.ok(imports.some(s => s.name.includes('react')), 'should find react import');
  });

  it('should extract Python functions and classes', () => {
    const source = `
def calculate(a, b):
    return a + b

class Calculator:
    def add(self, x, y):
        return x + y
`;
    const symbols = extractSymbols(source, 'test.py');
    const names = symbols.map(s => s.name);
    assert.ok(names.includes('calculate'), 'should find calculate function');
    assert.ok(names.includes('Calculator'), 'should find Calculator class');
    assert.ok(names.includes('add'), 'should find add method');
  });

  it('should extract Go functions and types', () => {
    const source = `
func main() {
  fmt.Println("hello")
}

func (s *Server) Start(port int) error {
  return nil
}

type Config struct {
  Host string
}
`;
    const symbols = extractSymbols(source, 'test.go');
    const names = symbols.map(s => s.name);
    assert.ok(names.includes('main'), 'should find main');
    assert.ok(names.includes('Start'), 'should find Start');
    assert.ok(names.includes('Config'), 'should find Config struct');
  });

  it('should extract Rust functions and types', () => {
    const source = `
pub fn new() -> Self {
  Self {}
}

pub struct User {
  name: String,
}

pub trait Display {
  fn fmt(&self) -> String;
}
`;
    const symbols = extractSymbols(source, 'test.rs');
    const names = symbols.map(s => s.name);
    assert.ok(names.includes('new'), 'should find new');
    assert.ok(names.includes('User'), 'should find User struct');
    assert.ok(names.includes('Display'), 'should find Display trait');
  });

  it('should extract Java/Kotlin methods and classes', () => {
    const source = `
public class UserService {
  public User findById(Long id) {
    return repository.findById(id);
  }
}
`;
    const symbols = extractSymbols(source, 'test.java');
    const names = symbols.map(s => s.name);
    assert.ok(names.includes('UserService'), 'should find UserService class');
    assert.ok(names.includes('findById'), 'should find findById method');
  });

  it('should not extract noise keywords', () => {
    const source = `if (x > 0) { return x; } for (;;) { break; } while (true) { continue; }`;
    const symbols = extractSymbols(source, 'test.js');
    const noise = ['if', 'for', 'while', 'return', 'break', 'continue'];
    for (const s of symbols) {
      assert.ok(!noise.includes(s.name), `${s.name} should not be extracted`);
    }
  });

  it('should include line numbers in extracted symbols', () => {
    const source = `// line 1\n// line 2\nfunction findMe() {\n return 1;\n}`;
    const symbols = extractSymbols(source, 'test.js');
    const found = symbols.find(s => s.name === 'findMe');
    assert.ok(found, 'should find findMe');
    assert.equal(found.line, 3, 'should be on line 3');
    assert.equal(found.type, 'function');
  });

  it('should assign correct language based on extension', () => {
    // The Python fixture uses a real newline ("\n"); the previous "\\n"
    // placed a literal backslash followed by "n" inside the source text.
    const cases = [
      { code: 'function test() {}', file: 'a.js', lang: 'js/ts' },
      { code: 'def test():\n  pass', file: 'b.py', lang: 'python' },
      { code: 'func Test() {}', file: 'c.go', lang: 'go' },
      { code: 'fn test() {}', file: 'd.rs', lang: 'rust' },
    ];

    for (const { code, file, lang } of cases) {
      const symbols = extractSymbols(code, file);
      // Guard first so an empty result fails with a readable message instead
      // of a TypeError on symbols[0].
      assert.ok(symbols.length > 0, `should extract a symbol from ${file}`);
      assert.equal(symbols[0].lang, lang);
    }
  });
});
171
+
// Tests for the in-memory search path: buildTfIdfIndex(symbols) produces an
// index and searchIndex(index, query, topK) returns ranked hits whose
// `symbol` property is one of the indexed symbol records.
describe('buildTfIdfIndex + searchIndex', () => {
  // Builds a symbol record in the same shape the tests feed to the indexer.
  const makeSymbol = (name, signature, type, lang, file, line) => ({
    name,
    signature,
    type,
    lang,
    file,
    line,
    snippet: '',
  });

  it('should build index and find relevant symbols', () => {
    const index = buildTfIdfIndex([
      makeSymbol('getUser', 'id', 'function', 'js/ts', 'api/users.js', 10),
      makeSymbol('UserService', '', 'class', 'js/ts', 'api/users.js', 1),
      makeSymbol('calculate', 'a, b', 'function', 'python', 'calc.py', 5),
      makeSymbol('render', '', 'function', 'js/ts', 'ui/app.js', 20),
    ]);

    // Query for user-related symbols.
    const hits = searchIndex(index, 'user api', 3);
    const hasHit = (name) => hits.some(hit => hit.symbol.name === name);

    assert.ok(hits.length > 0, 'should find results');
    assert.ok(hasHit('getUser'), 'should find getUser');
    assert.ok(hasHit('UserService'), 'should find UserService');
  });

  it('should return empty results for unknown queries', () => {
    const index = buildTfIdfIndex([
      makeSymbol('getUser', '', 'function', 'js/ts', 'api.js', 1),
    ]);
    const hits = searchIndex(index, 'unknown_term_xyz', 3);
    assert.equal(hits.length, 0, 'should return no results for unrelated query');
  });

  it('should respect topK limit', () => {
    // Fifty near-identical symbols: func0..func49 in file0.js..file49.js.
    const manySymbols = Array.from({ length: 50 }, (_, i) =>
      makeSymbol(`func${i}`, '', 'function', 'js/ts', `file${i}.js`, i));

    const index = buildTfIdfIndex(manySymbols);
    const hits = searchIndex(index, 'func', 5);
    assert.ok(hits.length <= 5, 'should respect topK limit');
  });
});
217
+
// Tests for on-disk indexing: buildIndex(repoRoot, options) scans a fixture
// repository and persists its result under INDEX_DIR; loadIndex(repoRoot)
// reads that result back.
describe('buildIndex + loadIndex', () => {
  // Runs `body` against a fresh fixture repo and always removes the repo
  // afterwards, so a failing assertion no longer leaks a temp directory.
  function withRepo(files, body) {
    const repo = createTestRepo(files);
    try {
      return body(repo);
    } finally {
      fs.rmSync(repo, { recursive: true, force: true });
    }
  }

  it('should index a real directory structure', () => {
    withRepo({
      'src/index.js': 'module.exports = function main() { return 1; };',
      'src/utils.js': 'function helper(x) { return x * 2; }',
      'lib/auth.js': 'class Auth { login() {} logout() {} }',
      'README.md': '# Test', // Not indexable
      'package.json': '{}', // Not indexable
    }, (repo) => {
      const index = buildIndex(repo, { maxFiles: 100 });

      assert.ok(index.stats.filesScanned >= 3, 'should scan at least 3 JS files');
      assert.ok(index.stats.symbolsExtracted >= 4, 'should extract symbols');
      // NOTE(review): assumes timeMs comes from a millisecond-resolution
      // clock, in which case a tiny fixture can legitimately report 0.
      // Only require a non-negative number so the test is not timing-flaky.
      assert.equal(typeof index.stats.timeMs, 'number', 'should measure time');
      assert.ok(index.stats.timeMs >= 0, 'should measure time');
      assert.equal(typeof index.stats.languageBreakdown['js/ts'], 'number', 'should have JS language stat');

      // Check index persistence
      const indexPath = path.join(repo, INDEX_DIR, 'codebase-index.json');
      assert.ok(fs.existsSync(indexPath), 'should persist index file');

      const metaPath = path.join(repo, INDEX_DIR, 'index-meta.json');
      assert.ok(fs.existsSync(metaPath), 'should persist meta file');
    });
  });

  it('should load a persisted index', () => {
    withRepo({
      'src/app.js': 'function init() { return true; }',
    }, (repo) => {
      const built = buildIndex(repo, { maxFiles: 100 });
      const loaded = loadIndex(repo);

      assert.ok(loaded, 'should load index');
      assert.equal(loaded.stats.symbolsExtracted, built.stats.symbolsExtracted, 'should match built stats');
      assert.ok(loaded.tfidf.vocabulary instanceof Map, 'should rebuild vocabulary Map');
    });
  });

  it('should return null for non-existent index', () => {
    const index = loadIndex('/nonexistent/path');
    assert.equal(index, null);
  });

  it('should skip node_modules directory', () => {
    withRepo({
      'src/app.js': 'function init() {}',
      'node_modules/lib/index.js': 'function hack() {}', // Should be skipped
    }, (repo) => {
      const index = buildIndex(repo, { maxFiles: 100 });
      const symFiles = index.symbols.map(s => s.file);

      assert.ok(symFiles.some(f => f.includes('src/app.js')), 'should include src');
      assert.ok(!symFiles.some(f => f.includes('node_modules')), 'should skip node_modules');
    });
  });

  it('should respect maxFiles limit', () => {
    withRepo({}, (repo) => {
      // Create more files than the limit allows.
      for (let i = 0; i < 10; i++) {
        fs.writeFileSync(path.join(repo, `file${i}.js`), `function f${i}() { return ${i}; }`, 'utf-8');
      }

      const index = buildIndex(repo, { maxFiles: 5 });
      assert.ok(index.stats.filesScanned <= 5, 'should not exceed maxFiles');
    });
  });
});
294
+
// Tests for context retrieval: retrieveContext(index, diff) returns an object
// with `symbols`, `changedFiles`, `grouped.sameFile` and `summary`, and
// buildReviewContext(index, diff) returns the text injected into the prompt.
describe('retrieveContext + buildReviewContext', () => {
  it('should retrieve same-file context from changed files', () => {
    const repo = createTestRepo({
      'src/api.js': 'function getUsers() { return []; }\nfunction saveUser(data) { return {}; }\nclass UserRepo { find() {} }',
      'src/app.js': 'function main() { getUsers(); }',
    });

    // try/finally so the fixture is removed even when an assertion fails.
    try {
      const index = buildIndex(repo);

      // Unified diff built line by line; the context line keeps its leading
      // space and the text ends with a trailing newline.
      const diff = [
        'diff --git a/src/api.js b/src/api.js',
        '--- a/src/api.js',
        '+++ b/src/api.js',
        '@@ -1,3 +1,4 @@',
        ' function getUsers() { return []; }',
        '+function deleteUser(id) { return true; }',
        '',
      ].join('\n');
      const ctx = retrieveContext(index, diff);

      assert.ok(ctx.symbols.length > 0, 'should find symbols');
      assert.ok(ctx.changedFiles.includes('src/api.js'), 'should detect changed file');
      assert.ok(ctx.grouped.sameFile.some(s => s.name === 'getUsers'), 'should find same-file symbol');
    } finally {
      fs.rmSync(repo, { recursive: true, force: true });
    }
  });

  it('should return empty context when no index available', () => {
    const ctx = retrieveContext(null, 'some diff');
    assert.equal(ctx.symbols.length, 0);
    assert.ok(ctx.summary.includes('No codebase index'));
  });

  it('should return empty context for empty symbols', () => {
    const ctx = retrieveContext({ symbols: [] }, 'some diff');
    assert.equal(ctx.symbols.length, 0);
  });

  it('should build review context string for prompt injection', () => {
    const repo = createTestRepo({
      'src/auth.js': 'function login(user, pass) { return true; }\nclass AuthService { validate() {} }',
    });

    // try/finally so the fixture is removed even when an assertion fails.
    try {
      const index = buildIndex(repo);

      const diff = [
        'diff --git a/src/auth.js b/src/auth.js',
        '--- a/src/auth.js',
        '+++ b/src/auth.js',
        '@@ -1,1 +1,2 @@',
        '+function logout() { return true; }',
        '',
      ].join('\n');
      const contextStr = buildReviewContext(index, diff);

      assert.ok(contextStr.includes('Codebase Context'), 'should include codebase context header');
      assert.ok(contextStr.includes('Same File Symbols'), 'should include same-file section');
      assert.ok(contextStr.includes('login'), 'should include login function');
    } finally {
      fs.rmSync(repo, { recursive: true, force: true });
    }
  });

  it('should build empty context for empty index', () => {
    const result = buildReviewContext(null, 'some diff');
    assert.equal(result, '');
  });
});
358
+
// Tests for isIndexStale(repoRoot, maxAge): a missing index counts as stale,
// and a freshly built index is judged against the supplied maxAge.
describe('isIndexStale', () => {
  it('should return true for non-existent index', () => {
    assert.equal(isIndexStale('/nonexistent-path'), true);
  });

  it('should detect stale index', () => {
    const repo = createTestRepo({
      'src/app.js': 'function test() {}',
    });

    // try/finally so the fixture is removed even when the assertion fails.
    try {
      buildIndex(repo);
      // NOTE(review): this expects an index of age ~0 to count as stale when
      // maxAge is 0 — confirm the comparison in isIndexStale is inclusive,
      // otherwise this can be timing-dependent.
      assert.equal(isIndexStale(repo, 0), true, 'should be stale with maxAge 0');
    } finally {
      fs.rmSync(repo, { recursive: true, force: true });
    }
  });

  it('should detect fresh index', () => {
    const repo = createTestRepo({
      'src/app.js': 'function test() {}',
    });

    // try/finally so the fixture is removed even when the assertion fails.
    try {
      buildIndex(repo);
      assert.equal(isIndexStale(repo, 999999), false, 'should be fresh with large maxAge');
    } finally {
      fs.rmSync(repo, { recursive: true, force: true });
    }
  });
});
package/src/reviewer.js CHANGED
@@ -3,6 +3,7 @@ const { cacheKey, getCached, setCached } = require('./cache');
3
3
  const { recordReview } = require('./stats');
4
4
  const { getRuleDescriptions } = require('./rules');
5
5
  const { analyzeDiffContext, tagIssuesWithBlame } = require('./blame');
6
+ const { loadIndex, buildReviewContext, isIndexStale } = require('./rag-indexer');
6
7
 
7
8
  // ── 多智能体并行审查 ──
8
9
 
@@ -122,6 +123,7 @@ function buildAgentPrompts(diff, config, options = {}) {
122
123
  const rulesConfig = config.rules || {};
123
124
  const ruleLines = getRuleDescriptions(rulesConfig, diff);
124
125
  const auditBlock = options.audit ? '\n- **SECURITY AUDIT MODE ACTIVE**' : '';
126
+ const ragContext = options.ragContext || '';
125
127
 
126
128
  return AGENT_ROLES.map(role => {
127
129
  return {
@@ -136,8 +138,10 @@ Your task: Review the provided git diff and ${role.focus}
136
138
  ${options.projectHint ? `Project context:
137
139
  ${options.projectHint}
138
140
 
139
- ` : ''}
140
- Return a JSON object:
141
+ ` : ''}${ragContext ? `Codebase context (RAG):
142
+ ${ragContext}
143
+
144
+ ` : ''}Return a JSON object:
141
145
  \`\`\`json
142
146
  {
143
147
  "issues": [
@@ -220,6 +224,7 @@ async function runParallelAgents(apiKey, config, prompts) {
220
224
  * @param {string} [options.ignorePattern] - File patterns to ignore
221
225
  * @param {number} [options.minConfidence] - Minimum confidence threshold (default: 60)
222
226
  * @param {boolean} [options.blame] - Enable git blame context analysis
227
+ * @param {boolean} [options.rag] - Enable RAG codebase context retrieval
223
228
  * @returns {Promise<object>} Review result with issues, suggestions, score, etc.
224
229
  */
225
230
  async function reviewDiff(diff, config, options = {}) {
@@ -234,8 +239,8 @@ async function reviewDiff(diff, config, options = {}) {
234
239
  diff = filterDiffByPattern(diff, options.ignorePattern);
235
240
  }
236
241
 
237
- // Check cache
238
- if (!options.noCache && !options.single && !options.audit) {
242
+ // Check cache (skip cache when RAG is enabled — context may differ)
243
+ if (!options.noCache && !options.single && !options.audit && !options.rag) {
239
244
  const ckey = cacheKey(diff);
240
245
  const cached = getCached(ckey);
241
246
  if (cached) {
@@ -247,11 +252,31 @@ async function reviewDiff(diff, config, options = {}) {
247
252
  const projectHint = loadProjectHint();
248
253
  const minConfidence = options.minConfidence ?? 60;
249
254
 
255
+ // ── RAG codebase context retrieval ──
256
+ let ragContext = '';
257
+ let _ragStats = null;
258
+ if (options.rag) {
259
+ const repoRoot = options.repoRoot || process.cwd();
260
+ let index = loadIndex(repoRoot);
261
+ if (!index || isIndexStale(repoRoot)) {
262
+ // Auto-build index if needed
263
+ const { buildIndex } = require('./rag-indexer');
264
+ index = buildIndex(repoRoot);
265
+ }
266
+ if (index && index.symbols && index.symbols.length > 0) {
267
+ ragContext = buildReviewContext(index, diff);
268
+ _ragStats = {
269
+ indexedSymbols: index.symbols.length,
270
+ retrievedSymbols: ragContext ? (ragContext.match(/`(\w+)` \*\*/g) || []).length : 0,
271
+ };
272
+ }
273
+ }
274
+
250
275
  let result;
251
276
 
252
277
  if (options.single) {
253
278
  // Legacy single-agent mode
254
- const prompt = buildReviewPrompt(diff, config, { ...options, projectHint });
279
+ const prompt = buildReviewPrompt(diff, config, { ...options, projectHint, ragContext });
255
280
  const aiResponse = await callAI(apiKey, prompt, config);
256
281
  result = parseReviewResponse(aiResponse);
257
282
  // Add default confidence to legacy results
@@ -263,7 +288,7 @@ async function reviewDiff(diff, config, options = {}) {
263
288
  }
264
289
  } else {
265
290
  // Multi-agent parallel review
266
- const prompts = buildAgentPrompts(diff, config, { ...options, projectHint });
291
+ const prompts = buildAgentPrompts(diff, config, { ...options, projectHint, ragContext });
267
292
  const { results: agentResults, errors } = await runParallelAgents(apiKey, config, prompts);
268
293
 
269
294
  // Merge and score issues
@@ -295,6 +320,11 @@ async function reviewDiff(diff, config, options = {}) {
295
320
  }
296
321
  }
297
322
 
323
+ // ── Attach RAG stats ──
324
+ if (_ragStats) {
325
+ result._rag = _ragStats;
326
+ }
327
+
298
328
  // ── Git blame context analysis ──
299
329
  if (options.blame && result.issues && result.issues.length > 0) {
300
330
  try {