ai-code-review-toolkit 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core.js ADDED
@@ -0,0 +1,420 @@
1
+ import AIProvider from './providers/index.js'
2
+ import prompts from './prompts.js'
3
+ import fs from 'fs'
4
+ import path from 'path'
5
+ import chalk from 'chalk'
6
+ import AIError from './AIError.js'
7
+ import { createEmbeddingsClient, resolveEmbeddingsClientConfig } from './rag/embeddings.js'
8
+ import { retrieveKnowledgeContext } from './rag/index.js'
9
+
10
/**
 * CodeReviewer orchestrates AI-assisted review of a git diff: it splits the
 * diff into size-bounded chunks, sends each chunk to an AI provider (with
 * optional RAG context), and merges the per-chunk results into one verdict.
 */
export default class CodeReviewer {
  /**
   * @param {object} config - Reviewer configuration. Must contain
   *   `providerType`; `apiKey` is required unless the provider is local
   *   (LMSTUDIO/OLLAMA) — see validateConfig.
   * @throws {Error} When the config fails validation.
   */
  constructor(config) {
    this.validateConfig(config)
    this.provider = AIProvider.create(config)
    this.config = config
    // Lazily-created embeddings client for RAG retrieval (see getRagEmbedder).
    this._ragEmbedder = undefined
    // One-shot warning flags so concurrent chunks do not spam the console.
    this._warnedRagMissingIndex = false
    this._warnedRagError = false
    // True once at least one chunk actually received RAG context this run.
    this._ragUsedOnce = false
  }
20
+
21
+ validateConfig(config) {
22
+ if (!config || typeof config !== 'object') {
23
+ throw new Error('Config must be an object')
24
+ }
25
+ const whiteList = ['LMSTUDIO', 'OLLAMA']
26
+ const providerType = config.providerType.toUpperCase();
27
+ if(!config.apiKey && !whiteList.includes(providerType)) {
28
+ throw new Error('apiKey is required in config')
29
+ }
30
+ }
31
+
32
+ async review(diff, allowedExtensions) {
33
+ const result = await this.analyzeInChunks(diff, allowedExtensions)
34
+ return result
35
+ }
36
+
37
  /**
   * Split the diff into chunks and review every chunk with a bounded
   * concurrency (default 2), then merge the results into one verdict plus a
   * `meta` section describing the run.
   * @param {string} diff - Raw git diff.
   * @param {string[]} allowedExtensions - Extensions eligible for review.
   * @returns {Promise<object>} `{ result, list, errors, meta }`.
   */
  async analyzeInChunks(diff, allowedExtensions) {
    const chunks = this.splitDiffIntoChunks(diff, allowedExtensions)
    console.log(chalk.green(`Running code review with AI: The content will be reviewed in ${chalk.cyan(chunks.length)} sessions for better accuracy.`))

    // Reset per-run flag; maybeInjectProjectKnowledge sets it when RAG
    // context is actually injected into a prompt.
    this._ragUsedOnce = false
    const startedAt = Date.now()
    const concurrency = Number.isFinite(this.config.concurrency) ? Math.max(1, this.config.concurrency) : 2
    const output = await this.mapWithConcurrency(chunks, concurrency, async (chunk) => {
      return this.analyzeChunk(chunk)
    })
    const combined = this.combineResults(output)
    const durationMs = Date.now() - startedAt
    return {
      ...combined,
      meta: {
        sessions: chunks.length,
        durationMs,
        providerType: this.config.providerType,
        model: this.provider?.config?.model,
        baseURL: this.provider?.config?.baseURL,
        ragEnabled: Boolean(this.config.enableRag),
        ragUsed: this._ragUsedOnce,
      },
    }
  }
62
+
63
  /**
   * Review a single diff chunk: build the prompt, optionally inject RAG
   * context, call the provider with retries, and return the parsed result.
   * Never throws — failures come back as `{ error }` so one bad chunk does
   * not abort the whole run (combineResults inspects the error later).
   * @param {string} chunk - One diff chunk (possibly several file sections).
   * @returns {Promise<object>} Provider result, or `{ error }` on failure.
   */
  async analyzeChunk(chunk) {
    try {
      // Defaults: 1 retry after the first attempt, 500 ms base backoff.
      const maxRetries = Number.isFinite(this.config.maxRetries) ? Math.max(0, this.config.maxRetries) : 1
      const retryDelayMs = Number.isFinite(this.config.retryDelayMs) ? Math.max(0, this.config.retryDelayMs) : 500
      let prompt = this.generateReviewPrompt(chunk)
      prompt = await this.maybeInjectProjectKnowledge(prompt, chunk)

      const result = await this.retry(async () => {
        return this.provider.analyze(prompt)
      }, { maxRetries, retryDelayMs })

      // Optionally override the model's verdict: pass exactly when no
      // finding carries severity 'high'.
      if (this.config.correctedResult) {
        if (result.list?.every((item) => item.severity !== 'high')) {
          result.result = 'YES'
        } else {
          result.result = 'NO'
        }
      }
      return result
    } catch (error) {
      // Deliberate swallow: the error is surfaced via combineResults'
      // errors array instead of failing the whole review.
      return { error: error }
    }
  }
86
+
87
+ getRagEmbedder() {
88
+ if (this._ragEmbedder) return this._ragEmbedder
89
+
90
+ const resolved = resolveEmbeddingsClientConfig(this.config)
91
+ if (!resolved.baseURL) return undefined
92
+
93
+ this._ragEmbedder = createEmbeddingsClient(resolved)
94
+ return this._ragEmbedder
95
+ }
96
+
97
  /**
   * Optionally enrich the prompt with project knowledge retrieved from the
   * local RAG index. No-ops (returning the prompt unchanged) when RAG is
   * disabled, the index is missing, no embedder can be built, retrieval
   * fails, or nothing relevant is found. Mutates and returns `prompt`.
   * @param {{systemPrompt: string, userPrompt: string}} prompt
   * @param {string} chunk - The diff chunk, used as the retrieval query.
   * @returns {Promise<object>} The (possibly augmented) prompt.
   */
  async maybeInjectProjectKnowledge(prompt, chunk) {
    if (!this.config.enableRag) return prompt

    const repoRoot = this.config.repoRoot || process.cwd()
    const indexDir = this.config.knowledgeBaseIndexDir || '.ai-reviewer-cache/lancedb'
    const tableName = this.config.knowledgeBaseTable || 'project_kb'
    const topK = Number.isFinite(this.config.ragTopK) ? this.config.ragTopK : 6
    const maxChars = Number.isFinite(this.config.ragMaxChars) ? this.config.ragMaxChars : 8000

    const absIndexDir = path.resolve(repoRoot, indexDir)
    if (!fs.existsSync(absIndexDir)) {
      // Warn only once per reviewer instance; chunks run concurrently.
      if (!this._warnedRagMissingIndex) {
        this._warnedRagMissingIndex = true
        console.log(
          chalk.yellow(
            `RAG enabled but index not found at ${path.relative(repoRoot, absIndexDir)}. Run: ai-review index`
          )
        )
      }
      return prompt
    }

    const embedder = this.getRagEmbedder()
    if (!embedder) return prompt

    try {
      const items = await retrieveKnowledgeContext({
        repoRoot,
        indexDir,
        tableName,
        queryText: chunk,
        embedder,
        topK,
        maxChars,
      })
      if (!items.length) return prompt

      this._ragUsedOnce = true

      // Format retrieved snippets as one tagged block, one entry per file.
      const knowledgeBlock =
        `\n<project_knowledge>\n` +
        items.map((it) => `[${it.path}]\n${it.text}`).join('\n\n') +
        `\n</project_knowledge>\n\n`

      // Insert the knowledge immediately before the diff so the model reads
      // context first; append at the end if the tag is not found.
      const gitDiffTag = '<git_diff>'
      const idx = prompt.userPrompt.indexOf(gitDiffTag)
      if (idx !== -1) {
        prompt.userPrompt =
          prompt.userPrompt.slice(0, idx) +
          knowledgeBlock +
          prompt.userPrompt.slice(idx)
      } else {
        prompt.userPrompt += knowledgeBlock
      }
      return prompt
    } catch (err) {
      // Best-effort: retrieval failure falls back to a plain review.
      if (!this._warnedRagError) {
        this._warnedRagError = true
        console.log(chalk.yellow(`RAG retrieval failed, falling back without knowledge: ${err?.message || err}`))
      }
      return prompt
    }
  }
160
+
161
+ async retry(fn, { maxRetries, retryDelayMs }) {
162
+ let attempt = 0
163
+ // maxRetries = number of retries after the first attempt
164
+ // total attempts = 1 + maxRetries
165
+ while (true) {
166
+ try {
167
+ return await fn()
168
+ } catch (error) {
169
+ if (attempt >= maxRetries) throw error
170
+ const jitter = Math.floor(Math.random() * 100)
171
+ const delay = retryDelayMs * Math.pow(2, attempt) + jitter
172
+ await new Promise((resolve) => setTimeout(resolve, delay))
173
+ attempt += 1
174
+ }
175
+ }
176
+ }
177
+
178
+ async mapWithConcurrency(items, concurrency, worker) {
179
+ const results = new Array(items.length)
180
+ let nextIndex = 0
181
+
182
+ const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
183
+ while (true) {
184
+ const currentIndex = nextIndex
185
+ nextIndex += 1
186
+ if (currentIndex >= items.length) return
187
+ results[currentIndex] = await worker(items[currentIndex], currentIndex)
188
+ }
189
+ })
190
+
191
+ await Promise.all(runners)
192
+ return results
193
+ }
194
+ combineResults(results) {
195
+ const success = results.every(item => {
196
+ if (item.result === 'YES') {
197
+ return true
198
+ }
199
+ if(this.config.strict) {
200
+ return false
201
+ }
202
+ return item.error instanceof AIError
203
+ })
204
+ const weight = {
205
+ high: 3,
206
+ medium: 2,
207
+ low: 1
208
+ }
209
+ const list = results.flatMap(item => item.list || []).sort((a,b) => {
210
+ return weight[b.severity] - weight[a.severity]
211
+ })
212
+ const errors = results.map(item => item.error).filter(error => error)
213
+ return {result: success ? 'YES' : 'NO', list, errors}
214
+ }
215
  /**
   * Split git diff into manageable chunks
   * @param {string} diff - Raw git diff output
   * @param {string[]} allowedExtensions - Extensions (with dot, lowercase) to keep
   * @returns {Array} Array of diff chunk strings, one per review session
   */
  splitDiffIntoChunks(diff, allowedExtensions) {
    // Lookahead split keeps each `diff --git` header with its file section.
    const fileSections = diff.split(/(?=^diff --git)/m)
    const filteredSections = fileSections.filter(section => {
      const fileMatch = section.match(/^diff --git a\/(.+?) b\/(.+?)$/m)
      if (!fileMatch) return false
      const fileName = fileMatch[1]
      const ext = path.extname(fileName).toLowerCase()

      return allowedExtensions.includes(ext)
    }).map(section => {
      // Drop deletions / unchanged comment lines to save prompt tokens.
      return this.filterDiffLines(section)
    })
    // Annotate sections with lengths (splitting oversized files), then pack
    // them greedily into groups bounded by config.maxChunkSize.
    const splitSections = this.getChunksByLength(filteredSections)
    const groups = this.normalSplicing(splitSections)
    return groups.map(group => {
      const content = group.map(item => item.content).join('\n')
      return content
    })
  }
239
+
240
+ filterDiffLines(section) {
241
+ const ignoreDeletions = this.config.ignoreDeletions !== false
242
+ const stripUnchangedCommentLines = this.config.stripUnchangedCommentLines !== false
243
+
244
+ const lines = section.split('\n')
245
+ const filteredLines = lines.filter((line) => {
246
+ if (ignoreDeletions) {
247
+ // Keep file header lines (---/+++), only drop actual deleted lines in hunks.
248
+ if (!line.startsWith('--- ') && line.startsWith('-')) return false
249
+ }
250
+
251
+ if (stripUnchangedCommentLines) {
252
+ // Strip only *unchanged* comment-only context lines to reduce tokens.
253
+ if (line.startsWith(' ') && line.slice(1).trimStart().startsWith('//')) return false
254
+ }
255
+
256
+ return true
257
+ })
258
+
259
+ return filteredLines.join('\n')
260
+ }
261
+
262
+ normalSplicing(splitSections) {
263
+ const result = []
264
+ const sortedSections = splitSections.sort((a, b) => a.length - b.length)
265
+ const maxChunkSize = this.config.maxChunkSize
266
+ let currentChunk = []
267
+ let currentSize = 0
268
+ for (const section of sortedSections) {
269
+ if (currentSize + section.length > maxChunkSize) {
270
+ result.push(currentChunk)
271
+ currentChunk = []
272
+ currentSize = 0
273
+ }
274
+ currentChunk.push(section)
275
+ currentSize += section.length
276
+ }
277
+ if (currentChunk.length > 0) {
278
+ result.push(currentChunk)
279
+ }
280
+ return result
281
+ }
282
+ backtrack(splitSections) {
283
+ const maxChunkSize = this.config.maxChunkSize
284
+ let minGroup = Infinity
285
+ let result = []
286
+ const dfs = (index, groups) => {
287
+ if (index === splitSections.length) {
288
+ const total = groups.reduce((sum, group) => sum + group.length, 0)
289
+ if (groups.length < minGroup && total === splitSections.length) {
290
+ minGroup = groups.length
291
+ result = groups.slice()
292
+ }
293
+ return
294
+ }
295
+ const current = splitSections[index]
296
+ for (let i = 0; i < groups.length; i++) {
297
+ const sum = groups[i].reduce((a, b) => a + b.length, 0)
298
+ if (sum + current.length <= maxChunkSize) {
299
+ groups[i].push(current)
300
+ dfs(index + 1, groups)
301
+ groups[i].pop()
302
+ }
303
+ }
304
+ if (groups.length < minGroup) {
305
+ groups.push([current])
306
+ dfs(index + 1, groups)
307
+ groups.pop()
308
+ }
309
+ }
310
+ dfs(0, [])
311
+ return result
312
+ }
313
+ getChunksByLength(fileSections) {
314
+ const processed = []
315
+ const maxChunkSize = this.config.maxChunkSize
316
+
317
+ for (const section of fileSections) {
318
+ const fileMatch = section.match(/^diff --git a\/(.+?) b\/(.+?)$/m)
319
+ if (!fileMatch) continue
320
+
321
+ const fileName = fileMatch[1]
322
+
323
+ const fileContent = section.trim()
324
+ const length = fileContent.length
325
+ if (fileContent.length < maxChunkSize) {
326
+ processed.push({length, content: fileContent})
327
+ } else {
328
+ const fileChunks = this.splitFileDiff(fileName, fileContent, maxChunkSize)
329
+ processed.push(...fileChunks)
330
+ }
331
+ }
332
+
333
+ return processed
334
+ }
335
  /**
   * Split a single file diff into chunks
   * @param {string} fileName - File path, used to rebuild a synthetic header
   * @param {string} fileDiff - Diff content for one file
   * @param {number} maxSize - Maximum chunk size
   * @returns {Array} Array of {length, content} diff chunks
   */
  splitFileDiff(fileName, fileDiff, maxSize) {
    const chunks = []
    let currentChunk = ''
    // Synthetic header so continuation chunks still identify the file.
    const head = `diff --git a/${fileName} b/${fileName}\n`
    // Lookahead split keeps each `@@ -` hunk header with its hunk; the
    // original file header stays in the first element.
    const hunks = fileDiff.split(/(?=^@@ -)/m)
    for (const hunk of hunks) {
      const pureHunk = hunk.trim()
      // Flush the accumulated chunk before it would overflow maxSize.
      if (currentChunk.length + pureHunk.length > maxSize && currentChunk.length > 0) {
        chunks.push({length: currentChunk.length, content: currentChunk})
        currentChunk = head
      }
      // A hunk larger than maxSize is sliced into fixed-size pieces; every
      // piece after the first is prefixed with the synthetic header.
      // NOTE(review): full slices are pushed immediately while the tail is
      // appended to currentChunk, so with an oversized hunk the emitted
      // pieces can be out of order relative to buffered content — confirm
      // this ordering is acceptable for the reviewer prompt.
      let location = 0
      while ((location + maxSize) < pureHunk.length) {
        const hunkChunk = pureHunk.slice(location, location + maxSize)
        const content = location === 0 ? hunkChunk : `${head}\n${hunkChunk}`
        location += maxSize
        chunks.push({length: content.length, content})
      }
      currentChunk += pureHunk.slice(location, pureHunk.length) + '\n'
    }

    // Flush whatever remains buffered.
    if (currentChunk.length > 0) {
      chunks.push({length: currentChunk.length, content: currentChunk})
    }

    return chunks
  }
368
  /**
   * Build the system/user prompt pair for one diff chunk. The system prompt
   * combines the base persona, the enabled rule catalogue, and a description
   * of the expected response fields; the user prompt wraps the diff in
   * <git_diff> tags (the anchor maybeInjectProjectKnowledge inserts before).
   * @param {string} diff - One diff chunk.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  generateReviewPrompt(diff) {
    const { language } = this.config
    let systemPrompt = `${prompts.system}\n`
    let userPrompt = `${prompts.instruction}:\n\n<git_diff>\n${diff}\n</git_diff>\n\n`
    const { reviewContentPrompt, analyzeList} = this.getReviewContentPrompt()
    systemPrompt += reviewContentPrompt
    systemPrompt += `\n${prompts.response.requirement}\n`
    Object.entries(prompts.response.fields).forEach(([key, description]) => {
      systemPrompt += `\n${key}: ${description}\n`
      // The 'list' field gets its per-item schema expanded inline.
      if (key === 'list') {
        Object.entries(prompts.response.itemFields).forEach(([field, fieldDescription]) => {
          let text = fieldDescription
          // The 'perspective' field enumerates the enabled analysis angles.
          if (field === 'perspective') {
            text = analyzeList.join('/')
          }
          // NOTE(review): String#replace substitutes only the first
          // '${language}' placeholder occurrence — confirm templates never
          // contain it twice.
          text = text.replace('${language}', language)
          systemPrompt += `\n- ${field}: ${text}\n`
        })
      }
    })
    return { systemPrompt, userPrompt }
  }
390
+
391
+ getReviewContentPrompt() {
392
+ const { checkSecurity, checkPerformance, checkStyle, customPrompts} = this.config
393
+ if (customPrompts) {
394
+ return {
395
+ reviewContentPrompt: customPrompts,
396
+ analyzeList: ['customized']
397
+ }
398
+ }
399
+ let ouput = ''
400
+ const analyzeList = [
401
+ checkSecurity && 'security',
402
+ checkPerformance && 'performance',
403
+ checkStyle && 'style'
404
+ ].filter(Boolean);
405
+ if (analyzeList.length === 0) {
406
+ analyzeList.push('general')
407
+ }
408
+ analyzeList.forEach((item) => {
409
+ ouput += `${prompts.rules[item].name}\n`
410
+ prompts.rules[item].checks.forEach((check) => {
411
+ ouput += `\n- ${check}`
412
+ })
413
+ ouput += `\n${prompts.rules[item].severity_guidance}\n`
414
+ })
415
+ return {
416
+ reviewContentPrompt: ouput,
417
+ analyzeList
418
+ }
419
+ }
420
+ }
package/src/index.js ADDED
@@ -0,0 +1,5 @@
1
// Public entry points of the ai-code-review-toolkit package.
export { default as CodeReviewer } from './core.js';
export { default as prompts } from './prompts.js';
export { default as AIError } from './AIError.js';
export { default as AIProvider } from './providers/index.js';
+
@@ -0,0 +1,251 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { execFileSync } from 'child_process'
4
+ import chalk from 'chalk'
5
+ import fs from 'fs'
6
+ import path from 'path'
7
+ import { fileURLToPath } from 'url'
8
+
9
+ import { createEmbeddingsClient, resolveEmbeddingsClientConfig } from './rag/embeddings.js'
10
+ import { buildKnowledgeIndex } from './rag/index.js'
11
+
12
/**
 * Run a git subcommand synchronously and return its trimmed stdout.
 * @param {string[]} args - Arguments passed to `git`.
 * @param {object} [options] - Extra execFileSync options (e.g. cwd).
 * @returns {string} Trimmed stdout of the command.
 */
function runGit(args, options = {}) {
  const stdout = execFileSync('git', args, { encoding: 'utf8', ...options })
  return stdout.trim()
}
15
+
16
/**
 * Resolve the repository root via `git rev-parse --show-toplevel`.
 * Exits the process with code 1 when not inside a git repository.
 * @returns {string} Absolute path of the git worktree root.
 */
function getGitRoot() {
  try {
    return runGit(['rev-parse', '--show-toplevel'], { cwd: process.cwd() })
  } catch {
    console.error(chalk.red('Not a git repository (or any of the parent directories).'))
    process.exit(1)
  }
}
24
+
25
/**
 * Remove one matching pair of surrounding quotes (single or double) from a
 * trimmed string value; non-strings are returned unchanged.
 * @param {*} value - Raw .env value.
 * @returns {*} Trimmed string without outer quotes, or the value as-is.
 */
function stripOuterQuotes(value) {
  if (typeof value !== 'string') return value
  const trimmed = value.trim()
  // Fix: require at least 2 characters so a lone quote character is not
  // collapsed to '' (first and last index were the same character before).
  if (trimmed.length >= 2) {
    const first = trimmed[0]
    const last = trimmed[trimmed.length - 1]
    if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
      return trimmed.slice(1, -1)
    }
  }
  return trimmed
}
35
+
36
/**
 * Parse .env-style text into a key/value object. Supports blank lines,
 * `#` comments, optional `export ` prefixes, and quoted values.
 * @param {string} envContent - Raw contents of a .env file.
 * @returns {Record<string, string>} Parsed key/value pairs.
 */
function parseEnvFile(envContent) {
  const config = {}
  for (const rawLine of envContent.split(/\r?\n/)) {
    const line = rawLine.trim()
    if (line === '' || line.startsWith('#')) continue

    // Tolerate shell-style `export KEY=value` lines.
    const withoutExport = line.startsWith('export ') ? line.slice('export '.length) : line
    const eq = withoutExport.indexOf('=')
    if (eq === -1) continue

    const key = withoutExport.slice(0, eq).trim()
    if (!key) continue

    const rawValue = withoutExport.slice(eq + 1).trim()
    config[key] = stripOuterQuotes(rawValue)
  }
  return config
}
54
+
55
/**
 * Normalize a config key for fuzzy matching: lowercase and strip every
 * non-alphanumeric character (so API_KEY, apiKey and api-key all collide).
 * @param {*} key - Key to canonicalize; falsy values yield ''.
 * @returns {string} Canonical form of the key.
 */
function canonicalizeKey(key) {
  const text = String(key || '')
  const lowered = text.toLowerCase()
  return lowered.replace(/[^a-z0-9]/g, '')
}
60
+
61
/**
 * Build a lookup from canonicalized key → original known key.
 * @param {Iterable<string>} knownKeys - Canonical configuration key names.
 * @returns {Map<string, string>} canonical form → original key.
 */
function buildCanonicalKeyMap(knownKeys) {
  // Later duplicates overwrite earlier ones, matching iteration order.
  return new Map([...knownKeys].map((key) => [canonicalizeKey(key), key]))
}
68
+
69
/**
 * Rewrite config keys to their canonical known names. Exact known keys pass
 * through unchanged; otherwise a fuzzy (canonicalized) match is attempted;
 * keys that match nothing are preserved as-is.
 * @param {object} rawConfig - Arbitrary key/value config.
 * @param {Set<string>} knownKeys - Exact known key names.
 * @param {Map<string, string>} keyMap - canonical form → known key.
 * @returns {object} Config with normalized key names.
 */
function normalizeConfigKeys(rawConfig, knownKeys, keyMap) {
  const source = rawConfig && typeof rawConfig === 'object' ? rawConfig : {}
  const normalized = {}
  for (const [key, value] of Object.entries(source)) {
    if (knownKeys.has(key)) {
      normalized[key] = value
    } else {
      const mapped = keyMap.get(canonicalizeKey(key))
      normalized[mapped || key] = value
    }
  }
  return normalized
}
86
+
87
/**
 * Collect configuration from process.env. Accepts exact known config keys
 * (backward compatible) plus AI_REVIEW_* / AI_CODE_REVIEW_KIT_* prefixed
 * variables whose suffix fuzzily matches a known key. Empty and nullish
 * values are skipped.
 * @param {Set<string>} knownKeys - Exact known key names.
 * @param {Map<string, string>} keyMap - canonical form → known key.
 * @returns {object} Config values picked from the environment.
 */
function pickConfigFromProcessEnv(knownKeys, keyMap) {
  const picked = {}
  const prefixes = ['AI_REVIEW_', 'AI_CODE_REVIEW_KIT_']
  for (const [name, value] of Object.entries(process.env || {})) {
    if (value == null || value === '') continue

    // Backward compatible: exact, case-sensitive config keys win as-is.
    if (knownKeys.has(name)) {
      picked[name] = value
      continue
    }

    const prefix = prefixes.find((p) => name.startsWith(p))
    if (!prefix) continue

    const mapped = keyMap.get(canonicalizeKey(name.slice(prefix.length)))
    if (mapped) {
      picked[mapped] = value
    }
  }
  return picked
}
109
+
110
/**
 * Convert string config values to their proper types: known boolean keys
 * become true/false, known numeric keys become numbers. Unparseable values
 * are left untouched as strings.
 * @param {object} config - Normalized configuration.
 * @returns {object} A shallow copy with coerced values.
 */
function coerceConfigTypes(config) {
  const next = { ...config }
  const numberKeys = [
    'timeoutMs',
    'ragTopK',
    'ragMaxChars',
    'kbChunkSize',
    'kbChunkOverlap',
    'kbMaxFileSizeBytes',
    'embeddingsDimensions',
  ]
  const booleanKeys = ['enableRag']

  for (const key of booleanKeys) {
    if (typeof next[key] !== 'string') continue
    const normalized = next[key].trim().toLowerCase()
    if (normalized === 'true' || normalized === '1') next[key] = true
    else if (normalized === 'false' || normalized === '0') next[key] = false
  }

  for (const key of numberKeys) {
    const raw = next[key]
    if (typeof raw !== 'string' || raw.trim() === '') continue
    const parsed = Number(raw)
    if (!Number.isNaN(parsed)) next[key] = parsed
  }
  return next
}
139
+
140
/**
 * Load configuration by merging three sources (lowest → highest priority):
 * package.json `aiCheckConfig`, a `.env` file at the git root, and
 * process.env. Keys are fuzz-matched to canonical names and values coerced
 * to their proper types.
 * @param {string} gitRoot - Absolute path of the repository root.
 * @returns {object} Merged, normalized, type-coerced configuration.
 */
function loadConfig(gitRoot) {
  const envPath = path.join(gitRoot, '.env')
  const pkgPath = path.join(gitRoot, 'package.json')

  let pkgConfig = {}
  if (fs.existsSync(pkgPath)) {
    // NOTE(review): a malformed package.json makes JSON.parse throw and the
    // CLI crash — confirm whether a friendlier error is wanted here.
    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'))
    if (pkg.aiCheckConfig && typeof pkg.aiCheckConfig === 'object') {
      pkgConfig = pkg.aiCheckConfig
    }
  }

  let envConfig = {}
  if (fs.existsSync(envPath)) {
    envConfig = parseEnvFile(fs.readFileSync(envPath, 'utf8'))
  }

  // Known keys drive the fuzzy matching; any key already present in the
  // package config is treated as known too.
  const knownKeys = new Set([
    ...Object.keys(pkgConfig || {}),
    'providerType',
    'apiKey',
    'baseURL',
    'model',
    'timeoutMs',
    'knowledgeBasePaths',
    'knowledgeBaseIndexDir',
    'knowledgeBaseTable',
    'enableRag',
    'ragTopK',
    'ragMaxChars',
    'embeddingsProviderType',
    'embeddingsBaseURL',
    'embeddingsApiKey',
    'embeddingsModel',
    'embeddingsDimensions',
    'kbChunkSize',
    'kbChunkOverlap',
    'kbMaxFileSizeBytes',
  ])
  const keyMap = buildCanonicalKeyMap(knownKeys)

  pkgConfig = normalizeConfigKeys(pkgConfig, knownKeys, keyMap)
  envConfig = normalizeConfigKeys(envConfig, knownKeys, keyMap)
  const processEnvConfig = pickConfigFromProcessEnv(knownKeys, keyMap)

  // Later spreads win: process.env > .env > package.json.
  return coerceConfigTypes({ ...pkgConfig, ...envConfig, ...processEnvConfig })
}
187
+
188
/**
 * Parse CLI arguments. Currently only --verbose / -v is recognized;
 * everything else is ignored.
 * @param {string[]} argv - Arguments (without node/script entries).
 * @returns {{verbose: boolean}}
 */
function parseArgs(argv) {
  const verbose = argv.some((arg) => arg === '--verbose' || arg === '-v')
  return { verbose }
}
195
+
196
/**
 * CLI entry point: build (or rebuild) the RAG knowledge index for the
 * current git repository using the configured embeddings provider.
 * Exits with code 1 when no embeddings baseURL can be resolved.
 * @param {string[]} [argv] - CLI args (defaults to process.argv.slice(2)).
 * @returns {Promise<void>}
 */
export async function main(argv = process.argv.slice(2)) {
  const args = parseArgs(argv)
  const gitRoot = getGitRoot()
  const config = loadConfig(gitRoot)

  // Defaults mirror the ones used by CodeReviewer.maybeInjectProjectKnowledge.
  const knowledgeBasePaths = config.knowledgeBasePaths || ['.']
  const indexDir = config.knowledgeBaseIndexDir || '.ai-reviewer-cache/lancedb'
  const tableName = config.knowledgeBaseTable || 'project_kb'

  const resolvedEmbeddings = resolveEmbeddingsClientConfig(config)
  if (!resolvedEmbeddings.baseURL) {
    console.error(
      chalk.red(
        'Missing embeddings baseURL. Set embeddingsBaseURL (or baseURL), or set embeddingsProviderType to a provider with a default embeddings baseURL.'
      )
    )
    process.exit(1)
  }
  // Chat model reused as embeddings model is almost always a mistake — warn.
  if (config.model && resolvedEmbeddings.model && String(config.model) === String(resolvedEmbeddings.model)) {
    console.log(
      chalk.yellow(
        `Warning: embeddingsModel is the same as model (${resolvedEmbeddings.model}). Make sure embeddingsModel is an embeddings model (e.g. text-embedding-3-small).`
      )
    )
  }

  const embedder = createEmbeddingsClient(resolvedEmbeddings)

  console.log(chalk.cyan('Building knowledge index...'))
  const result = await buildKnowledgeIndex({
    repoRoot: gitRoot,
    knowledgeBasePaths,
    indexDir,
    tableName,
    embedder,
    chunkSize: config.kbChunkSize,
    chunkOverlap: config.kbChunkOverlap,
    maxFileSizeBytes: config.kbMaxFileSizeBytes,
  })
  console.log(
    chalk.green(
      `Index built: ${result.files} files, ${result.chunks} chunks → ${path.relative(gitRoot, result.indexDir)} (${result.tableName})`
    )
  )
  if (args.verbose) {
    console.log(chalk.gray('Tip: add .ai-reviewer-cache/ to .gitignore'))
  }
}
244
+
245
// Run main() only when this module is executed directly (not when imported).
const isMain = process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)
if (isMain) {
  main().catch((err) => {
    console.error(chalk.red('Index build failed:'), err?.message || err)
    process.exit(1)
  })
}
+ }