npm - ai-code-review-kit - Versions diffs - 1.1.2 - Mend

ai-code-review-kit 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +275 -0
package/README.zh-CN.md +268 -0
package/package.json +37 -0
package/src/AIError.js +7 -0
package/src/ai-review-cli.js +79 -0
package/src/ai-review.js +392 -0
package/src/cli.js +282 -0
package/src/core.js +420 -0
package/src/index.js +5 -0
package/src/kb-index.js +251 -0
package/src/prompts.js +63 -0
package/src/providers/adapters/ollama.js +61 -0
package/src/providers/adapters/openai.js +144 -0
package/src/providers/base.js +120 -0
package/src/providers/index.js +11 -0
package/src/rag/embeddings.js +168 -0
package/src/rag/fs.js +97 -0
package/src/rag/index.js +121 -0
package/src/rag/lancedb.js +14 -0
package/src/rag/text.js +18 -0
package/src/utils/openai.js +50 -0

package/src/core.js ADDED Viewed

@@ -0,0 +1,420 @@
+import AIProvider from './providers/index.js'
+import prompts from './prompts.js'
+import fs from 'fs'
+import path from 'path'
+import chalk from 'chalk'
+import AIError from './AIError.js'
+import { createEmbeddingsClient, resolveEmbeddingsClientConfig } from './rag/embeddings.js'
+import { retrieveKnowledgeContext } from './rag/index.js'
+export default class CodeReviewer {
+  constructor(config) {
+    this.validateConfig(config)
+    this.provider = AIProvider.create(config)
+    this.config = config
+    this._ragEmbedder = undefined
+    this._warnedRagMissingIndex = false
+    this._warnedRagError = false
+    this._ragUsedOnce = false
+  }
+  validateConfig(config) {
+    if (!config || typeof config !== 'object') {
+      throw new Error('Config must be an object')
+    }
+    const whiteList = ['LMSTUDIO', 'OLLAMA']
+    const providerType = config.providerType.toUpperCase();
+    if(!config.apiKey && !whiteList.includes(providerType)) {
+      throw new Error('apiKey is required in config')
+    }
+  }
+  async review(diff, allowedExtensions) {
+    const result = await this.analyzeInChunks(diff, allowedExtensions)
+    return result
+  }
+  async analyzeInChunks(diff, allowedExtensions) {
+    const chunks = this.splitDiffIntoChunks(diff, allowedExtensions)
+    console.log(chalk.green(`Running code review with AI: The content will be reviewed in ${chalk.cyan(chunks.length)} sessions for better accuracy.`))
+    this._ragUsedOnce = false
+    const startedAt = Date.now()
+    const concurrency = Number.isFinite(this.config.concurrency) ? Math.max(1, this.config.concurrency) : 2
+    const output = await this.mapWithConcurrency(chunks, concurrency, async (chunk) => {
+      return this.analyzeChunk(chunk)
+    })
+    const combined = this.combineResults(output)
+    const durationMs = Date.now() - startedAt
+    return {
+      ...combined,
+      meta: {
+        sessions: chunks.length,
+        durationMs,
+        providerType: this.config.providerType,
+        model: this.provider?.config?.model,
+        baseURL: this.provider?.config?.baseURL,
+        ragEnabled: Boolean(this.config.enableRag),
+        ragUsed: this._ragUsedOnce,
+      },
+    }
+  }
+  async analyzeChunk(chunk) {
+    try {
+      const maxRetries = Number.isFinite(this.config.maxRetries) ? Math.max(0, this.config.maxRetries) : 1
+      const retryDelayMs = Number.isFinite(this.config.retryDelayMs) ? Math.max(0, this.config.retryDelayMs) : 500
+      let prompt = this.generateReviewPrompt(chunk)
+      prompt = await this.maybeInjectProjectKnowledge(prompt, chunk)
+      const result = await this.retry(async () => {
+        return this.provider.analyze(prompt)
+      }, { maxRetries, retryDelayMs })
+      if (this.config.correctedResult) {
+        if (result.list?.every((item) => item.severity !== 'high')) {
+          result.result = 'YES'
+        } else {
+          result.result = 'NO'
+        }
+      }
+      return result
+    } catch (error) {
+      return { error: error }
+    }
+  }
+  getRagEmbedder() {
+    if (this._ragEmbedder) return this._ragEmbedder
+    const resolved = resolveEmbeddingsClientConfig(this.config)
+    if (!resolved.baseURL) return undefined
+    this._ragEmbedder = createEmbeddingsClient(resolved)
+    return this._ragEmbedder
+  }
+  async maybeInjectProjectKnowledge(prompt, chunk) {
+    if (!this.config.enableRag) return prompt
+    const repoRoot = this.config.repoRoot || process.cwd()
+    const indexDir = this.config.knowledgeBaseIndexDir || '.ai-reviewer-cache/lancedb'
+    const tableName = this.config.knowledgeBaseTable || 'project_kb'
+    const topK = Number.isFinite(this.config.ragTopK) ? this.config.ragTopK : 6
+    const maxChars = Number.isFinite(this.config.ragMaxChars) ? this.config.ragMaxChars : 8000
+    const absIndexDir = path.resolve(repoRoot, indexDir)
+    if (!fs.existsSync(absIndexDir)) {
+      if (!this._warnedRagMissingIndex) {
+        this._warnedRagMissingIndex = true
+        console.log(
+          chalk.yellow(
+            `RAG enabled but index not found at ${path.relative(repoRoot, absIndexDir)}. Run: ai-review index`
+          )
+        )
+      }
+      return prompt
+    }
+    const embedder = this.getRagEmbedder()
+    if (!embedder) return prompt
+    try {
+      const items = await retrieveKnowledgeContext({
+        repoRoot,
+        indexDir,
+        tableName,
+        queryText: chunk,
+        embedder,
+        topK,
+        maxChars,
+      })
+      if (!items.length) return prompt
+      this._ragUsedOnce = true
+      const knowledgeBlock =
+        `\n<project_knowledge>\n` +
+        items.map((it) => `[${it.path}]\n${it.text}`).join('\n\n') +
+        `\n</project_knowledge>\n\n`
+      const gitDiffTag = '<git_diff>'
+      const idx = prompt.userPrompt.indexOf(gitDiffTag)
+      if (idx !== -1) {
+        prompt.userPrompt =
+          prompt.userPrompt.slice(0, idx) +
+          knowledgeBlock +
+          prompt.userPrompt.slice(idx)
+      } else {
+        prompt.userPrompt += knowledgeBlock
+      }
+      return prompt
+    } catch (err) {
+      if (!this._warnedRagError) {
+        this._warnedRagError = true
+        console.log(chalk.yellow(`RAG retrieval failed, falling back without knowledge: ${err?.message || err}`))
+      }
+      return prompt
+    }
+  }
+  async retry(fn, { maxRetries, retryDelayMs }) {
+    let attempt = 0
+    // maxRetries = number of retries after the first attempt
+    // total attempts = 1 + maxRetries
+    while (true) {
+      try {
+        return await fn()
+      } catch (error) {
+        if (attempt >= maxRetries) throw error
+        const jitter = Math.floor(Math.random() * 100)
+        const delay = retryDelayMs * Math.pow(2, attempt) + jitter
+        await new Promise((resolve) => setTimeout(resolve, delay))
+        attempt += 1
+      }
+    }
+  }
+  async mapWithConcurrency(items, concurrency, worker) {
+    const results = new Array(items.length)
+    let nextIndex = 0
+    const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
+      while (true) {
+        const currentIndex = nextIndex
+        nextIndex += 1
+        if (currentIndex >= items.length) return
+        results[currentIndex] = await worker(items[currentIndex], currentIndex)
+      }
+    })
+    await Promise.all(runners)
+    return results
+  }
+  combineResults(results) {
+    const success = results.every(item => {
+      if (item.result === 'YES') {
+        return true
+      }
+      if(this.config.strict) {
+        return false
+      }
+      return item.error instanceof AIError
+    })
+    const weight = {
+      high: 3,
+      medium: 2,
+      low: 1
+    }
+    const list = results.flatMap(item => item.list || []).sort((a,b) => {
+      return weight[b.severity] - weight[a.severity]
+    })
+    const errors = results.map(item => item.error).filter(error => error)
+    return {result: success ? 'YES' : 'NO', list, errors}
+  }
+  /**
+   * Split git diff into manageable chunks
+   * @param {string} diff - Raw git diff output
+   * @returns {Array} Array of diff chunks
+   */
+  splitDiffIntoChunks(diff, allowedExtensions) {
+    const fileSections = diff.split(/(?=^diff --git)/m)
+    const filteredSections = fileSections.filter(section => {
+      const fileMatch = section.match(/^diff --git a\/(.+?) b\/(.+?)$/m)
+      if (!fileMatch) return false
+      const fileName = fileMatch[1]
+      const ext = path.extname(fileName).toLowerCase()
+      return allowedExtensions.includes(ext)
+    }).map(section => {
+      return this.filterDiffLines(section)
+    })
+    const splitSections = this.getChunksByLength(filteredSections)
+    const groups = this.normalSplicing(splitSections)
+    return groups.map(group => {
+      const content = group.map(item => item.content).join('\n')
+      return content
+    })
+  }
+  filterDiffLines(section) {
+    const ignoreDeletions = this.config.ignoreDeletions !== false
+    const stripUnchangedCommentLines = this.config.stripUnchangedCommentLines !== false
+    const lines = section.split('\n')
+    const filteredLines = lines.filter((line) => {
+      if (ignoreDeletions) {
+        // Keep file header lines (---/+++), only drop actual deleted lines in hunks.
+        if (!line.startsWith('--- ') && line.startsWith('-')) return false
+      }
+      if (stripUnchangedCommentLines) {
+        // Strip only *unchanged* comment-only context lines to reduce tokens.
+        if (line.startsWith(' ') && line.slice(1).trimStart().startsWith('//')) return false
+      }
+      return true
+    })
+    return filteredLines.join('\n')
+  }
+  normalSplicing(splitSections) {
+    const result = []
+    const sortedSections = splitSections.sort((a, b) => a.length - b.length)
+    const maxChunkSize = this.config.maxChunkSize
+    let currentChunk = []
+    let currentSize = 0
+    for (const section of sortedSections) {
+      if (currentSize + section.length > maxChunkSize) {
+        result.push(currentChunk)
+        currentChunk = []
+        currentSize = 0
+      }
+      currentChunk.push(section)
+      currentSize += section.length
+    }
+    if (currentChunk.length > 0) {
+      result.push(currentChunk)
+    }
+    return result
+  }
+  backtrack(splitSections) {
+    const maxChunkSize = this.config.maxChunkSize
+    let minGroup = Infinity
+    let result = []
+    const dfs = (index, groups) => {
+      if (index === splitSections.length) {
+        const total = groups.reduce((sum, group) => sum + group.length, 0)
+        if (groups.length < minGroup && total === splitSections.length) {
+          minGroup = groups.length
+          result = groups.slice()
+        }
+        return
+      }
+      const current = splitSections[index]
+      for (let i = 0; i < groups.length; i++) {
+        const sum = groups[i].reduce((a, b) => a + b.length, 0)
+        if (sum + current.length <= maxChunkSize) {
+          groups[i].push(current)
+          dfs(index + 1, groups)
+          groups[i].pop()
+        }
+      }
+      if (groups.length < minGroup) {
+        groups.push([current])
+        dfs(index + 1, groups)
+        groups.pop()
+      }
+    }
+    dfs(0, [])
+    return result
+  }
+  getChunksByLength(fileSections) {
+    const processed = []
+    const maxChunkSize = this.config.maxChunkSize
+    for (const section of fileSections) {
+      const fileMatch = section.match(/^diff --git a\/(.+?) b\/(.+?)$/m)
+      if (!fileMatch) continue
+      const fileName = fileMatch[1]
+      const fileContent = section.trim()
+      const length = fileContent.length
+      if (fileContent.length < maxChunkSize) {
+        processed.push({length, content: fileContent})
+      } else {
+        const fileChunks = this.splitFileDiff(fileName, fileContent, maxChunkSize)
+        processed.push(...fileChunks)
+      }
+    }
+    return processed
+  }
+  /**
+   * Split a single file diff into chunks
+   * @param {string} fileDiff - Diff content for one file
+   * @param {number} maxSize - Maximum chunk size
+   * @returns {Array} Array of diff chunks
+   */
+  splitFileDiff(fileName, fileDiff, maxSize) {
+    const chunks = []
+    let currentChunk = ''
+    const head = `diff --git a/${fileName} b/${fileName}\n`
+    const hunks = fileDiff.split(/(?=^@@ -)/m)
+    for (const hunk of hunks) {
+      const pureHunk = hunk.trim()
+      if (currentChunk.length + pureHunk.length > maxSize && currentChunk.length > 0) {
+        chunks.push({length: currentChunk.length, content: currentChunk})
+        currentChunk = head
+      }
+      let location = 0
+      while ((location + maxSize) < pureHunk.length) {
+        const hunkChunk = pureHunk.slice(location, location + maxSize)
+        const content = location === 0 ? hunkChunk : `${head}\n${hunkChunk}`
+        location += maxSize
+        chunks.push({length: content.length, content})
+      }
+      currentChunk += pureHunk.slice(location, pureHunk.length) + '\n'
+    }
+    if (currentChunk.length > 0) {
+      chunks.push({length: currentChunk.length, content: currentChunk})
+    }
+    return chunks
+  }
+  generateReviewPrompt(diff) {
+    const { language } = this.config
+    let systemPrompt = `${prompts.system}\n`
+    let userPrompt = `${prompts.instruction}:\n\n<git_diff>\n${diff}\n</git_diff>\n\n`
+    const { reviewContentPrompt, analyzeList} = this.getReviewContentPrompt()
+    systemPrompt += reviewContentPrompt
+    systemPrompt += `\n${prompts.response.requirement}\n`
+    Object.entries(prompts.response.fields).forEach(([key, description]) => {
+      systemPrompt += `\n${key}: ${description}\n`
+      if (key === 'list') {
+        Object.entries(prompts.response.itemFields).forEach(([field, fieldDescription]) => {
+          let text = fieldDescription
+          if (field === 'perspective') {
+            text = analyzeList.join('/')
+          }
+          text = text.replace('${language}', language)
+          systemPrompt += `\n- ${field}: ${text}\n`
+        })
+      }
+    })
+    return { systemPrompt, userPrompt }
+  }
+  getReviewContentPrompt() {
+    const { checkSecurity, checkPerformance, checkStyle, customPrompts} = this.config
+    if (customPrompts) {
+      return {
+        reviewContentPrompt: customPrompts,
+        analyzeList: ['customized']
+      }
+    }
+    let ouput = ''
+    const analyzeList = [
+      checkSecurity && 'security',
+      checkPerformance && 'performance',
+      checkStyle && 'style'
+    ].filter(Boolean);
+    if (analyzeList.length === 0) {
+      analyzeList.push('general')
+    }
+    analyzeList.forEach((item) => {
+      ouput += `${prompts.rules[item].name}\n`
+      prompts.rules[item].checks.forEach((check) => {
+        ouput += `\n- ${check}`
+      })
+      ouput += `\n${prompts.rules[item].severity_guidance}\n`
+    })
+    return {
+      reviewContentPrompt: ouput,
+      analyzeList
+    }
+  }
+}

package/src/index.js ADDED Viewed

@@ -0,0 +1,5 @@
+export { default as CodeReviewer } from './core.js';
+export { default as prompts } from './prompts.js';
+export { default as AIError } from './AIError.js';
+export { default as AIProvider } from './providers/index.js';

package/src/kb-index.js ADDED Viewed

@@ -0,0 +1,251 @@
+#!/usr/bin/env node
+import { execFileSync } from 'child_process'
+import chalk from 'chalk'
+import fs from 'fs'
+import path from 'path'
+import { fileURLToPath } from 'url'
+import { createEmbeddingsClient, resolveEmbeddingsClientConfig } from './rag/embeddings.js'
+import { buildKnowledgeIndex } from './rag/index.js'
+function runGit(args, options = {}) {
+  return execFileSync('git', args, { encoding: 'utf8', ...options }).trim()
+}
+function getGitRoot() {
+  try {
+    return runGit(['rev-parse', '--show-toplevel'], { cwd: process.cwd() })
+  } catch {
+    console.error(chalk.red('Not a git repository (or any of the parent directories).'))
+    process.exit(1)
+  }
+}
+function stripOuterQuotes(value) {
+  if (typeof value !== 'string') return value
+  const trimmed = value.trim()
+  const first = trimmed[0]
+  const last = trimmed[trimmed.length - 1]
+  if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
+    return trimmed.slice(1, -1)
+  }
+  return trimmed
+}
+function parseEnvFile(envContent) {
+  const config = {}
+  envContent.split(/\r?\n/).forEach((line) => {
+    const trimmed = line.trim()
+    if (!trimmed || trimmed.startsWith('#')) return
+    const normalized = trimmed.startsWith('export ') ? trimmed.slice('export '.length) : trimmed
+    const idx = normalized.indexOf('=')
+    if (idx === -1) return
+    const key = normalized.slice(0, idx).trim()
+    const rawValue = normalized.slice(idx + 1).trim()
+    if (!key) return
+    config[key] = stripOuterQuotes(rawValue)
+  })
+  return config
+}
+function canonicalizeKey(key) {
+  return String(key || '')
+    .toLowerCase()
+    .replace(/[^a-z0-9]/g, '')
+}
+function buildCanonicalKeyMap(knownKeys) {
+  const map = new Map()
+  for (const key of knownKeys) {
+    map.set(canonicalizeKey(key), key)
+  }
+  return map
+}
+function normalizeConfigKeys(rawConfig, knownKeys, keyMap) {
+  const config = rawConfig && typeof rawConfig === 'object' ? rawConfig : {}
+  const normalized = {}
+  for (const [rawKey, rawValue] of Object.entries(config)) {
+    if (knownKeys.has(rawKey)) {
+      normalized[rawKey] = rawValue
+      continue
+    }
+    const mapped = keyMap.get(canonicalizeKey(rawKey))
+    if (mapped) {
+      normalized[mapped] = rawValue
+      continue
+    }
+    normalized[rawKey] = rawValue
+  }
+  return normalized
+}
+function pickConfigFromProcessEnv(knownKeys, keyMap) {
+  const picked = {}
+  const prefixes = ['AI_REVIEW_', 'AI_CODE_REVIEW_KIT_']
+  for (const [rawKey, rawValue] of Object.entries(process.env || {})) {
+    if (rawValue == null || rawValue === '') continue
+    // Backward compatible: allow exact, case-sensitive config keys
+    if (knownKeys.has(rawKey)) {
+      picked[rawKey] = rawValue
+      continue
+    }
+    const prefix = prefixes.find((p) => rawKey.startsWith(p))
+    if (!prefix) continue
+    const suffix = rawKey.slice(prefix.length)
+    const mapped = keyMap.get(canonicalizeKey(suffix))
+    if (!mapped) continue
+    picked[mapped] = rawValue
+  }
+  return picked
+}
+function coerceConfigTypes(config) {
+  const next = { ...config }
+  const numberKeys = [
+    'timeoutMs',
+    'ragTopK',
+    'ragMaxChars',
+    'kbChunkSize',
+    'kbChunkOverlap',
+    'kbMaxFileSizeBytes',
+    'embeddingsDimensions',
+  ]
+  const booleanKeys = ['enableRag']
+  for (const key of booleanKeys) {
+    if (typeof next[key] === 'string') {
+      const value = next[key].trim().toLowerCase()
+      if (value === 'true' || value === '1') next[key] = true
+      if (value === 'false' || value === '0') next[key] = false
+    }
+  }
+  for (const key of numberKeys) {
+    if (typeof next[key] === 'string' && next[key].trim() !== '') {
+      const num = Number(next[key])
+      if (!Number.isNaN(num)) next[key] = num
+    }
+  }
+  return next
+}
+function loadConfig(gitRoot) {
+  const envPath = path.join(gitRoot, '.env')
+  const pkgPath = path.join(gitRoot, 'package.json')
+  let pkgConfig = {}
+  if (fs.existsSync(pkgPath)) {
+    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'))
+    if (pkg.aiCheckConfig && typeof pkg.aiCheckConfig === 'object') {
+      pkgConfig = pkg.aiCheckConfig
+    }
+  }
+  let envConfig = {}
+  if (fs.existsSync(envPath)) {
+    envConfig = parseEnvFile(fs.readFileSync(envPath, 'utf8'))
+  }
+  const knownKeys = new Set([
+    ...Object.keys(pkgConfig || {}),
+    'providerType',
+    'apiKey',
+    'baseURL',
+    'model',
+    'timeoutMs',
+    'knowledgeBasePaths',
+    'knowledgeBaseIndexDir',
+    'knowledgeBaseTable',
+    'enableRag',
+    'ragTopK',
+    'ragMaxChars',
+    'embeddingsProviderType',
+    'embeddingsBaseURL',
+    'embeddingsApiKey',
+    'embeddingsModel',
+    'embeddingsDimensions',
+    'kbChunkSize',
+    'kbChunkOverlap',
+    'kbMaxFileSizeBytes',
+  ])
+  const keyMap = buildCanonicalKeyMap(knownKeys)
+  pkgConfig = normalizeConfigKeys(pkgConfig, knownKeys, keyMap)
+  envConfig = normalizeConfigKeys(envConfig, knownKeys, keyMap)
+  const processEnvConfig = pickConfigFromProcessEnv(knownKeys, keyMap)
+  return coerceConfigTypes({ ...pkgConfig, ...envConfig, ...processEnvConfig })
+}
+function parseArgs(argv) {
+  const args = { verbose: false }
+  for (const a of argv) {
+    if (a === '--verbose' || a === '-v') args.verbose = true
+  }
+  return args
+}
+export async function main(argv = process.argv.slice(2)) {
+  const args = parseArgs(argv)
+  const gitRoot = getGitRoot()
+  const config = loadConfig(gitRoot)
+  const knowledgeBasePaths = config.knowledgeBasePaths || ['.']
+  const indexDir = config.knowledgeBaseIndexDir || '.ai-reviewer-cache/lancedb'
+  const tableName = config.knowledgeBaseTable || 'project_kb'
+  const resolvedEmbeddings = resolveEmbeddingsClientConfig(config)
+  if (!resolvedEmbeddings.baseURL) {
+    console.error(
+      chalk.red(
+        'Missing embeddings baseURL. Set embeddingsBaseURL (or baseURL), or set embeddingsProviderType to a provider with a default embeddings baseURL.'
+      )
+    )
+    process.exit(1)
+  }
+  if (config.model && resolvedEmbeddings.model && String(config.model) === String(resolvedEmbeddings.model)) {
+    console.log(
+      chalk.yellow(
+        `Warning: embeddingsModel is the same as model (${resolvedEmbeddings.model}). Make sure embeddingsModel is an embeddings model (e.g. text-embedding-3-small).`
+      )
+    )
+  }
+  const embedder = createEmbeddingsClient(resolvedEmbeddings)
+  console.log(chalk.cyan('Building knowledge index...'))
+  const result = await buildKnowledgeIndex({
+    repoRoot: gitRoot,
+    knowledgeBasePaths,
+    indexDir,
+    tableName,
+    embedder,
+    chunkSize: config.kbChunkSize,
+    chunkOverlap: config.kbChunkOverlap,
+    maxFileSizeBytes: config.kbMaxFileSizeBytes,
+  })
+  console.log(
+    chalk.green(
+      `Index built: ${result.files} files, ${result.chunks} chunks → ${path.relative(gitRoot, result.indexDir)} (${result.tableName})`
+    )
+  )
+  if (args.verbose) {
+    console.log(chalk.gray('Tip: add .ai-reviewer-cache/ to .gitignore'))
+  }
+}
+const isMain = process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)
+if (isMain) {
+  main().catch((err) => {
+    console.error(chalk.red('Index build failed:'), err?.message || err)
+    process.exit(1)
+  })
+}