ai-code-review-kit 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+
4
+ import { connectLanceDB } from './lancedb.js'
5
+ import { collectTextFiles } from './fs.js'
6
+ import { chunkText } from './text.js'
7
+
8
/**
 * Normalize a knowledge-base path setting into a clean list of path strings.
 *
 * Accepts either an array of entries or a single comma-separated string. In both
 * forms every entry is trimmed and blank entries are dropped, so `' docs '` and
 * `'docs'` yield the same path. `null`/`undefined` array items are skipped rather
 * than being stringified into the literal paths "null"/"undefined".
 *
 * @param {unknown} value - Array of entries, a comma-separated string, or anything else.
 * @returns {string[]} Trimmed, non-empty path strings; `[]` for any other input type.
 */
function normalizePaths(value) {
  let rawEntries = []
  if (Array.isArray(value)) {
    // `== null` matches both null and undefined.
    rawEntries = value.filter((entry) => entry != null)
  } else if (typeof value === 'string') {
    rawEntries = value.split(',')
  }
  return rawEntries.map((entry) => String(entry).trim()).filter(Boolean)
}
17
+
18
/**
 * Make sure something exists at `dirPath`, creating the directory (and any missing
 * parent directories) when nothing is there yet.
 *
 * @param {string} dirPath - Directory path to check and, if absent, create.
 * @returns {void}
 */
function ensureDir(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath)
  if (alreadyPresent) return
  fs.mkdirSync(dirPath, { recursive: true })
}
23
+
24
/**
 * Build (or rebuild) the vector index for a repository's knowledge base.
 *
 * Steps: collect files via `collectTextFiles`, split each file's text with `chunkText`,
 * embed the chunks batch by batch with `embedder.embedTexts`, then write all rows to
 * `tableName` in the database at `indexDir` using `mode: 'overwrite'`.
 *
 * @param {object} options
 * @param {string} options.repoRoot - Repository root; `indexDir` is resolved against it.
 * @param {string|string[]} options.knowledgeBasePaths - Array or comma-separated string of paths.
 * @param {string} options.indexDir - Index directory, resolved relative to `repoRoot`.
 * @param {string} options.tableName - Name of the table to (over)write.
 * @param {{ embedTexts: Function }} options.embedder - `embedTexts(texts)` must resolve to an
 *   array holding exactly one vector per input text.
 * @param {number} [options.chunkSize=1500] - Forwarded to `chunkText` as `chunkSize`.
 * @param {number} [options.chunkOverlap=200] - Forwarded to `chunkText` as `overlap`.
 * @param {number} [options.maxFileSizeBytes=524288] - Forwarded to `collectTextFiles`.
 * @param {number} [options.batchSize=64] - Number of chunks per `embedTexts` call; must be >= 1.
 * @returns {Promise<{ files: number, chunks: number, indexDir: string, tableName: string }>}
 *   Counts of collected files and written rows, plus the absolute index directory.
 * @throws {Error} If `repoRoot` or `embedder.embedTexts` is missing, `batchSize` is not a
 *   number >= 1, no knowledge-base paths remain after normalization, or the embedder
 *   returns a different number of vectors than texts.
 * @throws {TypeError} If `indexDir` is not a string.
 */
export async function buildKnowledgeIndex({
  repoRoot,
  knowledgeBasePaths,
  indexDir,
  tableName,
  embedder,
  chunkSize = 1500,
  chunkOverlap = 200,
  maxFileSizeBytes = 512 * 1024,
  batchSize = 64,
}) {
  if (!repoRoot) throw new Error('repoRoot is required')
  if (!embedder?.embedTexts) throw new Error('embedder.embedTexts is required')
  // Fail fast: path.resolve() rejects non-string segments with a TypeError, but that
  // would otherwise only surface after every chunk has already been embedded.
  if (typeof indexDir !== 'string') throw new TypeError('indexDir must be a string')
  // A zero, negative or NaN step never advances the batching loop (it would spin
  // forever), and a string step would concatenate instead of add.
  const step = Math.floor(Number(batchSize))
  if (Number.isNaN(step) || step < 1) throw new Error('batchSize must be a number >= 1')

  const paths = normalizePaths(knowledgeBasePaths)
  if (paths.length === 0) throw new Error('knowledgeBasePaths is empty')

  const files = collectTextFiles(repoRoot, paths, { maxFileSizeBytes })

  // One pending record per chunk; the id combines the file's relative path and chunk index.
  const pending = []
  for (const file of files) {
    const chunks = chunkText(file.text, { chunkSize, overlap: chunkOverlap })
    chunks.forEach((text, chunkIndex) => {
      pending.push({
        id: `${file.relPath}::${chunkIndex}`,
        path: file.relPath,
        chunkIndex,
        text,
      })
    })
  }

  // Batches are embedded one after another (each call is awaited before the next starts).
  const rows = []
  for (let offset = 0; offset < pending.length; offset += step) {
    const batch = pending.slice(offset, offset + step)
    const vectors = await embedder.embedTexts(batch.map((item) => item.text))
    if (!Array.isArray(vectors) || vectors.length !== batch.length) {
      throw new Error('Embeddings vector count mismatch')
    }
    batch.forEach((item, position) => {
      rows.push({ ...item, vector: vectors[position] })
    })
  }

  const absIndexDir = path.resolve(repoRoot, indexDir)
  ensureDir(absIndexDir)
  const db = await connectLanceDB(absIndexDir)
  // NOTE(review): `rows` is empty when no chunk was produced — confirm createTable accepts that.
  await db.createTable(tableName, rows, { mode: 'overwrite' })
  // LanceDB keeps historical versions by default. Prune them to avoid the cache directory
  // growing unbounded across repeated `index` runs.
  try {
    const table = await db.openTable(tableName)
    if (typeof table?.optimize === 'function') {
      await table.optimize({ cleanupOlderThan: new Date() })
    }
  } catch {
    // Best-effort cleanup; ignore failures to keep indexing usable.
  }

  return { files: files.length, chunks: rows.length, indexDir: absIndexDir, tableName }
}
85
+
86
/**
 * Fetch the indexed knowledge chunks closest to `queryText`.
 *
 * Resolves to `[]` when `queryText` is falsy, when the resolved index directory does not
 * exist, or when the embedder yields no vector for the query. Search hits are consumed in
 * the order returned; hits without text are skipped, and collection stops at the first
 * hit whose "[path]" header plus text would push the running total beyond `maxChars`.
 *
 * @param {object} options
 * @param {string} options.repoRoot - Repository root; `indexDir` is resolved against it.
 * @param {string} options.indexDir - Index directory, resolved relative to `repoRoot`.
 * @param {string} options.tableName - Table to search.
 * @param {string} options.queryText - Text to embed and search with.
 * @param {{ embedTexts: Function }} options.embedder - `embedTexts([queryText])` supplies the query vector.
 * @param {number} [options.topK=6] - Limit passed to the vector search.
 * @param {number} [options.maxChars=8000] - Character budget across all returned snippets.
 * @returns {Promise<Array<{ path: string, text: string }>>} Selected chunks with their source path.
 * @throws {Error} If `repoRoot` is missing, or `queryText` is given without `embedder.embedTexts`.
 */
export async function retrieveKnowledgeContext({
  repoRoot,
  indexDir,
  tableName,
  queryText,
  embedder,
  topK = 6,
  maxChars = 8000,
}) {
  if (!repoRoot) {
    throw new Error('repoRoot is required')
  }
  if (!queryText) {
    return []
  }
  if (!embedder?.embedTexts) {
    throw new Error('embedder.embedTexts is required')
  }

  // Checked before embedding, so a missing index never triggers an embedder call.
  const indexLocation = path.resolve(repoRoot, indexDir)
  if (!fs.existsSync(indexLocation)) {
    return []
  }

  const [queryVector] = await embedder.embedTexts([queryText])
  if (!queryVector) {
    return []
  }

  const connection = await connectLanceDB(indexLocation)
  const knowledgeTable = await connection.openTable(tableName)
  const searchResult = await knowledgeTable.vectorSearch(queryVector).limit(topK).toArray()
  const hits = searchResult || []

  const selected = []
  let budgetUsed = 0
  for (const hit of hits) {
    const source = hit.path || hit.file || 'unknown'
    const body = String(hit.text || '').trim()
    if (!body) continue
    // The budget is measured on the rendered snippet (header line + text).
    const rendered = `[${source}]\n${body}`
    const nextTotal = budgetUsed + rendered.length
    if (nextTotal > maxChars) break
    budgetUsed = nextTotal
    selected.push({ path: source, text: body })
  }
  return selected
}
@@ -0,0 +1,14 @@
1
/**
 * Locate a `connect` function on an imported module object.
 *
 * Looks at `mod.connect` first and falls back to `mod.default.connect`, which covers
 * modules that expose their API either as named exports or on a default export.
 *
 * @param {*} mod - Imported module object (may be null/undefined).
 * @returns {Function|undefined} The `connect` function, or `undefined` when neither location has one.
 */
function resolveConnectFn(mod) {
  const named = mod?.connect
  if (typeof named === 'function') {
    return named
  }
  const viaDefault = mod?.default?.connect
  return typeof viaDefault === 'function' ? viaDefault : undefined
}
6
+
7
/**
 * Open a LanceDB connection for `uri`.
 *
 * `@lancedb/lancedb` is imported dynamically inside the function, so the package is
 * only loaded when a connection is actually requested.
 *
 * @param {string} uri - Location handed to the module's `connect` function unchanged.
 * @returns {Promise<*>} Whatever the module's `connect(uri)` resolves to.
 * @throws {Error} If the imported module exposes no `connect` function.
 */
export async function connectLanceDB(uri) {
  const lancedbModule = await import('@lancedb/lancedb')
  const connectFn = resolveConnectFn(lancedbModule)
  if (connectFn) {
    return connectFn(uri)
  }
  throw new Error('Unable to find lancedb.connect()')
}
@@ -0,0 +1,18 @@
1
/**
 * Split text into consecutive, overlapping character windows.
 *
 * The effective window size is at least 200 characters and the effective overlap is
 * clamped to the range 0..floor(size / 2), so each window starts at least half a
 * window after the previous one. Windows containing only whitespace are omitted.
 *
 * Non-numeric (NaN) `chunkSize` / `overlap` values fall back to the defaults. Without
 * that, a NaN overlap would end the loop after the first window and a NaN size would
 * produce no windows at all, silently dropping content.
 *
 * @param {*} text - Input; falsy values are treated as empty text.
 * @param {object} [options]
 * @param {number} [options.chunkSize=1500] - Requested window size in characters (minimum 200).
 * @param {number} [options.overlap=200] - Requested characters shared between neighbouring windows.
 * @returns {string[]} The windows in order; `[]` for empty input.
 */
export function chunkText(text, { chunkSize = 1500, overlap = 200 } = {}) {
  const input = String(text || '')
  if (!input) return []

  const numberOr = (candidate, fallback) => {
    const parsed = Number(candidate)
    return Number.isNaN(parsed) ? fallback : parsed
  }
  const size = Math.max(200, numberOr(chunkSize, 1500))
  const ov = Math.max(0, Math.min(numberOr(overlap, 200), Math.floor(size / 2)))

  const chunks = []
  let start = 0
  while (start < input.length) {
    const end = Math.min(input.length, start + size)
    const piece = input.slice(start, end)
    if (piece.trim()) chunks.push(piece)
    if (end >= input.length) break
    // Step back by the overlap so neighbouring windows share context.
    start = Math.max(0, end - ov)
  }
  return chunks
}
@@ -0,0 +1,50 @@
1
/**
 * Remove every `/` character from the end of a value's string form.
 *
 * @param {*} value - Input; falsy values are treated as the empty string.
 * @returns {string} The string without any trailing slashes.
 */
function stripTrailingSlashes(value) {
  const text = String(value || '')
  let end = text.length
  while (end > 0 && text[end - 1] === '/') {
    end -= 1
  }
  return text.slice(0, end)
}
4
+
5
/**
 * Clean up an OpenAI-compatible base URL supplied by the user.
 *
 * A full endpoint URL (ending in `/chat/completions`, `/embeddings` or `/responses`,
 * optionally preceded by `/v1`) is sometimes pasted where a base URL is expected.
 * The known endpoint suffix is removed — matched case-insensitively, keeping a `/v1`
 * prefix when it was part of the match — along with surrounding whitespace and any
 * trailing slashes, so callers can append API paths themselves.
 *
 * @param {*} baseURL - Raw base URL; falsy or blank values yield `''`.
 * @returns {string} The base URL without endpoint suffix or trailing slashes.
 */
export function normalizeOpenAIBaseURL(baseURL) {
  const trimmed = String(baseURL || '').trim()
  if (trimmed === '') {
    return ''
  }

  const base = stripTrailingSlashes(trimmed)
  const lowered = base.toLowerCase()

  // Order matters: each `/v1/...` entry must be tried before its shorter counterpart.
  const endpointSuffixes = [
    ['/v1/chat/completions', '/v1'],
    ['/chat/completions', ''],
    ['/v1/embeddings', '/v1'],
    ['/embeddings', ''],
    ['/v1/responses', '/v1'],
    ['/responses', ''],
  ]

  const matched = endpointSuffixes.find(([suffix]) => lowered.endsWith(suffix))
  if (!matched) {
    return base
  }

  const [suffix, replacement] = matched
  const withoutEndpoint = base.slice(0, base.length - suffix.length)
  return stripTrailingSlashes(withoutEndpoint + replacement)
}
36
+
37
/**
 * Choose the chat-completions path to append to `baseURL`.
 *
 * @param {*} baseURL - Base URL; falsy values are treated as the empty string.
 * @returns {string} `/chat/completions` when the base URL already ends in a version
 *   segment such as `/v1` or `/v2/`, otherwise `/v1/chat/completions`.
 */
export function getOpenAIChatCompletionsPath(baseURL) {
  const endsWithVersion = /\/v\d+\/?$/.test(String(baseURL || ''))
  if (endsWithVersion) {
    return '/chat/completions'
  }
  return '/v1/chat/completions'
}
41
+
42
/**
 * Choose the embeddings path to append to `baseURL`.
 *
 * @param {*} baseURL - Base URL; falsy values are treated as the empty string.
 * @returns {string} `/embeddings` when the base URL already ends in a version segment
 *   such as `/v1` or `/v2/`, otherwise `/v1/embeddings`.
 */
export function getOpenAIEmbeddingsPath(baseURL) {
  const endsWithVersion = /\/v\d+\/?$/.test(String(baseURL || ''))
  if (endsWithVersion) {
    return '/embeddings'
  }
  return '/v1/embeddings'
}
46
+
47
/**
 * Choose the responses path to append to `baseURL`.
 *
 * @param {*} baseURL - Base URL; falsy values are treated as the empty string.
 * @returns {string} `/responses` when the base URL already ends in a version segment
 *   such as `/v1` or `/v2/`, otherwise `/v1/responses`.
 */
export function getOpenAIResponsesPath(baseURL) {
  const endsWithVersion = /\/v\d+\/?$/.test(String(baseURL || ''))
  if (endsWithVersion) {
    return '/responses'
  }
  return '/v1/responses'
}