codecritique 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -114
- package/package.json +10 -9
- package/src/content-retrieval.test.js +775 -0
- package/src/custom-documents.test.js +440 -0
- package/src/feedback-loader.test.js +529 -0
- package/src/llm.test.js +256 -0
- package/src/project-analyzer.test.js +747 -0
- package/src/rag-analyzer.js +12 -0
- package/src/rag-analyzer.test.js +1109 -0
- package/src/rag-review.test.js +317 -0
- package/src/setupTests.js +131 -0
- package/src/zero-shot-classifier-open.test.js +278 -0
- package/src/embeddings/cache-manager.js +0 -364
- package/src/embeddings/constants.js +0 -40
- package/src/embeddings/database.js +0 -921
- package/src/embeddings/errors.js +0 -208
- package/src/embeddings/factory.js +0 -447
- package/src/embeddings/file-processor.js +0 -851
- package/src/embeddings/model-manager.js +0 -337
- package/src/embeddings/similarity-calculator.js +0 -97
- package/src/embeddings/types.js +0 -113
- package/src/pr-history/analyzer.js +0 -579
- package/src/pr-history/bot-detector.js +0 -123
- package/src/pr-history/cli-utils.js +0 -204
- package/src/pr-history/comment-processor.js +0 -549
- package/src/pr-history/database.js +0 -819
- package/src/pr-history/github-client.js +0 -629
- package/src/technology-keywords.json +0 -753
- package/src/utils/command.js +0 -48
- package/src/utils/constants.js +0 -263
- package/src/utils/context-inference.js +0 -364
- package/src/utils/document-detection.js +0 -105
- package/src/utils/file-validation.js +0 -271
- package/src/utils/git.js +0 -232
- package/src/utils/language-detection.js +0 -170
- package/src/utils/logging.js +0 -24
- package/src/utils/markdown.js +0 -132
- package/src/utils/mobilebert-tokenizer.js +0 -141
- package/src/utils/pr-chunking.js +0 -276
- package/src/utils/string-utils.js +0 -28
package/src/utils/pr-chunking.js
DELETED

@@ -1,276 +0,0 @@
import chalk from 'chalk';

/**
 * Determines if a PR should be chunked based on estimated token usage
 * @param {Array} prFiles - Array of PR files with diffContent and content
 * @returns {Object} Decision object with shouldChunk flag and estimates
 */
export function shouldChunkPR(prFiles) {
  // IMPORTANT: The holistic PR prompt includes BOTH full file content AND diff content
  // for each file, plus context (code examples, guidelines, PR comments, custom docs)

  // Calculate tokens for diff content
  const diffTokens = prFiles.reduce((sum, file) => {
    return sum + Math.ceil((file.diffContent?.length || 0) / 3);
  }, 0);

  // Calculate tokens for full file content (included in prompt for context awareness)
  const fullContentTokens = prFiles.reduce((sum, file) => {
    return sum + Math.ceil((file.content?.length || 0) / 3);
  }, 0);

  // Total file-related tokens (both diff AND full content are sent)
  const fileTokens = diffTokens + fullContentTokens;

  // Estimate context overhead (code examples, guidelines, PR comments, custom docs, project summary)
  // This is typically 10-30k tokens depending on project size
  const CONTEXT_OVERHEAD_TOKENS = 25000;

  // Total estimated prompt tokens
  const totalEstimatedTokens = fileTokens + CONTEXT_OVERHEAD_TOKENS;

  // Claude's limit is 200k tokens. Leave buffer for the response and a safety margin:
  // cap a single-pass review at 100k tokens to stay conservative.
  const MAX_SINGLE_REVIEW_TOKENS = 100000;

  const shouldChunk = totalEstimatedTokens > MAX_SINGLE_REVIEW_TOKENS || prFiles.length > 30;

  console.log(
    chalk.gray(
      ` Token breakdown: ${diffTokens} diff + ${fullContentTokens} full content + ${CONTEXT_OVERHEAD_TOKENS} context overhead = ${totalEstimatedTokens} total`
    )
  );

  return {
    shouldChunk,
    estimatedTokens: totalEstimatedTokens,
    diffTokens,
    fullContentTokens,
    contextOverhead: CONTEXT_OVERHEAD_TOKENS,
    recommendedChunks: Math.ceil(totalEstimatedTokens / 35000), // More aggressive chunking
  };
}

/**
 * Chunks PR files into manageable groups based on token limits and logical grouping
 * @param {Array} prFiles - Array of PR files with diffContent and content
 * @param {number} maxTokensPerChunk - Maximum tokens per chunk
 * @returns {Array} Array of chunks with files and metadata
 */
export function chunkPRFiles(prFiles, maxTokensPerChunk = 35000) {
  // Calculate change complexity for each file (works for any language)
  // IMPORTANT: Token estimate must include BOTH diff AND full content since both are sent
  const filesWithMetrics = prFiles.map((file) => ({
    ...file,
    changeSize: calculateChangeSize(file.diffContent),
    fileComplexity: calculateFileComplexity(file),
    // Estimate tokens for BOTH diff content AND full file content (both are included in prompt)
    estimatedTokens: Math.ceil((file.diffContent?.length || 0) / 3) + Math.ceil((file.content?.length || 0) / 3),
  }));

  // Sort by directory + change importance for logical grouping
  const sortedFiles = filesWithMetrics.sort((a, b) => {
    const dirA = getDirectoryDepth(a.filePath);
    const dirB = getDirectoryDepth(b.filePath);

    // Primary: Directory structure (keep related files together)
    if (dirA !== dirB) return dirA.localeCompare(dirB);

    // Secondary: Change importance (larger changes first)
    return b.changeSize - a.changeSize;
  });

  // Chunk files based on token budget
  const chunks = [];
  let currentChunk = [];
  let currentTokens = 0;

  for (const file of sortedFiles) {
    // Start new chunk if adding this file exceeds budget
    if (currentTokens + file.estimatedTokens > maxTokensPerChunk && currentChunk.length > 0) {
      chunks.push({
        files: [...currentChunk],
        totalTokens: currentTokens,
        chunkId: chunks.length + 1,
      });
      currentChunk = [];
      currentTokens = 0;
    }

    currentChunk.push(file);
    currentTokens += file.estimatedTokens;
  }

  // Add final chunk
  if (currentChunk.length > 0) {
    chunks.push({
      files: [...currentChunk],
      totalTokens: currentTokens,
      chunkId: chunks.length + 1,
    });
  }

  return chunks;
}

/**
 * Language-agnostic change size calculation
 * @param {string} diffContent - The diff content
 * @returns {number} Total number of additions and deletions
 */
function calculateChangeSize(diffContent) {
  if (!diffContent) return 0;
  const lines = diffContent.split('\n');
  const additions = lines.filter((line) => line.startsWith('+')).length;
  const deletions = lines.filter((line) => line.startsWith('-')).length;
  return additions + deletions;
}

/**
 * Language-agnostic file complexity scoring
 * @param {Object} file - File object with filePath and diffContent
 * @returns {number} Complexity score
 */
function calculateFileComplexity(file) {
  let complexity = 0;

  // File size factor
  complexity += Math.min(file.diffContent ? file.diffContent.length / 1000 : 0, 20);

  // Path-based heuristics (works for any language)
  const path = file.filePath.toLowerCase();
  if (path.includes('/src/') || path.includes('/lib/')) complexity += 10;
  if (path.includes('/test/') || path.includes('/spec/')) complexity += 5;
  if (path.includes('/config/') || path.includes('/settings/')) complexity += 8;
  if (path.includes('/main.') || path.includes('/index.')) complexity += 15;

  // Change type heuristics
  if (file.diffContent) {
    if (file.diffContent.includes('new file mode')) complexity += 12;
    if (file.diffContent.includes('deleted file mode')) complexity += 8;
  }

  return complexity;
}

/**
 * Gets directory path for grouping related files
 * @param {string} filePath - The file path
 * @returns {string} Directory path without filename
 */
function getDirectoryDepth(filePath) {
  return filePath.split('/').slice(0, -1).join('/'); // Directory path without filename
}

/**
 * Combines results from multiple chunk reviews into a single result
 * @param {Array} chunkResults - Array of chunk review results
 * @param {number} totalFiles - Total number of files in the PR
 * @returns {Object} Combined result object
 */
export function combineChunkResults(chunkResults, totalFiles) {
  const combinedResult = {
    success: true,
    results: [],
    prContext: {
      totalFiles: totalFiles,
      chunkedReview: true,
      chunks: chunkResults.length,
    },
  };

  // Combine file-specific results
  chunkResults.forEach((chunkResult, chunkIndex) => {
    if (chunkResult.success && chunkResult.results) {
      chunkResult.results.forEach((fileResult) => {
        // Add chunk context to each result
        const enhancedResult = {
          ...fileResult,
          chunkInfo: {
            chunkNumber: chunkIndex + 1,
            totalChunks: chunkResults.length,
          },
        };
        combinedResult.results.push(enhancedResult);
      });
    }
  });

  // Create combined summary
  combinedResult.combinedSummary = createCombinedSummary(chunkResults);

  // Detect and merge cross-chunk issues
  combinedResult.crossChunkIssues = detectCrossChunkIssues(chunkResults);

  console.log(chalk.green(`✅ Combined results from ${chunkResults.length} chunks: ${combinedResult.results.length} file reviews`));

  return combinedResult;
}

/**
 * Creates a summary from combined chunk results
 * @param {Array} chunkResults - Array of chunk review results
 * @returns {string} Combined summary text
 */
function createCombinedSummary(chunkResults) {
  const totalIssues = chunkResults.reduce((sum, chunk) => {
    if (!chunk.results) return sum;
    return (
      sum +
      chunk.results.reduce((fileSum, file) => {
        return fileSum + (file.results?.issues?.length || 0);
      }, 0)
    );
  }, 0);

  const successfulChunks = chunkResults.filter((c) => c.success).length;

  return `Chunked PR review completed: ${successfulChunks}/${chunkResults.length} chunks processed successfully. Total issues found: ${totalIssues}. Review performed in parallel chunks to optimize token usage.`;
}

/**
 * Detects issues that span across multiple chunks
 * @param {Array} chunkResults - Array of chunk review results
 * @returns {Array} Array of cross-chunk issues
 */
function detectCrossChunkIssues(chunkResults) {
  const crossChunkIssues = [];

  // Simple heuristic: Look for similar issues across chunks that might indicate patterns
  const allIssues = chunkResults.flatMap(
    (chunk) =>
      chunk.results?.flatMap((file) =>
        (file.results?.issues || []).map((issue) => ({
          ...issue,
          chunkId: chunk.chunkId,
          filePath: file.filePath,
        }))
      ) || []
  );

  // Group by issue type and description similarity
  const issueGroups = new Map();
  allIssues.forEach((issue) => {
    const key = `${issue.type}-${issue.description ? issue.description.substring(0, 50) : ''}`;
    if (!issueGroups.has(key)) {
      issueGroups.set(key, []);
    }
    issueGroups.get(key).push(issue);
  });

  // Identify patterns that appear across multiple chunks
  issueGroups.forEach((issues) => {
    const uniqueChunks = new Set(issues.map((i) => i.chunkId));
    if (uniqueChunks.size > 1) {
      crossChunkIssues.push({
        type: 'pattern',
        severity: 'medium',
        description: `Similar issue pattern detected across ${uniqueChunks.size} chunks: ${issues[0].description || 'Pattern issue'}`,
        affectedFiles: issues.map((i) => i.filePath),
        suggestion: `This issue appears in multiple parts of the PR. Consider addressing it consistently across all affected files.`,
      });
    }
  });

  return crossChunkIssues;
}
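Taken together, the removed module implemented a three-stage pipeline: estimate the prompt size, split oversized PRs into token-budgeted chunks, and merge the per-chunk review results. Below is a minimal sketch of how a caller might have wired these exports together; the `reviewChunk` callback and the result shape it resolves to are assumptions for illustration, not part of the deleted code.

import { shouldChunkPR, chunkPRFiles, combineChunkResults } from './utils/pr-chunking.js';

// Hypothetical driver. reviewChunk(files) is an assumed callback that runs
// one LLM review pass and resolves to a { success, results, chunkId } object.
async function reviewPR(prFiles, reviewChunk) {
  const decision = shouldChunkPR(prFiles);
  if (!decision.shouldChunk) {
    // Small PR: a single holistic review fits within the token budget
    return reviewChunk(prFiles);
  }
  // Large PR: split by the default 35k-token budget, review each chunk, then merge
  const chunks = chunkPRFiles(prFiles);
  const chunkResults = await Promise.all(chunks.map((chunk) => reviewChunk(chunk.files)));
  return combineChunkResults(chunkResults, prFiles.length);
}

Note that the chars/3 heuristic is deliberately pessimistic (English text averages closer to 4 characters per token), so a 90 kB file already counts as roughly 30k tokens of full content before its diff is added, which by itself nearly fills one default chunk.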
package/src/utils/string-utils.js
DELETED

@@ -1,28 +0,0 @@
/**
 * String Utilities Module
 *
 * This module provides utilities for string manipulation, formatting,
 * and text processing operations.
 */

/**
 * Slugify text for use in IDs and URLs
 *
 * @param {string} text - The text to slugify
 * @returns {string} A slugified string safe for use in IDs and URLs
 *
 * @example
 * slugify('Hello World!'); // 'hello-world'
 * slugify('My Component Name'); // 'my-component-name'
 * slugify('  Multiple   Spaces  '); // 'multiple-spaces'
 */
export function slugify(text) {
  if (!text) return '';
  return text
    .toString()
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-') // Replace spaces with -
    .replace(/[^\w-]+/g, '') // Remove all non-word chars
    .replace(/--+/g, '-'); // Replace multiple - with single -
}
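Beyond the cases in its JSDoc, two illustrative calls show an edge of the removed slugify worth knowing: because the cleanup regex uses \w without the Unicode flag, non-ASCII letters are stripped rather than transliterated. The import path below is the pre-removal location, shown only for context.

import { slugify } from './utils/string-utils.js';

slugify('Chunked PR Review (v1.1)'); // 'chunked-pr-review-v11' (punctuation removed)
slugify('  café & crème  ');         // 'caf-crme' (accented letters dropped, not transliterated)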