codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -1,276 +0,0 @@
1
- import chalk from 'chalk';
2
-
3
/**
 * Decides whether a PR is too large to review in a single prompt.
 * @param {Array} prFiles - Array of PR files with diffContent and content
 * @returns {Object} Decision object with shouldChunk flag and token estimates
 */
export function shouldChunkPR(prFiles) {
  // The holistic PR prompt ships BOTH the full file content AND the diff for
  // every file, plus surrounding context (code examples, guidelines, PR
  // comments, custom docs) — so both sides must be counted.
  // Rough heuristic: ~3 characters per token.
  const approxTokens = (text) => Math.ceil((text?.length || 0) / 3);

  let diffTokens = 0;
  let fullContentTokens = 0;
  for (const file of prFiles) {
    diffTokens += approxTokens(file.diffContent);
    fullContentTokens += approxTokens(file.content);
  }

  // Flat allowance for non-file prompt material (code examples, guidelines,
  // PR comments, custom docs, project summary) — typically 10-30k tokens.
  const CONTEXT_OVERHEAD_TOKENS = 25000;

  const totalEstimatedTokens = diffTokens + fullContentTokens + CONTEXT_OVERHEAD_TOKENS;

  // Conservative ceiling well under Claude's 200k context window, leaving
  // room for the model's response plus a safety margin.
  const MAX_SINGLE_REVIEW_TOKENS = 100000;

  // Chunk on token pressure, or when the sheer file count is unwieldy.
  const shouldChunk = totalEstimatedTokens > MAX_SINGLE_REVIEW_TOKENS || prFiles.length > 30;

  console.log(
    chalk.gray(
      ` Token breakdown: ${diffTokens} diff + ${fullContentTokens} full content + ${CONTEXT_OVERHEAD_TOKENS} context overhead = ${totalEstimatedTokens} total`
    )
  );

  return {
    shouldChunk,
    estimatedTokens: totalEstimatedTokens,
    diffTokens,
    fullContentTokens,
    contextOverhead: CONTEXT_OVERHEAD_TOKENS,
    recommendedChunks: Math.ceil(totalEstimatedTokens / 35000), // More aggressive chunking
  };
}
53
-
54
/**
 * Chunks PR files into manageable groups based on token limits and logical
 * grouping (files in the same directory stay together).
 * @param {Array} prFiles - Array of PR files with diffContent and content
 * @param {number} maxTokensPerChunk - Maximum tokens per chunk
 * @returns {Array} Array of chunks shaped as { files, totalTokens, chunkId }
 */
export function chunkPRFiles(prFiles, maxTokensPerChunk = 35000) {
  const approxTokens = (text) => Math.ceil((text?.length || 0) / 3);

  // Annotate every file with language-agnostic change metrics. The token
  // estimate covers BOTH diff and full content, since both go into the prompt.
  const annotated = prFiles.map((file) => ({
    ...file,
    changeSize: calculateChangeSize(file.diffContent),
    fileComplexity: calculateFileComplexity(file),
    estimatedTokens: approxTokens(file.diffContent) + approxTokens(file.content),
  }));

  // Order by directory first (keeps related files adjacent), then by change
  // magnitude so the biggest edits lead within each directory.
  annotated.sort((a, b) => {
    const dirA = getDirectoryDepth(a.filePath);
    const dirB = getDirectoryDepth(b.filePath);
    if (dirA !== dirB) return dirA.localeCompare(dirB);
    return b.changeSize - a.changeSize;
  });

  // Greedily pack sorted files into chunks under the token budget.
  const chunks = [];
  let pending = [];
  let pendingTokens = 0;

  const flush = () => {
    if (pending.length === 0) return;
    chunks.push({ files: [...pending], totalTokens: pendingTokens, chunkId: chunks.length + 1 });
    pending = [];
    pendingTokens = 0;
  };

  for (const file of annotated) {
    // Start a new chunk when this file would overflow the current budget.
    if (pendingTokens + file.estimatedTokens > maxTokensPerChunk) flush();
    pending.push(file);
    pendingTokens += file.estimatedTokens;
  }
  flush(); // final partial chunk

  return chunks;
}
115
-
116
/**
 * Language-agnostic change size calculation.
 *
 * Counts added and removed lines in a unified diff. Lines beginning with
 * '+++' or '---' are file-path headers rather than content changes, so they
 * are excluded (previously each file header pair inflated the count by 2).
 * @param {string} diffContent - The diff content
 * @returns {number} Total number of additions and deletions
 */
function calculateChangeSize(diffContent) {
  if (!diffContent) return 0;
  let changes = 0;
  for (const line of diffContent.split('\n')) {
    const isAddition = line.startsWith('+') && !line.startsWith('+++');
    const isDeletion = line.startsWith('-') && !line.startsWith('---');
    if (isAddition || isDeletion) changes += 1;
  }
  return changes;
}
128
-
129
- /**
130
- * Language-agnostic file complexity scoring
131
- * @param {Object} file - File object with filePath and diffContent
132
- * @returns {number} Complexity score
133
- */
134
- function calculateFileComplexity(file) {
135
- let complexity = 0;
136
-
137
- // File size factor
138
- complexity += Math.min(file.diffContent ? file.diffContent.length / 1000 : 0, 20);
139
-
140
- // Path-based heuristics (works for any language)
141
- const path = file.filePath.toLowerCase();
142
- if (path.includes('/src/') || path.includes('/lib/')) complexity += 10;
143
- if (path.includes('/test/') || path.includes('/spec/')) complexity += 5;
144
- if (path.includes('/config/') || path.includes('/settings/')) complexity += 8;
145
- if (path.includes('/main.') || path.includes('/index.')) complexity += 15;
146
-
147
- // Change type heuristics
148
- if (file.diffContent) {
149
- if (file.diffContent.includes('new file mode')) complexity += 12;
150
- if (file.diffContent.includes('deleted file mode')) complexity += 8;
151
- }
152
-
153
- return complexity;
154
- }
155
-
156
/**
 * Gets directory path for grouping related files.
 * @param {string} filePath - The file path
 * @returns {string} Directory path without filename ('' for bare filenames)
 */
function getDirectoryDepth(filePath) {
  const segments = filePath.split('/');
  segments.pop(); // drop the filename
  return segments.join('/');
}
164
-
165
/**
 * Combines results from multiple chunk reviews into a single result.
 * @param {Array} chunkResults - Array of chunk review results
 * @param {number} totalFiles - Total number of files in the PR
 * @returns {Object} Combined result object
 */
export function combineChunkResults(chunkResults, totalFiles) {
  const combined = {
    success: true,
    results: [],
    prContext: {
      totalFiles: totalFiles,
      chunkedReview: true,
      chunks: chunkResults.length,
    },
  };

  // Flatten per-file results, tagging each with the chunk that produced it.
  chunkResults.forEach((chunkResult, chunkIndex) => {
    if (!chunkResult.success || !chunkResult.results) return;
    for (const fileResult of chunkResult.results) {
      combined.results.push({
        ...fileResult,
        chunkInfo: {
          chunkNumber: chunkIndex + 1,
          totalChunks: chunkResults.length,
        },
      });
    }
  });

  // Roll up a human-readable summary and cross-chunk pattern detection.
  combined.combinedSummary = createCombinedSummary(chunkResults);
  combined.crossChunkIssues = detectCrossChunkIssues(chunkResults);

  console.log(chalk.green(`✅ Combined results from ${chunkResults.length} chunks: ${combined.results.length} file reviews`));

  return combined;
}
209
-
210
/**
 * Creates a summary from combined chunk results.
 * @param {Array} chunkResults - Array of chunk review results
 * @returns {string} Combined summary text
 */
function createCombinedSummary(chunkResults) {
  // Tally issues across every file of every chunk that produced results.
  let totalIssues = 0;
  for (const chunk of chunkResults) {
    for (const file of chunk.results ?? []) {
      totalIssues += file.results?.issues?.length || 0;
    }
  }

  const successfulChunks = chunkResults.filter((c) => c.success).length;

  return `Chunked PR review completed: ${successfulChunks}/${chunkResults.length} chunks processed successfully. Total issues found: ${totalIssues}. Review performed in parallel chunks to optimize token usage.`;
}
230
-
231
/**
 * Detects issues that span across multiple chunks.
 * @param {Array} chunkResults - Array of chunk review results
 * @returns {Array} Array of cross-chunk issues
 */
function detectCrossChunkIssues(chunkResults) {
  // Flatten every issue, remembering which chunk and file it came from.
  const allIssues = [];
  for (const chunk of chunkResults) {
    for (const file of chunk.results ?? []) {
      for (const issue of file.results?.issues ?? []) {
        allIssues.push({ ...issue, chunkId: chunk.chunkId, filePath: file.filePath });
      }
    }
  }

  // Bucket issues by type plus the first 50 chars of their description — a
  // cheap similarity key for spotting the same problem repeated elsewhere.
  const issueGroups = new Map();
  for (const issue of allIssues) {
    const key = `${issue.type}-${issue.description ? issue.description.substring(0, 50) : ''}`;
    const bucket = issueGroups.get(key);
    if (bucket) {
      bucket.push(issue);
    } else {
      issueGroups.set(key, [issue]);
    }
  }

  // A bucket touching more than one chunk indicates a cross-chunk pattern.
  const crossChunkIssues = [];
  for (const issues of issueGroups.values()) {
    const uniqueChunks = new Set(issues.map((i) => i.chunkId));
    if (uniqueChunks.size > 1) {
      crossChunkIssues.push({
        type: 'pattern',
        severity: 'medium',
        description: `Similar issue pattern detected across ${uniqueChunks.size} chunks: ${issues[0].description || 'Pattern issue'}`,
        affectedFiles: issues.map((i) => i.filePath),
        suggestion: `This issue appears in multiple parts of the PR. Consider addressing it consistently across all affected files.`,
      });
    }
  }

  return crossChunkIssues;
}
@@ -1,28 +0,0 @@
1
- /**
2
- * String Utilities Module
3
- *
4
- * This module provides utilities for string manipulation, formatting,
5
- * and text processing operations.
6
- */
7
-
8
/**
 * Slugify text for use in IDs and URLs
 *
 * @param {string} text - The text to slugify
 * @returns {string} A slugified string safe for use in IDs and URLs
 *
 * @example
 * slugify('Hello World!'); // 'hello-world'
 * slugify('My Component Name'); // 'my-component-name'
 * slugify('  Multiple   Spaces  '); // 'multiple-spaces'
 * slugify('--Already--Dashed--'); // 'already-dashed'
 */
export function slugify(text) {
  if (!text) return '';
  return text
    .toString()
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-') // Replace spaces with -
    .replace(/[^\w-]+/g, '') // Remove all non-word chars
    .replace(/--+/g, '-') // Replace multiple - with single -
    .replace(/^-+|-+$/g, ''); // Fix: strip leading/trailing - ('-x-' previously stayed '-x-')
}