@juspay/yama 1.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/.mcp-config.example.json +26 -0
  2. package/CHANGELOG.md +40 -0
  3. package/README.md +311 -685
  4. package/dist/cli/v2.cli.d.ts +13 -0
  5. package/dist/cli/v2.cli.js +290 -0
  6. package/dist/index.d.ts +12 -13
  7. package/dist/index.js +18 -19
  8. package/dist/v2/config/ConfigLoader.d.ts +50 -0
  9. package/dist/v2/config/ConfigLoader.js +205 -0
  10. package/dist/v2/config/DefaultConfig.d.ts +9 -0
  11. package/dist/v2/config/DefaultConfig.js +191 -0
  12. package/dist/v2/core/MCPServerManager.d.ts +22 -0
  13. package/dist/v2/core/MCPServerManager.js +92 -0
  14. package/dist/v2/core/SessionManager.d.ts +72 -0
  15. package/dist/v2/core/SessionManager.js +200 -0
  16. package/dist/v2/core/YamaV2Orchestrator.d.ts +112 -0
  17. package/dist/v2/core/YamaV2Orchestrator.js +549 -0
  18. package/dist/v2/prompts/EnhancementSystemPrompt.d.ts +8 -0
  19. package/dist/v2/prompts/EnhancementSystemPrompt.js +216 -0
  20. package/dist/v2/prompts/PromptBuilder.d.ts +38 -0
  21. package/dist/v2/prompts/PromptBuilder.js +228 -0
  22. package/dist/v2/prompts/ReviewSystemPrompt.d.ts +8 -0
  23. package/dist/v2/prompts/ReviewSystemPrompt.js +270 -0
  24. package/dist/v2/types/config.types.d.ts +120 -0
  25. package/dist/v2/types/config.types.js +5 -0
  26. package/dist/v2/types/mcp.types.d.ts +191 -0
  27. package/dist/v2/types/mcp.types.js +6 -0
  28. package/dist/v2/types/v2.types.d.ts +182 -0
  29. package/dist/v2/types/v2.types.js +42 -0
  30. package/dist/v2/utils/ObservabilityConfig.d.ts +22 -0
  31. package/dist/v2/utils/ObservabilityConfig.js +48 -0
  32. package/package.json +11 -9
  33. package/yama.config.example.yaml +214 -193
  34. package/dist/cli/index.d.ts +0 -12
  35. package/dist/cli/index.js +0 -538
  36. package/dist/core/ContextGatherer.d.ts +0 -110
  37. package/dist/core/ContextGatherer.js +0 -470
  38. package/dist/core/Guardian.d.ts +0 -81
  39. package/dist/core/Guardian.js +0 -474
  40. package/dist/core/providers/BitbucketProvider.d.ts +0 -105
  41. package/dist/core/providers/BitbucketProvider.js +0 -489
  42. package/dist/features/CodeReviewer.d.ts +0 -173
  43. package/dist/features/CodeReviewer.js +0 -1707
  44. package/dist/features/DescriptionEnhancer.d.ts +0 -64
  45. package/dist/features/DescriptionEnhancer.js +0 -445
  46. package/dist/features/MultiInstanceProcessor.d.ts +0 -74
  47. package/dist/features/MultiInstanceProcessor.js +0 -360
  48. package/dist/types/index.d.ts +0 -624
  49. package/dist/types/index.js +0 -104
  50. package/dist/utils/Cache.d.ts +0 -103
  51. package/dist/utils/Cache.js +0 -444
  52. package/dist/utils/ConfigManager.d.ts +0 -88
  53. package/dist/utils/ConfigManager.js +0 -603
  54. package/dist/utils/ContentSimilarityService.d.ts +0 -74
  55. package/dist/utils/ContentSimilarityService.js +0 -215
  56. package/dist/utils/ExactDuplicateRemover.d.ts +0 -77
  57. package/dist/utils/ExactDuplicateRemover.js +0 -361
  58. package/dist/utils/Logger.d.ts +0 -31
  59. package/dist/utils/Logger.js +0 -214
  60. package/dist/utils/MemoryBankManager.d.ts +0 -73
  61. package/dist/utils/MemoryBankManager.js +0 -310
  62. package/dist/utils/ParallelProcessing.d.ts +0 -140
  63. package/dist/utils/ParallelProcessing.js +0 -333
  64. package/dist/utils/ProviderLimits.d.ts +0 -58
  65. package/dist/utils/ProviderLimits.js +0 -143
  66. package/dist/utils/RetryManager.d.ts +0 -78
  67. package/dist/utils/RetryManager.js +0 -205
@@ -1,1707 +0,0 @@
1
- /**
2
- * Enhanced Code Reviewer - Optimized to work with Unified Context
3
- * Preserves all original functionality from pr-police.js but optimized
4
- */
5
- // NeuroLink will be dynamically imported
6
- import { ProviderError, } from "../types/index.js";
7
- import { createMultiInstanceProcessor, } from "./MultiInstanceProcessor.js";
8
- import { logger } from "../utils/Logger.js";
9
- import { getProviderTokenLimit } from "../utils/ProviderLimits.js";
10
- import { Semaphore, TokenBudgetManager, calculateOptimalConcurrency, } from "../utils/ParallelProcessing.js";
11
- import { createExactDuplicateRemover } from "../utils/ExactDuplicateRemover.js";
12
- export class CodeReviewer {
13
- neurolink;
14
- bitbucketProvider;
15
- aiConfig;
16
- reviewConfig;
17
- constructor(bitbucketProvider, aiConfig, reviewConfig) {
18
- this.bitbucketProvider = bitbucketProvider;
19
- this.aiConfig = aiConfig;
20
- this.reviewConfig = reviewConfig;
21
- }
22
- /**
23
- * Review code using pre-gathered unified context (OPTIMIZED with Multi-Instance and Batch Processing)
24
- */
25
- async reviewCodeWithContext(context, options, multiInstanceConfig) {
26
- const startTime = Date.now();
27
- try {
28
- logger.phase("🧪 Conducting AI-powered code analysis...");
29
- logger.info(`Analyzing ${context.diffStrategy.fileCount} files using ${context.diffStrategy.strategy} strategy`);
30
- let violations;
31
- let processingStrategy;
32
- // Check if multi-instance processing is enabled and configured
33
- if (multiInstanceConfig?.enabled &&
34
- multiInstanceConfig.instances?.length > 1) {
35
- logger.info("🚀 Using multi-instance processing for enhanced analysis");
36
- const multiInstanceResult = await this.reviewWithMultipleInstances(context, options, multiInstanceConfig);
37
- violations = multiInstanceResult.finalViolations;
38
- processingStrategy = "multi-instance";
39
- }
40
- else {
41
- // Determine if we should use batch processing
42
- const batchConfig = this.getBatchProcessingConfig();
43
- const shouldUseBatchProcessing = this.shouldUseBatchProcessing(context, batchConfig);
44
- if (shouldUseBatchProcessing) {
45
- logger.info("🔄 Using batch processing for large PR analysis");
46
- const batchResult = await this.reviewWithBatchProcessing(context, options, batchConfig);
47
- violations = batchResult.violations;
48
- processingStrategy = "batch-processing";
49
- }
50
- else {
51
- logger.info("⚡ Using single-request analysis for small PR");
52
- const analysisPrompt = this.buildAnalysisPrompt(context, options);
53
- violations = await this.analyzeWithAI(analysisPrompt, context);
54
- processingStrategy = "single-request";
55
- }
56
- }
57
- if (!options.dryRun && violations.length > 0) {
58
- violations = await this.postComments(context, violations, options);
59
- }
60
- const duration = Math.round((Date.now() - startTime) / 1000);
61
- const result = this.generateReviewResult(violations, duration, context, processingStrategy);
62
- logger.success(`Code review completed in ${duration}s: ${violations.length} violations found (${processingStrategy})`);
63
- return result;
64
- }
65
- catch (error) {
66
- logger.error(`Code review failed: ${error.message}`);
67
- throw new ProviderError(`Code review failed: ${error.message}`);
68
- }
69
- }
70
- /**
71
- * Review code using multiple instances for enhanced analysis
72
- */
73
- async reviewWithMultipleInstances(context, options, multiInstanceConfig) {
74
- try {
75
- // Create multi-instance processor
76
- const multiInstanceProcessor = createMultiInstanceProcessor(this.bitbucketProvider, this.reviewConfig);
77
- // Execute multi-instance processing
78
- const result = await multiInstanceProcessor.processWithMultipleInstances(context, options, multiInstanceConfig);
79
- return result;
80
- }
81
- catch (error) {
82
- logger.error(`Multi-instance processing failed: ${error.message}`);
83
- throw error;
84
- }
85
- }
86
- /**
87
- * Get system prompt for security-focused code review
88
- */
89
- getSecurityReviewSystemPrompt() {
90
- return (this.reviewConfig.systemPrompt ||
91
- `You are an Expert Security Code Reviewer for enterprise applications. Your role is to:
92
-
93
- 🔒 SECURITY FIRST: Prioritize security vulnerabilities and data protection
94
- ⚡ PERFORMANCE AWARE: Identify performance bottlenecks and optimization opportunities
95
- 🏗️ QUALITY FOCUSED: Ensure maintainable, readable, and robust code
96
- 🛡️ ERROR RESILIENT: Verify comprehensive error handling and edge cases
97
-
98
- You provide actionable, educational feedback with specific examples and solutions.
99
- Focus on critical issues that could impact production systems.
100
-
101
- CRITICAL INSTRUCTION: When identifying issues, you MUST copy the EXACT line from the diff, including the diff prefix (+, -, or space). Do not modify or clean the line in any way.`);
102
- }
103
- /**
104
- * Get analysis requirements from config or defaults
105
- */
106
- getAnalysisRequirements() {
107
- if (this.reviewConfig.focusAreas &&
108
- this.reviewConfig.focusAreas.length > 0) {
109
- return this.reviewConfig.focusAreas
110
- .map((area) => `### ${area}`)
111
- .join("\n\n");
112
- }
113
- // Default analysis requirements
114
- return `### 🔒 Security Analysis (CRITICAL PRIORITY)
115
- - SQL/XSS/Command injection vulnerabilities
116
- - Authentication/authorization flaws
117
- - Input validation and sanitization
118
- - Hardcoded secrets or credentials
119
- - Data exposure and privacy concerns
120
-
121
- ### ⚡ Performance Review
122
- - Algorithm efficiency and complexity
123
- - Database query optimization
124
- - Memory management and resource leaks
125
- - Caching opportunities
126
-
127
- ### 🏗️ Code Quality
128
- - SOLID principles compliance
129
- - Error handling robustness
130
- - Code organization and readability
131
- - Test coverage considerations`;
132
- }
133
- /**
134
- * Build focused analysis prompt separated from context
135
- */
136
- buildCoreAnalysisPrompt(context) {
137
- const diffContent = this.extractDiffContent(context);
138
- return `Conduct a comprehensive security and quality analysis of this ${context.diffStrategy.strategy === "whole" ? "pull request" : "code changeset"}.
139
-
140
- ## COMPLETE PR CONTEXT:
141
- **Title**: ${context.pr.title}
142
- **Author**: ${context.pr.author}
143
- **Description**: ${context.pr.description}
144
- **Files Changed**: ${context.pr.fileChanges?.length || 0}
145
- **Existing Comments**: ${JSON.stringify(context.pr.comments || [], null, 2)}
146
- **Branch**: ${context.identifier.branch}
147
- **Repository**: ${context.identifier.workspace}/${context.identifier.repository}
148
-
149
- ## DIFF STRATEGY (${context.diffStrategy.strategy.toUpperCase()}):
150
- **Reason**: ${context.diffStrategy.reason}
151
- **File Count**: ${context.diffStrategy.fileCount}
152
- **Estimated Size**: ${context.diffStrategy.estimatedSize}
153
-
154
- ## COMPLETE PROJECT CONTEXT:
155
- ${context.projectContext.memoryBank.projectContext || context.projectContext.memoryBank.summary}
156
-
157
- ## PROJECT RULES & STANDARDS:
158
- ${context.projectContext.clinerules || "No specific rules defined"}
159
-
160
- ## COMPLETE CODE CHANGES (NO TRUNCATION):
161
- ${diffContent}
162
-
163
- ## CRITICAL INSTRUCTIONS FOR CODE SNIPPETS:
164
-
165
- When you identify an issue in the code, you MUST:
166
- 1. Copy the EXACT line from the diff above, including the diff prefix (+, -, or space at the beginning)
167
- 2. Do NOT modify, clean, or reformat the line
168
- 3. Include the complete line as it appears in the diff
169
- 4. If the issue spans multiple lines, choose the most relevant single line
170
-
171
- Example of CORRECT snippet format:
172
- - For added lines: "+ const password = 'hardcoded123';"
173
- - For removed lines: "- return userData;"
174
- - For context lines: " function processPayment() {"
175
-
176
- Example of INCORRECT snippet format (DO NOT DO THIS):
177
- - "const password = 'hardcoded123';" (missing the + prefix)
178
- - "return userData" (missing the - prefix and semicolon)
179
-
180
- ## ANALYSIS REQUIREMENTS:
181
-
182
- ${this.getAnalysisRequirements()}
183
-
184
- ### 📋 OUTPUT FORMAT
185
- Return ONLY valid JSON:
186
- {
187
- "violations": [
188
- {
189
- "type": "inline",
190
- "file": "exact/file/path.ext",
191
- "code_snippet": "EXACT line from diff INCLUDING the +/- prefix",
192
- "search_context": {
193
- "before": ["line before from diff with prefix"],
194
- "after": ["line after from diff with prefix"]
195
- },
196
- "severity": "CRITICAL|MAJOR|MINOR|SUGGESTION",
197
- "category": "security|performance|maintainability|functionality",
198
- "issue": "Brief issue title",
199
- "message": "Detailed explanation",
200
- "impact": "Potential impact description",
201
- "suggestion": "Clean, executable code fix (no diff symbols)"
202
- }
203
- ],
204
- "summary": "Analysis summary",
205
- "positiveObservations": ["Good practices found"],
206
- "statistics": {
207
- "filesReviewed": ${context.diffStrategy.fileCount},
208
- "totalIssues": 0,
209
- "criticalCount": 0,
210
- "majorCount": 0,
211
- "minorCount": 0,
212
- "suggestionCount": 0
213
- }
214
- }`;
215
- }
216
- /**
217
- * Extract diff content based on strategy
218
- */
219
- extractDiffContent(context) {
220
- if (context.diffStrategy.strategy === "whole" && context.prDiff) {
221
- return context.prDiff.diff || JSON.stringify(context.prDiff, null, 2);
222
- }
223
- else if (context.diffStrategy.strategy === "file-by-file" &&
224
- context.fileDiffs) {
225
- const fileDiffArray = Array.from(context.fileDiffs.entries()).map(([file, diff]) => ({
226
- file,
227
- diff,
228
- }));
229
- return JSON.stringify(fileDiffArray, null, 2);
230
- }
231
- return "No diff content available";
232
- }
233
- /**
234
- * Detect project type for better context
235
- */
236
- detectProjectType(context) {
237
- const fileExtensions = new Set();
238
- // Extract file extensions from changes
239
- if (context.pr.fileChanges) {
240
- context.pr.fileChanges.forEach((file) => {
241
- const ext = file.split(".").pop()?.toLowerCase();
242
- if (ext) {
243
- fileExtensions.add(ext);
244
- }
245
- });
246
- }
247
- if (fileExtensions.has("rs") || fileExtensions.has("res")) {
248
- return "rescript";
249
- }
250
- if (fileExtensions.has("ts") || fileExtensions.has("tsx")) {
251
- return "typescript";
252
- }
253
- if (fileExtensions.has("js") || fileExtensions.has("jsx")) {
254
- return "javascript";
255
- }
256
- if (fileExtensions.has("py")) {
257
- return "python";
258
- }
259
- if (fileExtensions.has("go")) {
260
- return "golang";
261
- }
262
- if (fileExtensions.has("java")) {
263
- return "java";
264
- }
265
- if (fileExtensions.has("cpp") || fileExtensions.has("c")) {
266
- return "cpp";
267
- }
268
- return "mixed";
269
- }
270
- /**
271
- * Assess complexity level for better AI context
272
- */
273
- assessComplexity(context) {
274
- const fileCount = context.diffStrategy.fileCount;
275
- const hasLargeFiles = context.diffStrategy.estimatedSize.includes("Large");
276
- const hasComments = (context.pr.comments?.length || 0) > 0;
277
- if (fileCount > 50) {
278
- return "very-high";
279
- }
280
- if (fileCount > 20 || hasLargeFiles) {
281
- return "high";
282
- }
283
- if (fileCount > 10 || hasComments) {
284
- return "medium";
285
- }
286
- return "low";
287
- }
288
- /**
289
- * Legacy method - kept for compatibility but simplified
290
- */
291
- buildAnalysisPrompt(context, _options) {
292
- // Legacy method - now delegates to new structure
293
- return this.buildCoreAnalysisPrompt(context);
294
- }
295
- /**
296
- * Get safe token limit based on AI provider using shared utility
297
- */
298
- getSafeTokenLimit() {
299
- const provider = this.aiConfig.provider || "auto";
300
- const configuredTokens = this.aiConfig.maxTokens;
301
- // Use conservative limits for CodeReviewer (safer for large diffs)
302
- const providerLimit = getProviderTokenLimit(provider, true);
303
- // Use the smaller of configured tokens or provider limit
304
- if (configuredTokens && configuredTokens > 0) {
305
- const safeLimit = Math.min(configuredTokens, providerLimit);
306
- logger.debug(`Token limit: configured=${configuredTokens}, provider=${providerLimit}, using=${safeLimit}`);
307
- return safeLimit;
308
- }
309
- logger.debug(`Token limit: using provider default=${providerLimit} for ${provider}`);
310
- return providerLimit;
311
- }
312
- /**
313
- * Analyze code with AI using the enhanced prompt
314
- */
315
- async analyzeWithAI(prompt, context) {
316
- try {
317
- logger.debug("Starting AI analysis...");
318
- // Initialize NeuroLink with eval-based dynamic import
319
- if (!this.neurolink) {
320
- const { NeuroLink } = await import("@juspay/neurolink");
321
- this.neurolink = new NeuroLink();
322
- }
323
- // Extract context from unified context for better AI understanding
324
- const aiContext = {
325
- operation: "code-review",
326
- repository: `${context.identifier.workspace}/${context.identifier.repository}`,
327
- branch: context.identifier.branch,
328
- prId: context.identifier.pullRequestId,
329
- prTitle: context.pr.title,
330
- prAuthor: context.pr.author,
331
- fileCount: context.diffStrategy.fileCount,
332
- diffStrategy: context.diffStrategy.strategy,
333
- analysisType: context.diffStrategy.strategy === "whole"
334
- ? "comprehensive"
335
- : "file-by-file",
336
- projectType: this.detectProjectType(context),
337
- hasExistingComments: (context.pr.comments?.length || 0) > 0,
338
- complexity: this.assessComplexity(context),
339
- };
340
- // Simplified, focused prompt without context pollution
341
- const corePrompt = this.buildCoreAnalysisPrompt(context);
342
- // Get safe token limit based on provider
343
- const safeMaxTokens = this.getSafeTokenLimit();
344
- logger.debug(`Using AI provider: ${this.aiConfig.provider || "auto"}`);
345
- logger.debug(`Configured maxTokens: ${this.aiConfig.maxTokens}`);
346
- logger.debug(`Safe maxTokens limit: ${safeMaxTokens}`);
347
- const result = await this.neurolink.generate({
348
- input: { text: corePrompt },
349
- systemPrompt: this.getSecurityReviewSystemPrompt(),
350
- provider: this.aiConfig.provider || "auto", // Auto-select best provider
351
- model: this.aiConfig.model || "best", // Use most capable model
352
- temperature: this.aiConfig.temperature || 0.3, // Lower for more focused analysis
353
- maxTokens: safeMaxTokens, // Use provider-aware safe token limit
354
- timeout: "15m", // Allow plenty of time for thorough analysis
355
- context: aiContext,
356
- enableAnalytics: this.aiConfig.enableAnalytics || true,
357
- enableEvaluation: false, // Disabled to prevent evaluation warnings
358
- });
359
- // Log analytics if available
360
- if (result.analytics) {
361
- logger.debug(`AI Analytics - Provider: ${result.provider}, Response Time: ${result.responseTime}ms, Quality Score: ${result.evaluation?.overallScore}`);
362
- }
363
- logger.debug("AI analysis completed, parsing response...");
364
- // Modern NeuroLink returns { content: string }
365
- const analysisData = this.parseAIResponse(result);
366
- // Display AI response for debugging
367
- if (logger.getConfig().verbose) {
368
- logger.debug("AI Analysis Response:");
369
- logger.debug("═".repeat(80));
370
- logger.debug(JSON.stringify(analysisData, null, 2));
371
- logger.debug("═".repeat(80));
372
- }
373
- if (!analysisData.violations || !Array.isArray(analysisData.violations)) {
374
- logger.debug("No violations array found in AI response");
375
- return [];
376
- }
377
- logger.debug(`AI analysis found ${analysisData.violations.length} violations`);
378
- return analysisData.violations;
379
- }
380
- catch (error) {
381
- if (error.message?.includes("timeout")) {
382
- logger.error("⏰ AI analysis timed out after 15 minutes");
383
- throw new Error("Analysis timeout - try reducing diff size or adjusting timeout");
384
- }
385
- logger.error(`AI analysis failed: ${error.message}`);
386
- throw error;
387
- }
388
- }
389
- /**
390
- * Post comments to PR using unified context - matching pr-police.js exactly
391
- */
392
- async postComments(context, violations, _options) {
393
- logger.phase("📝 Posting review comments...");
394
- // NEW: Apply semantic comment deduplication before posting
395
- const duplicateRemover = createExactDuplicateRemover();
396
- const deduplicationResult = await duplicateRemover.removeAgainstExistingComments(violations, context.pr.comments || [], this.aiConfig, 85);
397
- logger.info(`🔍 Semantic deduplication: ${violations.length} → ${deduplicationResult.uniqueViolations.length} violations ` +
398
- `(${deduplicationResult.duplicatesRemoved} duplicates removed)`);
399
- // Log deduplication details if any duplicates were found
400
- if (deduplicationResult.duplicatesRemoved > 0) {
401
- logger.info(duplicateRemover.getCommentDeduplicationStats(deduplicationResult));
402
- // Log details of semantic matches
403
- deduplicationResult.semanticMatches.forEach((match, index) => {
404
- logger.debug(`🎯 Semantic match ${index + 1}: "${match.violation}" matches ${match.comment} ` +
405
- `(${match.similarityScore}% similarity)${match.reasoning ? ` - ${match.reasoning}` : ""}`);
406
- });
407
- }
408
- // Use deduplicated violations for posting
409
- const uniqueViolations = deduplicationResult.uniqueViolations;
410
- let commentsPosted = 0;
411
- let commentsFailed = 0;
412
- const failedComments = [];
413
- // Post inline comments
414
- const inlineViolations = uniqueViolations.filter((v) => v.type === "inline" && v.file && v.code_snippet);
415
- for (const violation of inlineViolations) {
416
- try {
417
- // Clean file path - remove protocol prefixes ONLY (keep a/ and b/ prefixes)
418
- let cleanFilePath = violation.file;
419
- if (cleanFilePath.startsWith("src://")) {
420
- cleanFilePath = cleanFilePath.replace("src://", "");
421
- }
422
- if (cleanFilePath.startsWith("dst://")) {
423
- cleanFilePath = cleanFilePath.replace("dst://", "");
424
- }
425
- // Clean code snippet and fix search context - EXACTLY like pr-police.js
426
- const processedViolation = this.cleanCodeSnippet(violation);
427
- if (!processedViolation) {
428
- logger.debug(`⚠️ Skipping invalid violation for ${cleanFilePath}`);
429
- continue;
430
- }
431
- const formattedComment = this.formatInlineComment(processedViolation);
432
- // Debug logging
433
- logger.debug(`🔍 Posting inline comment:`);
434
- logger.debug(` File: ${cleanFilePath}`);
435
- logger.debug(` Issue: ${processedViolation.issue}`);
436
- logger.debug(` Original snippet: ${violation.code_snippet}`);
437
- logger.debug(` Processed snippet: ${processedViolation.code_snippet}`);
438
- if (processedViolation.search_context) {
439
- logger.debug(` Search context before: ${JSON.stringify(processedViolation.search_context.before)}`);
440
- logger.debug(` Search context after: ${JSON.stringify(processedViolation.search_context.after)}`);
441
- }
442
- // Use new code snippet approach - EXACTLY like pr-police.js
443
- await this.bitbucketProvider.addComment(context.identifier, formattedComment, {
444
- filePath: cleanFilePath,
445
- lineNumber: undefined, // No line number needed - use pure snippet matching
446
- lineType: processedViolation.line_type || "ADDED", // Default to ADDED if not specified
447
- codeSnippet: processedViolation.code_snippet,
448
- searchContext: processedViolation.search_context,
449
- matchStrategy: "best", // Use best match strategy instead of strict for flexibility
450
- suggestion: processedViolation.suggestion, // Pass the suggestion for inline code suggestions
451
- });
452
- commentsPosted++;
453
- logger.debug(`✅ Posted inline comment: ${cleanFilePath} (${processedViolation.issue})`);
454
- }
455
- catch (error) {
456
- commentsFailed++;
457
- const errorMsg = error.message;
458
- logger.debug(`❌ Failed to post inline comment: ${errorMsg}`);
459
- logger.debug(` File: ${violation.file}, Issue: ${violation.issue}`);
460
- logger.debug(` Code snippet: ${violation.code_snippet}`);
461
- failedComments.push({
462
- file: violation.file,
463
- issue: violation.issue,
464
- error: errorMsg,
465
- });
466
- }
467
- }
468
- // Post summary comment (include failed comments info if any) - only if enabled in config
469
- const shouldPostSummary = this.reviewConfig.postSummaryComment !== false; // Default to true if not specified
470
- if (uniqueViolations.length > 0 && shouldPostSummary) {
471
- try {
472
- const summaryComment = this.generateSummaryComment(uniqueViolations, context, failedComments);
473
- await this.bitbucketProvider.addComment(context.identifier, summaryComment);
474
- commentsPosted++;
475
- logger.debug("✅ Posted summary comment");
476
- }
477
- catch (error) {
478
- logger.debug(`❌ Failed to post summary comment: ${error.message}`);
479
- }
480
- }
481
- else if (uniqueViolations.length > 0 && !shouldPostSummary) {
482
- logger.debug("📝 Summary comment posting disabled in configuration");
483
- }
484
- logger.success(`✅ Posted ${commentsPosted} comments successfully`);
485
- if (commentsFailed > 0) {
486
- logger.warn(`⚠️ Failed to post ${commentsFailed} inline comments`);
487
- }
488
- return uniqueViolations;
489
- }
490
- /**
491
- * Format inline comment for specific violation
492
- */
493
- formatInlineComment(violation) {
494
- const severityConfig = {
495
- CRITICAL: {
496
- emoji: "🚨",
497
- badge: "**🚨 CRITICAL SECURITY ISSUE**",
498
- color: "red",
499
- },
500
- MAJOR: { emoji: "⚠️", badge: "**⚠️ MAJOR ISSUE**", color: "orange" },
501
- MINOR: { emoji: "📝", badge: "**📝 MINOR IMPROVEMENT**", color: "blue" },
502
- SUGGESTION: { emoji: "💡", badge: "**💡 SUGGESTION**", color: "green" },
503
- };
504
- const categoryIcons = {
505
- security: "🔒",
506
- performance: "⚡",
507
- maintainability: "🏗️",
508
- functionality: "⚙️",
509
- error_handling: "🛡️",
510
- testing: "🧪",
511
- general: "📋",
512
- };
513
- const config = severityConfig[violation.severity] || severityConfig.MINOR;
514
- const categoryIcon = categoryIcons[violation.category] || categoryIcons.general;
515
- let comment = `${config.badge}
516
-
517
- **${categoryIcon} ${violation.issue}**
518
-
519
- **Category**: ${violation.category.replace(/_/g, " ").replace(/\b\w/g, (l) => l.toUpperCase())}
520
-
521
- **Issue**: ${violation.message}`;
522
- if (violation.impact) {
523
- comment += `\n\n**Impact**: ${violation.impact}`;
524
- }
525
- // Add suggested fix section if suggestion is provided
526
- if (violation.suggestion) {
527
- comment += `\n\n**Suggested Fix**:\n`;
528
- // Detect the language for syntax highlighting
529
- const language = this.detectLanguageFromFile(violation.file || "");
530
- // Use proper markdown escaping for code blocks
531
- const escapedCodeBlock = this.escapeMarkdownCodeBlock(violation.suggestion, language);
532
- comment += escapedCodeBlock;
533
- }
534
- comment += `\n\n---\n*🛡️ Automated review by **Yama** • Powered by AI*`;
535
- return comment;
536
- }
537
- /**
538
- * Generate comprehensive summary comment with failed comments info
539
- */
540
- generateSummaryComment(violations, context, failedComments = []) {
541
- const stats = this.calculateStats(violations);
542
- const statusEmoji = stats.criticalCount > 0
543
- ? "🚨"
544
- : stats.majorCount > 0
545
- ? "⚠️ "
546
- : stats.minorCount > 0
547
- ? "📝"
548
- : "✅";
549
- const statusText = stats.criticalCount > 0
550
- ? "CRITICAL ISSUES FOUND"
551
- : stats.majorCount > 0
552
- ? "ISSUES DETECTED"
553
- : stats.minorCount > 0
554
- ? "IMPROVEMENTS SUGGESTED"
555
- : "CODE QUALITY APPROVED";
556
- let comment = `
557
- ╭─────────────────────────────────────────────────────────────╮
558
- │ ⚔️ **YAMA REVIEW REPORT** ⚔️ │
559
- ╰─────────────────────────────────────────────────────────────╯
560
-
561
- ## ${statusEmoji} **${statusText}**
562
-
563
- ### 📊 **Security & Quality Analysis**
564
- | **Severity** | **Count** | **Status** |
565
- |--------------|-----------|------------|
566
- | 🚨 Critical | ${stats.criticalCount} | ${stats.criticalCount > 0 ? "⛔ Must Fix" : "✅ Clear"} |
567
- | ⚠️ Major | ${stats.majorCount} | ${stats.majorCount > 0 ? "⚠️ Should Fix" : "✅ Clear"} |
568
- | 📝 Minor | ${stats.minorCount} | ${stats.minorCount > 0 ? "📝 Consider Fixing" : "✅ Clear"} |
569
- | 💡 Suggestions | ${stats.suggestionCount} | ${stats.suggestionCount > 0 ? "💡 Optional" : "✅ Clear"} |
570
-
571
- ### 🔍 **Analysis Summary**
572
- - **📁 Files Analyzed**: ${context.diffStrategy.fileCount}
573
- - **📊 Strategy Used**: ${context.diffStrategy.strategy} (${context.diffStrategy.reason})
574
- - **🎯 Total Issues**: ${stats.totalIssues}
575
- - **🏷️ PR**: #${context.pr.id} - "${context.pr.title}"`;
576
- // Add category breakdown if there are violations
577
- const violationsByCategory = this.groupViolationsByCategory(violations);
578
- if (Object.keys(violationsByCategory).length > 0) {
579
- comment += `\n\n### 📍 **Issues by Category**\n`;
580
- for (const [category, categoryViolations] of Object.entries(violationsByCategory)) {
581
- const categoryIcons = {
582
- security: "🔒",
583
- performance: "⚡",
584
- maintainability: "🏗️",
585
- functionality: "⚙️",
586
- error_handling: "🛡️",
587
- testing: "🧪",
588
- general: "📋",
589
- };
590
- const icon = categoryIcons[category] || "📋";
591
- const name = category
592
- .replace(/_/g, " ")
593
- .replace(/\b\w/g, (l) => l.toUpperCase());
594
- comment += `**${icon} ${name}**: ${categoryViolations.length} issue${categoryViolations.length !== 1 ? "s" : ""}\n`;
595
- }
596
- }
597
- // Add failed comments section if any
598
- if (failedComments.length > 0) {
599
- comment += `\n\n### ⚠️ **Note on Inline Comments**\n`;
600
- comment += `Some inline comments could not be posted due to code matching issues. `;
601
- comment += `Please review the following issues manually:\n\n`;
602
- for (const failed of failedComments) {
603
- comment += `- **${failed.issue}** in \`${failed.file || "unknown file"}\`\n`;
604
- }
605
- }
606
- // Add recommendation
607
- const recommendation = stats.criticalCount > 0
608
- ? "🚨 **URGENT**: Critical security issues must be resolved before merge"
609
- : stats.majorCount > 0
610
- ? "⚠️ **RECOMMENDED**: Address major issues before merge"
611
- : stats.minorCount > 0
612
- ? "📝 **OPTIONAL**: Consider addressing minor improvements"
613
- : "✅ **APPROVED**: Code meets security and quality standards";
614
- comment += `\n\n### 💡 **Recommendation**
615
- ${recommendation}
616
-
617
- ---
618
- **🛡️ Automated Security & Quality Review**
619
- *Powered by Yama AI • Keeping your code secure and maintainable* 🚀`;
620
- return comment;
621
- }
622
- /**
623
- * Helper methods for processing violations
624
- */
625
- cleanFilePath(filePath) {
626
- // Clean the file path but preserve the structure - EXACTLY like pr-police.js
627
- // Only clean src:// and dst:// prefixes, keep a/ and b/ prefixes
628
- const cleaned = filePath.replace(/^(src|dst):\/\//, "");
629
- // Log the cleaning for debugging
630
- if (cleaned !== filePath) {
631
- logger.debug(`Cleaned file path: ${filePath} -> ${cleaned}`);
632
- }
633
- return cleaned;
634
- }
635
- /**
636
- * Extract exact file path from diff
637
- */
638
- extractFilePathFromDiff(diff, fileName) {
639
- const lines = diff.split("\n");
640
- for (const line of lines) {
641
- if (line.startsWith("diff --git")) {
642
- // Extract both paths: a/path/to/file b/path/to/file
643
- const match = line.match(/diff --git a\/(.*?) b\/(.*?)$/);
644
- if (match &&
645
- (match[1].includes(fileName) || match[2].includes(fileName))) {
646
- return match[2]; // Return the 'b/' path (destination)
647
- }
648
- }
649
- }
650
- return null;
651
- }
652
- /**
653
- * Extract line number from diff for a specific code snippet
654
- */
655
- extractLineNumberFromDiff(fileDiff, codeSnippet) {
656
- const lines = fileDiff.split("\n");
657
- let currentNewLine = 0;
658
- let currentOldLine = 0;
659
- let inHunk = false;
660
- // Debug logging
661
- logger.debug(`Looking for snippet: "${codeSnippet}"`);
662
- for (let i = 0; i < lines.length; i++) {
663
- const line = lines[i];
664
- // Parse hunk headers (e.g., @@ -10,6 +10,8 @@)
665
- const hunkMatch = line.match(/@@ -(\d+),?\d* \+(\d+),?\d* @@/);
666
- if (hunkMatch) {
667
- // Hunk headers show the starting line numbers (1-based)
668
- currentOldLine = parseInt(hunkMatch[1]);
669
- currentNewLine = parseInt(hunkMatch[2]);
670
- inHunk = true;
671
- logger.debug(`Found hunk header: old=${currentOldLine}, new=${currentNewLine}`);
672
- continue;
673
- }
674
- // Skip lines that aren't part of the diff content
675
- if (!inHunk ||
676
- (!line.startsWith("+") &&
677
- !line.startsWith("-") &&
678
- !line.startsWith(" "))) {
679
- continue;
680
- }
681
- // Check if this line matches our snippet
682
- if (line === codeSnippet) {
683
- let resultLine;
684
- let lineType;
685
- if (line.startsWith("+")) {
686
- resultLine = currentNewLine;
687
- lineType = "ADDED";
688
- }
689
- else if (line.startsWith("-")) {
690
- resultLine = currentOldLine;
691
- lineType = "REMOVED";
692
- }
693
- else {
694
- resultLine = currentNewLine;
695
- lineType = "CONTEXT";
696
- }
697
- logger.debug(`Found match at line ${resultLine} (${lineType})`);
698
- return { lineNumber: resultLine, lineType };
699
- }
700
- // Update line counters AFTER checking for match
701
- // For added lines: only increment new line counter
702
- // For removed lines: only increment old line counter
703
- // For context lines: increment both counters
704
- if (line.startsWith("+")) {
705
- currentNewLine++;
706
- }
707
- else if (line.startsWith("-")) {
708
- currentOldLine++;
709
- }
710
- else if (line.startsWith(" ")) {
711
- currentNewLine++;
712
- currentOldLine++;
713
- }
714
- }
715
- logger.debug(`Snippet not found in diff`);
716
- return null;
717
- }
718
- /**
719
- * Escape markdown code blocks properly
720
- */
721
- escapeMarkdownCodeBlock(code, language) {
722
- // If code contains triple backticks, use quadruple backticks
723
- if (code.includes("```")) {
724
- return `\`\`\`\`${language}\n${code}\n\`\`\`\``;
725
- }
726
- return `\`\`\`${language}\n${code}\n\`\`\``;
727
- }
728
- cleanCodeSnippet(violation) {
729
- try {
730
- // Clone the violation to avoid modifying the original - EXACTLY like pr-police.js
731
- const fixed = JSON.parse(JSON.stringify(violation));
732
- // Fix search_context arrays if they contain embedded newlines
733
- if (fixed.search_context) {
734
- if (fixed.search_context.before &&
735
- Array.isArray(fixed.search_context.before)) {
736
- fixed.search_context.before = this.splitArrayLines(fixed.search_context.before);
737
- }
738
- if (fixed.search_context.after &&
739
- Array.isArray(fixed.search_context.after)) {
740
- fixed.search_context.after = this.splitArrayLines(fixed.search_context.after);
741
- }
742
- }
743
- // Ensure line_type is set based on code snippet prefix BEFORE cleaning
744
- if (!fixed.line_type && fixed.code_snippet) {
745
- if (fixed.code_snippet.startsWith("+")) {
746
- fixed.line_type = "ADDED";
747
- }
748
- else if (fixed.code_snippet.startsWith("-")) {
749
- fixed.line_type = "REMOVED";
750
- }
751
- else {
752
- fixed.line_type = "CONTEXT";
753
- }
754
- }
755
- // Clean the code_snippet field to remove diff symbols - EXACTLY like pr-police.js
756
- if (fixed.code_snippet) {
757
- fixed.code_snippet = fixed.code_snippet.replace(/^[+\-\s]/, "").trim();
758
- }
759
- // Clean the suggestion field to remove any diff symbols
760
- if (fixed.suggestion) {
761
- fixed.suggestion = fixed.suggestion
762
- .split("\n")
763
- .map((line) => line.replace(/^[+\-\s]/, "")) // Remove diff symbols at start of each line
764
- .join("\n")
765
- .trim();
766
- }
767
- return fixed;
768
- }
769
- catch (error) {
770
- logger.debug(`❌ Error cleaning code snippet: ${error.message}`);
771
- return null;
772
- }
773
- }
774
- splitArrayLines(arr) {
775
- const result = [];
776
- for (const item of arr) {
777
- if (typeof item === "string" && item.includes("\n")) {
778
- result.push(...item.split("\n").filter((line) => line.length > 0));
779
- }
780
- else {
781
- result.push(item);
782
- }
783
- }
784
- return result;
785
- }
786
- groupViolationsByCategory(violations) {
787
- const grouped = {};
788
- violations.forEach((v) => {
789
- const category = v.category || "general";
790
- if (!grouped[category]) {
791
- grouped[category] = [];
792
- }
793
- grouped[category].push(v);
794
- });
795
- return grouped;
796
- }
797
- calculateStats(violations) {
798
- return {
799
- criticalCount: violations.filter((v) => v.severity === "CRITICAL").length,
800
- majorCount: violations.filter((v) => v.severity === "MAJOR").length,
801
- minorCount: violations.filter((v) => v.severity === "MINOR").length,
802
- suggestionCount: violations.filter((v) => v.severity === "SUGGESTION")
803
- .length,
804
- totalIssues: violations.length,
805
- filesReviewed: new Set(violations.filter((v) => v.file).map((v) => v.file)).size || 1,
806
- };
807
- }
808
- generateReviewResult(violations, _duration, _context, processingStrategy) {
809
- const stats = this.calculateStats(violations);
810
- return {
811
- violations,
812
- summary: `Review found ${stats.criticalCount} critical, ${stats.majorCount} major, ${stats.minorCount} minor issues, and ${stats.suggestionCount} suggestions`,
813
- statistics: {
814
- filesReviewed: stats.filesReviewed,
815
- totalIssues: stats.totalIssues,
816
- criticalCount: stats.criticalCount,
817
- majorCount: stats.majorCount,
818
- minorCount: stats.minorCount,
819
- suggestionCount: stats.suggestionCount,
820
- processingStrategy,
821
- },
822
- positiveObservations: [], // Could be extracted from AI response
823
- };
824
- }
825
- // ============================================================================
826
- // BATCH PROCESSING METHODS
827
- // ============================================================================
828
- /**
829
- * Get batch processing configuration with defaults
830
- */
831
- getBatchProcessingConfig() {
832
- const defaultConfig = {
833
- enabled: true,
834
- maxFilesPerBatch: 3,
835
- prioritizeSecurityFiles: true,
836
- parallelBatches: false, // Keep for backward compatibility
837
- batchDelayMs: 1000,
838
- singleRequestThreshold: 5, // Use single request for ≤5 files
839
- // NEW: Parallel processing defaults
840
- parallel: {
841
- enabled: true, // Enable parallel processing by default
842
- maxConcurrentBatches: 3,
843
- rateLimitStrategy: "fixed",
844
- tokenBudgetDistribution: "equal",
845
- failureHandling: "continue",
846
- },
847
- };
848
- const mergedConfig = {
849
- ...defaultConfig,
850
- ...this.reviewConfig.batchProcessing,
851
- };
852
- // Merge parallel config separately to handle nested object properly
853
- if (mergedConfig.parallel && this.reviewConfig.batchProcessing?.parallel) {
854
- mergedConfig.parallel = {
855
- ...defaultConfig.parallel,
856
- ...this.reviewConfig.batchProcessing.parallel,
857
- };
858
- }
859
- else if (!mergedConfig.parallel) {
860
- mergedConfig.parallel = defaultConfig.parallel;
861
- }
862
- return mergedConfig;
863
- }
864
- /**
865
- * Determine if batch processing should be used
866
- */
867
- shouldUseBatchProcessing(context, batchConfig) {
868
- if (!batchConfig.enabled) {
869
- logger.debug("Batch processing disabled in config");
870
- return false;
871
- }
872
- const fileCount = context.diffStrategy.fileCount;
873
- if (fileCount <= batchConfig.singleRequestThreshold) {
874
- logger.debug(`File count (${fileCount}) ≤ threshold (${batchConfig.singleRequestThreshold}), using single request`);
875
- return false;
876
- }
877
- // Force batch processing for file-by-file strategy with many files
878
- if (context.diffStrategy.strategy === "file-by-file" && fileCount > 10) {
879
- logger.debug(`File-by-file strategy with ${fileCount} files, forcing batch processing`);
880
- return true;
881
- }
882
- logger.debug(`File count (${fileCount}) > threshold (${batchConfig.singleRequestThreshold}), using batch processing`);
883
- return true;
884
- }
885
- /**
886
- * Main batch processing method with parallel processing support
887
- */
888
- async reviewWithBatchProcessing(context, options, batchConfig) {
889
- const startTime = Date.now();
890
- try {
891
- // Step 1: Prioritize and organize files
892
- const prioritizedFiles = await this.prioritizeFiles(context, batchConfig);
893
- logger.info(`📋 Prioritized ${prioritizedFiles.length} files: ${prioritizedFiles.filter((f) => f.priority === "high").length} high, ${prioritizedFiles.filter((f) => f.priority === "medium").length} medium, ${prioritizedFiles.filter((f) => f.priority === "low").length} low priority`);
894
- // Step 2: Create batches
895
- const batches = this.createBatches(prioritizedFiles, batchConfig);
896
- logger.info(`📦 Created ${batches.length} batches (max ${batchConfig.maxFilesPerBatch} files per batch)`);
897
- // Step 3: Determine processing strategy
898
- const useParallel = batchConfig.parallel?.enabled && batches.length > 1;
899
- if (useParallel) {
900
- logger.info(`🚀 Using parallel processing: ${batches.length} batches, max ${batchConfig.parallel?.maxConcurrentBatches} concurrent`);
901
- return await this.processInParallel(batches, context, options, batchConfig);
902
- }
903
- else {
904
- logger.info(`🔄 Using serial processing: ${batches.length} batches`);
905
- return await this.processSerially(batches, context, options, batchConfig);
906
- }
907
- }
908
- catch (error) {
909
- logger.error(`Batch processing failed: ${error.message}`);
910
- throw error;
911
- }
912
- }
913
- /**
914
- * Process batches in parallel with concurrency control
915
- */
916
- async processInParallel(batches, context, options, batchConfig) {
917
- const startTime = Date.now();
918
- const parallelConfig = batchConfig.parallel;
919
- // Calculate optimal concurrency
920
- const avgTokensPerBatch = batches.reduce((sum, b) => sum + b.estimatedTokens, 0) / batches.length;
921
- const optimalConcurrency = calculateOptimalConcurrency(batches.length, parallelConfig.maxConcurrentBatches, avgTokensPerBatch, this.getSafeTokenLimit());
922
- // Initialize concurrency control
923
- const semaphore = new Semaphore(optimalConcurrency);
924
- const tokenBudget = new TokenBudgetManager(this.getSafeTokenLimit() * 0.8); // 80% for safety
925
- // NEW: Pre-allocate tokens based on distribution strategy
926
- const distributionStrategy = parallelConfig.tokenBudgetDistribution || "equal";
927
- logger.info(`🎯 Using ${distributionStrategy} token distribution strategy for ${batches.length} batches`);
928
- const tokenAllocations = this.preAllocateTokens(batches, tokenBudget, distributionStrategy);
929
- if (!tokenAllocations) {
930
- const totalRequired = batches.reduce((sum, b) => sum + b.estimatedTokens, 0);
931
- const totalBudget = tokenBudget.getTotalBudget();
932
- throw new Error(`Insufficient token budget: required ${totalRequired}, available ${totalBudget}. ` +
933
- `Consider reducing batch count (current: ${batches.length}) or increasing token limit.`);
934
- }
935
- // Apply pre-allocated tokens to the budget manager
936
- if (!tokenBudget.preAllocateAllBatches(tokenAllocations)) {
937
- throw new Error("Failed to pre-allocate tokens for all batches");
938
- }
939
- logger.info(`🎯 Parallel processing: ${optimalConcurrency} concurrent batches, ${tokenBudget.getTotalBudget()} token budget (${distributionStrategy} distribution)`);
940
- // Log allocation details
941
- tokenAllocations.forEach((tokens, batchIndex) => {
942
- logger.debug(`Batch ${batchIndex + 1}: ${tokens} tokens allocated`);
943
- });
944
- const batchResults = new Array(batches.length);
945
- const allViolations = [];
946
- const processingPromises = [];
947
- // Process batches with controlled concurrency
948
- for (let i = 0; i < batches.length; i++) {
949
- const batch = batches[i];
950
- const processingPromise = this.processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, i, batches.length)
951
- .then((result) => {
952
- batchResults[i] = result; // Maintain order
953
- if (result.violations) {
954
- allViolations.push(...result.violations);
955
- }
956
- })
957
- .catch((error) => {
958
- logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
959
- batchResults[i] = {
960
- batchIndex: i,
961
- files: batch.files,
962
- violations: [],
963
- processingTime: 0,
964
- error: error.message,
965
- };
966
- // Handle failure strategy
967
- if (parallelConfig.failureHandling === "stop-all") {
968
- throw error;
969
- }
970
- });
971
- processingPromises.push(processingPromise);
972
- // Add small delay between batch starts to avoid overwhelming
973
- if (i < batches.length - 1) {
974
- await new Promise((resolve) => setTimeout(resolve, 200));
975
- }
976
- }
977
- // Wait for all batches to complete
978
- await Promise.allSettled(processingPromises);
979
- // Filter out undefined results and sort by batch index
980
- const validResults = batchResults
981
- .filter((r) => r !== undefined)
982
- .sort((a, b) => a.batchIndex - b.batchIndex);
983
- const totalTime = Date.now() - startTime;
984
- const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
985
- const budgetStatus = tokenBudget.getBudgetStatus();
986
- logger.success(`🎯 Parallel processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch, ${budgetStatus.utilizationPercent}% token usage)`);
987
- return { violations: allViolations, batchResults: validResults };
988
- }
989
- /**
990
- * Process batches serially (original implementation)
991
- */
992
- async processSerially(batches, context, options, batchConfig) {
993
- const startTime = Date.now();
994
- const batchResults = [];
995
- const allViolations = [];
996
- for (let i = 0; i < batches.length; i++) {
997
- const batch = batches[i];
998
- logger.info(`🔄 Processing batch ${i + 1}/${batches.length} (${batch.files.length} files, ${batch.priority} priority, serial)`);
999
- try {
1000
- const batchResult = await this.processBatch(batch, context, options);
1001
- batchResults.push(batchResult);
1002
- allViolations.push(...batchResult.violations);
1003
- logger.info(`✅ Batch ${i + 1} completed: ${batchResult.violations.length} violations found in ${Math.round(batchResult.processingTime / 1000)}s`);
1004
- // Add delay between batches if configured
1005
- if (i < batches.length - 1 && batchConfig.batchDelayMs > 0) {
1006
- logger.debug(`⏳ Waiting ${batchConfig.batchDelayMs}ms before next batch`);
1007
- await new Promise((resolve) => setTimeout(resolve, batchConfig.batchDelayMs));
1008
- }
1009
- }
1010
- catch (error) {
1011
- logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
1012
- // Record failed batch
1013
- batchResults.push({
1014
- batchIndex: i,
1015
- files: batch.files,
1016
- violations: [],
1017
- processingTime: Date.now() - startTime,
1018
- error: error.message,
1019
- });
1020
- }
1021
- }
1022
- const totalTime = Date.now() - startTime;
1023
- const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
1024
- logger.success(`🎯 Serial processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch)`);
1025
- return { violations: allViolations, batchResults };
1026
- }
1027
- /**
1028
- * Pre-allocate tokens based on distribution strategy with proper integer arithmetic
1029
- */
1030
- preAllocateTokens(batches, tokenBudget, strategy) {
1031
- // Ensure we're working with integer budget to avoid floating-point issues
1032
- const totalBudget = Math.floor(tokenBudget.getTotalBudget());
1033
- const allocations = new Map();
1034
- if (strategy === "equal") {
1035
- // Equal distribution: divide budget equally among all batches with proper remainder handling
1036
- const baseTokens = Math.floor(totalBudget / batches.length);
1037
- const remainder = totalBudget % batches.length;
1038
- if (baseTokens < 1000) {
1039
- // Minimum viable tokens per batch
1040
- logger.error(`Equal distribution would give ${baseTokens} tokens per batch, which is insufficient`);
1041
- return null;
1042
- }
1043
- let totalAllocated = 0;
1044
- for (let i = 0; i < batches.length; i++) {
1045
- // Distribute remainder to first few batches
1046
- const tokens = baseTokens + (i < remainder ? 1 : 0);
1047
- allocations.set(i, tokens);
1048
- totalAllocated += tokens;
1049
- }
1050
- // Double-check that we haven't exceeded budget due to any calculation errors
1051
- if (totalAllocated > totalBudget) {
1052
- logger.error(`Equal distribution calculation error: ${totalAllocated} > ${totalBudget}`);
1053
- // Adjust the last batch to fit within budget
1054
- const lastBatchIndex = batches.length - 1;
1055
- const lastBatchTokens = allocations.get(lastBatchIndex);
1056
- const adjustment = totalAllocated - totalBudget;
1057
- const newLastBatchTokens = lastBatchTokens - adjustment;
1058
- if (newLastBatchTokens < 1000) {
1059
- logger.error(`Adjustment would result in last batch having ${newLastBatchTokens} tokens, which is below the minimum threshold (1000). Aborting allocation.`);
1060
- return null;
1061
- }
1062
- allocations.set(lastBatchIndex, newLastBatchTokens);
1063
- totalAllocated = totalBudget;
1064
- logger.warn(`Adjusted last batch by -${adjustment} tokens to fit budget`);
1065
- }
1066
- logger.info(`Equal distribution: ${baseTokens} tokens per batch for ${batches.length} batches`);
1067
- logger.debug(`Pre-allocated ${totalAllocated} tokens across ${batches.length} batches (${totalBudget - totalAllocated} remaining)`);
1068
- }
1069
- else if (strategy === "weighted") {
1070
- // Weighted distribution: try weighted first, automatically fallback to equal if needed
1071
- logger.debug(`Attempting weighted distribution...`);
1072
- const weightedResult = this.tryWeightedAllocation(batches, totalBudget);
1073
- if (weightedResult) {
1074
- // Weighted allocation succeeded
1075
- weightedResult.forEach((tokens, batchIndex) => {
1076
- allocations.set(batchIndex, tokens);
1077
- });
1078
- logger.info(`✅ Weighted distribution: optimal allocation successful`);
1079
- logger.debug(`Pre-allocated ${Array.from(weightedResult.values()).reduce((sum, tokens) => sum + tokens, 0)} tokens across ${batches.length} batches`);
1080
- }
1081
- else {
1082
- // Weighted allocation failed, automatically fallback to equal distribution
1083
- logger.warn(`⚠️ Weighted distribution: insufficient budget for optimal allocation, falling back to equal distribution`);
1084
- const equalResult = this.tryEqualAllocation(batches, totalBudget);
1085
- if (!equalResult) {
1086
- logger.error(`Weighted distribution: both optimal and equal allocation failed`);
1087
- return null;
1088
- }
1089
- equalResult.forEach((tokens, batchIndex) => {
1090
- allocations.set(batchIndex, tokens);
1091
- });
1092
- logger.info(`✅ Weighted distribution: equal allocation fallback successful`);
1093
- logger.debug(`Pre-allocated ${Array.from(equalResult.values()).reduce((sum, tokens) => sum + tokens, 0)} tokens across ${batches.length} batches`);
1094
- }
1095
- }
1096
- // Final validation with strict integer checking
1097
- const totalAllocated = Array.from(allocations.values()).reduce((sum, tokens) => sum + tokens, 0);
1098
- if (totalAllocated > totalBudget) {
1099
- logger.error(`CRITICAL: Total allocation (${totalAllocated}) exceeds budget (${totalBudget}) - this should never happen`);
1100
- logger.error(`Budget type: ${typeof totalBudget}, Allocation type: ${typeof totalAllocated}`);
1101
- logger.error(`Individual allocations: ${Array.from(allocations.entries())
1102
- .map(([i, tokens]) => `batch${i}:${tokens}`)
1103
- .join(", ")}`);
1104
- throw new Error(`Total allocation (${totalAllocated}) exceeds budget (${totalBudget}) - this should never happen`);
1105
- }
1106
- return allocations;
1107
- }
1108
- /**
1109
- * Try weighted allocation for batches
1110
- */
1111
- tryWeightedAllocation(batches, totalBudget) {
1112
- const totalEstimated = batches.reduce((sum, batch) => sum + batch.estimatedTokens, 0);
1113
- if (totalEstimated > totalBudget) {
1114
- logger.debug(`Total estimated tokens (${totalEstimated}) exceed budget (${totalBudget})`);
1115
- return null;
1116
- }
1117
- let totalAllocated = 0;
1118
- const minTokensPerBatch = 1000;
1119
- const allocations = new Map();
1120
- for (let i = 0; i < batches.length; i++) {
1121
- const batch = batches[i];
1122
- const weight = batch.estimatedTokens / totalEstimated;
1123
- const allocation = Math.floor(weight * totalBudget);
1124
- const finalAllocation = Math.max(allocation, minTokensPerBatch);
1125
- allocations.set(i, finalAllocation);
1126
- totalAllocated += finalAllocation;
1127
- }
1128
- // Check if we exceeded budget due to minimum allocations
1129
- if (totalAllocated > totalBudget) {
1130
- logger.debug(`Weighted allocation with minimums (${totalAllocated}) exceeds budget (${totalBudget})`);
1131
- return null;
1132
- }
1133
- return allocations;
1134
- }
1135
- /**
1136
- * Try equal allocation for batches
1137
- */
1138
- tryEqualAllocation(batches, totalBudget) {
1139
- const baseTokens = Math.floor(totalBudget / batches.length);
1140
- const remainder = totalBudget % batches.length;
1141
- if (baseTokens < 1000) {
1142
- // Minimum viable tokens per batch
1143
- logger.debug(`Equal distribution would give ${baseTokens} tokens per batch, which is insufficient`);
1144
- return null;
1145
- }
1146
- const allocations = new Map();
1147
- let totalAllocated = 0;
1148
- for (let i = 0; i < batches.length; i++) {
1149
- // Distribute remainder to first few batches
1150
- const tokens = baseTokens + (i < remainder ? 1 : 0);
1151
- allocations.set(i, tokens);
1152
- totalAllocated += tokens;
1153
- }
1154
- // Double-check that we haven't exceeded budget due to any calculation errors
1155
- if (totalAllocated > totalBudget) {
1156
- logger.debug(`Equal distribution calculation error: ${totalAllocated} > ${totalBudget}`);
1157
- // Adjust the last batch to fit within budget
1158
- const lastBatchIndex = batches.length - 1;
1159
- const lastBatchTokens = allocations.get(lastBatchIndex);
1160
- const adjustment = totalAllocated - totalBudget;
1161
- const newLastBatchTokens = lastBatchTokens - adjustment;
1162
- if (newLastBatchTokens < 1000) {
1163
- logger.error(`Adjustment would result in last batch having ${newLastBatchTokens} tokens, which is below the minimum threshold (1000). Aborting allocation.`);
1164
- return null;
1165
- }
1166
- allocations.set(lastBatchIndex, newLastBatchTokens);
1167
- }
1168
- return allocations;
1169
- }
1170
- /**
1171
- * Process a single batch with concurrency control
1172
- */
1173
- async processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, batchIndex, totalBatches) {
1174
- // Acquire semaphore permit
1175
- await semaphore.acquire();
1176
- try {
1177
- // NEW: In pre-allocation mode, tokens are already allocated, just verify and mark as processing
1178
- if (tokenBudget.isPreAllocationMode()) {
1179
- const batchState = tokenBudget.getBatchState(batchIndex);
1180
- if (batchState !== "pending") {
1181
- throw new Error(`Batch ${batchIndex + 1} is not in pending state (current: ${batchState})`);
1182
- }
1183
- // Mark as processing (this is handled in allocateForBatch for pre-allocation mode)
1184
- if (!tokenBudget.allocateForBatch(batchIndex, batch.estimatedTokens)) {
1185
- throw new Error(`Failed to mark batch ${batchIndex + 1} as processing`);
1186
- }
1187
- }
1188
- else {
1189
- // Legacy mode: allocate tokens dynamically
1190
- if (!tokenBudget.allocateForBatch(batchIndex, batch.estimatedTokens)) {
1191
- throw new Error(`Insufficient token budget for batch ${batchIndex + 1}`);
1192
- }
1193
- }
1194
- logger.info(`🔄 Processing batch ${batchIndex + 1}/${totalBatches} (${batch.files.length} files, parallel)`);
1195
- // Process the batch (existing logic)
1196
- const result = await this.processBatch(batch, context, options);
1197
- logger.info(`✅ Batch ${batchIndex + 1} completed: ${result.violations.length} violations in ${Math.round(result.processingTime / 1000)}s`);
1198
- return result;
1199
- }
1200
- catch (error) {
1201
- // Mark batch as failed in token budget
1202
- tokenBudget.markBatchFailed(batchIndex, error.message);
1203
- throw error;
1204
- }
1205
- finally {
1206
- // Always release resources
1207
- tokenBudget.releaseBatch(batchIndex);
1208
- semaphore.release();
1209
- }
1210
- }
1211
- /**
1212
- * Prioritize files based on security importance and file type
1213
- */
1214
- async prioritizeFiles(context, batchConfig) {
1215
- const files = context.pr.fileChanges || [];
1216
- const prioritizedFiles = [];
1217
- for (const filePath of files) {
1218
- const priority = this.calculateFilePriority(filePath, batchConfig);
1219
- const estimatedTokens = await this.estimateFileTokens(filePath, context);
1220
- prioritizedFiles.push({
1221
- path: filePath,
1222
- priority,
1223
- estimatedTokens,
1224
- diff: context.fileDiffs?.get(filePath),
1225
- });
1226
- }
1227
- // Sort by priority (high -> medium -> low) then by estimated tokens (smaller first)
1228
- prioritizedFiles.sort((a, b) => {
1229
- const priorityOrder = { high: 0, medium: 1, low: 2 };
1230
- const priorityDiff = priorityOrder[a.priority] - priorityOrder[b.priority];
1231
- if (priorityDiff !== 0) {
1232
- return priorityDiff;
1233
- }
1234
- return a.estimatedTokens - b.estimatedTokens;
1235
- });
1236
- return prioritizedFiles;
1237
- }
1238
- /**
1239
- * Calculate file priority based on path and content
1240
- */
1241
- calculateFilePriority(filePath, batchConfig) {
1242
- if (!batchConfig.prioritizeSecurityFiles) {
1243
- return "medium"; // All files same priority if not prioritizing
1244
- }
1245
- const path = filePath.toLowerCase();
1246
- // High priority: Security-sensitive files
1247
- const highPriorityPatterns = [
1248
- /auth/i,
1249
- /login/i,
1250
- /password/i,
1251
- /token/i,
1252
- /jwt/i,
1253
- /oauth/i,
1254
- /crypto/i,
1255
- /encrypt/i,
1256
- /decrypt/i,
1257
- /hash/i,
1258
- /security/i,
1259
- /payment/i,
1260
- /billing/i,
1261
- /transaction/i,
1262
- /money/i,
1263
- /wallet/i,
1264
- /admin/i,
1265
- /privilege/i,
1266
- /permission/i,
1267
- /role/i,
1268
- /access/i,
1269
- /config/i,
1270
- /env/i,
1271
- /secret/i,
1272
- /key/i,
1273
- /credential/i,
1274
- /api/i,
1275
- /endpoint/i,
1276
- /route/i,
1277
- /controller/i,
1278
- /middleware/i,
1279
- ];
1280
- if (highPriorityPatterns.some((pattern) => pattern.test(path))) {
1281
- return "high";
1282
- }
1283
- // Low priority: Documentation, tests, config files
1284
- const lowPriorityPatterns = [
1285
- /\.md$/i,
1286
- /\.txt$/i,
1287
- /readme/i,
1288
- /changelog/i,
1289
- /license/i,
1290
- /test/i,
1291
- /spec/i,
1292
- /\.test\./i,
1293
- /\.spec\./i,
1294
- /__tests__/i,
1295
- /\.json$/i,
1296
- /\.yaml$/i,
1297
- /\.yml$/i,
1298
- /\.toml$/i,
1299
- /\.ini$/i,
1300
- /\.lock$/i,
1301
- /package-lock/i,
1302
- /yarn\.lock/i,
1303
- /pnpm-lock/i,
1304
- /\.gitignore/i,
1305
- /\.eslint/i,
1306
- /\.prettier/i,
1307
- /tsconfig/i,
1308
- /\.svg$/i,
1309
- /\.png$/i,
1310
- /\.jpg$/i,
1311
- /\.jpeg$/i,
1312
- /\.gif$/i,
1313
- ];
1314
- if (lowPriorityPatterns.some((pattern) => pattern.test(path))) {
1315
- return "low";
1316
- }
1317
- // Medium priority: Everything else
1318
- return "medium";
1319
- }
1320
- /**
1321
- * Estimate token count for a file
1322
- */
1323
- async estimateFileTokens(filePath, context) {
1324
- try {
1325
- let content = "";
1326
- if (context.fileDiffs?.has(filePath)) {
1327
- content = context.fileDiffs.get(filePath) || "";
1328
- }
1329
- else if (context.prDiff) {
1330
- // Extract file content from whole diff
1331
- const diffLines = context.prDiff.diff.split("\n");
1332
- let inFile = false;
1333
- for (const line of diffLines) {
1334
- if (line.startsWith("diff --git") && line.includes(filePath)) {
1335
- inFile = true;
1336
- continue;
1337
- }
1338
- if (inFile && line.startsWith("diff --git")) {
1339
- break;
1340
- }
1341
- if (inFile) {
1342
- content += line + "\n";
1343
- }
1344
- }
1345
- }
1346
- // Rough estimation: ~4 characters per token
1347
- const estimatedTokens = Math.ceil(content.length / 4);
1348
- // Add base overhead for context and prompts
1349
- const baseOverhead = 1000;
1350
- return estimatedTokens + baseOverhead;
1351
- }
1352
- catch (error) {
1353
- logger.debug(`Error estimating tokens for ${filePath}: ${error.message}`);
1354
- return 2000; // Default estimate
1355
- }
1356
- }
1357
- /**
1358
- * Create batches from prioritized files
1359
- */
1360
- createBatches(prioritizedFiles, batchConfig) {
1361
- const batches = [];
1362
- const maxTokensPerBatch = this.getSafeTokenLimit() * 0.7; // Use 70% of limit for safety
1363
- let currentBatch = {
1364
- files: [],
1365
- priority: "medium",
1366
- estimatedTokens: 0,
1367
- batchIndex: 0,
1368
- };
1369
- for (const file of prioritizedFiles) {
1370
- const wouldExceedTokens = currentBatch.estimatedTokens + file.estimatedTokens > maxTokensPerBatch;
1371
- const wouldExceedFileCount = currentBatch.files.length >= batchConfig.maxFilesPerBatch;
1372
- if ((wouldExceedTokens || wouldExceedFileCount) &&
1373
- currentBatch.files.length > 0) {
1374
- // Finalize current batch
1375
- batches.push(currentBatch);
1376
- // Start new batch
1377
- currentBatch = {
1378
- files: [],
1379
- priority: file.priority,
1380
- estimatedTokens: 0,
1381
- batchIndex: batches.length,
1382
- };
1383
- }
1384
- // Add file to current batch
1385
- currentBatch.files.push(file.path);
1386
- currentBatch.estimatedTokens += file.estimatedTokens;
1387
- // Update batch priority to highest priority file in batch
1388
- if (file.priority === "high" ||
1389
- (file.priority === "medium" && currentBatch.priority === "low")) {
1390
- currentBatch.priority = file.priority;
1391
- }
1392
- }
1393
- // Add final batch if it has files
1394
- if (currentBatch.files.length > 0) {
1395
- batches.push(currentBatch);
1396
- }
1397
- return batches;
1398
- }
1399
- /**
1400
- * Process a single batch of files
1401
- */
1402
- async processBatch(batch, context, options) {
1403
- const startTime = Date.now();
1404
- try {
1405
- // Create batch-specific context
1406
- const batchContext = this.createBatchContext(batch, context);
1407
- // Build batch-specific prompt
1408
- const batchPrompt = this.buildBatchAnalysisPrompt(batchContext, batch, options);
1409
- // Analyze with AI
1410
- const violations = await this.analyzeWithAI(batchPrompt, batchContext);
1411
- const processingTime = Date.now() - startTime;
1412
- return {
1413
- batchIndex: batch.batchIndex,
1414
- files: batch.files,
1415
- violations,
1416
- processingTime,
1417
- };
1418
- }
1419
- catch (error) {
1420
- const processingTime = Date.now() - startTime;
1421
- return {
1422
- batchIndex: batch.batchIndex,
1423
- files: batch.files,
1424
- violations: [],
1425
- processingTime,
1426
- error: error.message,
1427
- };
1428
- }
1429
- }
1430
- /**
1431
- * Create context for a specific batch
1432
- */
1433
- createBatchContext(batch, originalContext) {
1434
- // Create a filtered context containing only the files in this batch
1435
- const batchFileDiffs = new Map();
1436
- if (originalContext.fileDiffs) {
1437
- for (const filePath of batch.files) {
1438
- const diff = originalContext.fileDiffs.get(filePath);
1439
- if (diff) {
1440
- batchFileDiffs.set(filePath, diff);
1441
- }
1442
- }
1443
- }
1444
- return {
1445
- ...originalContext,
1446
- fileDiffs: batchFileDiffs,
1447
- diffStrategy: {
1448
- ...originalContext.diffStrategy,
1449
- fileCount: batch.files.length,
1450
- strategy: "file-by-file", // Always use file-by-file for batches
1451
- reason: `Batch processing ${batch.files.length} files`,
1452
- },
1453
- pr: {
1454
- ...originalContext.pr,
1455
- fileChanges: batch.files,
1456
- },
1457
- };
1458
- }
1459
/**
 * Build the AI analysis prompt for a specific batch of files.
 *
 * Interpolates the batch metadata, PR metadata, project context/rules, and
 * the extracted diff content into a single instruction prompt that demands
 * a strict JSON response (the shape parsed later by parseAIResponse).
 *
 * @param batchContext - Batch-scoped context (see createBatchContext).
 * @param batch - Batch descriptor: batchIndex, files, priority.
 * @param options - Review options. NOTE(review): not referenced in this
 *   body — presumably reserved for future prompt tuning; confirm.
 * @returns The full prompt string.
 */
buildBatchAnalysisPrompt(batchContext, batch, options) {
    // Diff text for just this batch (batchContext.fileDiffs was pre-filtered).
    const diffContent = this.extractDiffContent(batchContext);
    return `Conduct a focused security and quality analysis of this batch of ${batch.files.length} files (${batch.priority} priority).

## BATCH CONTEXT:
**Batch**: ${batch.batchIndex + 1}
**Files**: ${batch.files.length}
**Priority**: ${batch.priority}
**Files in batch**: ${batch.files.join(", ")}

## PR CONTEXT:
**Title**: ${batchContext.pr.title}
**Author**: ${batchContext.pr.author}
**Repository**: ${batchContext.identifier.workspace}/${batchContext.identifier.repository}

## PROJECT CONTEXT:
${batchContext.projectContext.memoryBank.projectContext || batchContext.projectContext.memoryBank.summary}

## PROJECT RULES & STANDARDS:
${batchContext.projectContext.clinerules || "No specific rules defined"}

## BATCH CODE CHANGES:
${diffContent}

## CRITICAL INSTRUCTIONS FOR CODE SNIPPETS:

When you identify an issue in the code, you MUST:
1. Copy the EXACT line from the diff above, including the diff prefix (+, -, or space at the beginning)
2. Do NOT modify, clean, or reformat the line
3. Include the complete line as it appears in the diff
4. If the issue spans multiple lines, choose the most relevant single line

## ANALYSIS REQUIREMENTS:

${this.getAnalysisRequirements()}

### 📋 OUTPUT FORMAT
Return ONLY valid JSON:
{
  "violations": [
    {
      "type": "inline",
      "file": "exact/file/path.ext",
      "code_snippet": "EXACT line from diff INCLUDING the +/- prefix",
      "search_context": {
        "before": ["line before from diff with prefix"],
        "after": ["line after from diff with prefix"]
      },
      "severity": "CRITICAL|MAJOR|MINOR|SUGGESTION",
      "category": "security|performance|maintainability|functionality",
      "issue": "Brief issue title",
      "message": "Detailed explanation",
      "impact": "Potential impact description",
      "suggestion": "Clean, executable code fix (no diff symbols)"
    }
  ],
  "summary": "Batch analysis summary",
  "positiveObservations": ["Good practices found"],
  "statistics": {
    "filesReviewed": ${batch.files.length},
    "totalIssues": 0,
    "criticalCount": 0,
    "majorCount": 0,
    "minorCount": 0,
    "suggestionCount": 0
  }
}`;
}
1530
- /**
1531
- * Utility methods
1532
- */
1533
- parseAIResponse(result) {
1534
- try {
1535
- const responseText = result.content || result.text || result.response || "";
1536
- if (!responseText) {
1537
- return { violations: [] };
1538
- }
1539
- // Extract JSON from response
1540
- const jsonMatch = responseText.match(/\{[\s\S]*\}/);
1541
- if (jsonMatch) {
1542
- return JSON.parse(jsonMatch[0]);
1543
- }
1544
- return { violations: [] };
1545
- }
1546
- catch (error) {
1547
- logger.debug(`Failed to parse AI response: ${error.message}`);
1548
- return { violations: [] };
1549
- }
1550
- }
1551
/**
 * Resolve diff line information for a violation so it can be posted as an
 * inline PR comment.
 *
 * Locates the per-file diff for `violation.file` — slicing it out of the
 * whole-PR diff under the "whole" strategy, or looking it up in
 * `context.fileDiffs` under "file-by-file" — trying every path variation
 * (a/ b/ prefixes, stripped directories, app/ toggles) and finally a
 * partial suffix match. The found diff is then searched for the snippet.
 *
 * @param violation - AI-reported violation; needs `file` and
 *   `code_snippet` (snippet expected to carry its +/-/space diff prefix).
 * @param context - Review context with `diffStrategy` plus `prDiff`
 *   (whole) or `fileDiffs` (file-by-file).
 * @returns Line info from extractLineNumberFromDiff (logged as
 *   `{lineNumber, lineType}`), or null when the diff or snippet cannot be
 *   found. All errors are logged at debug level and reported as null.
 */
extractLineInfoForComment(violation, context) {
    if (!violation.file || !violation.code_snippet) {
        return null;
    }
    try {
        // Get the diff for this specific file
        let fileDiff;
        if (context.diffStrategy.strategy === "whole" && context.prDiff) {
            // Extract file diff from whole diff
            const diffLines = context.prDiff.diff.split("\n");
            let fileStartIndex = -1;
            // Create all possible path variations for matching
            const filePathVariations = this.generatePathVariations(violation.file);
            for (let i = 0; i < diffLines.length; i++) {
                const line = diffLines[i];
                if (line.startsWith("diff --git")) {
                    // Check if any variation matches
                    for (const pathVariation of filePathVariations) {
                        if (line.includes(pathVariation)) {
                            fileStartIndex = i;
                            break;
                        }
                    }
                    if (fileStartIndex >= 0) {
                        break;
                    }
                }
            }
            if (fileStartIndex >= 0) {
                // Slice from this file's header up to the next "diff --git"
                // header (or to the end of the diff for the last file).
                const nextFileIndex = diffLines.findIndex((line, idx) => idx > fileStartIndex && line.startsWith("diff --git"));
                fileDiff = diffLines
                    .slice(fileStartIndex, nextFileIndex > 0 ? nextFileIndex : diffLines.length)
                    .join("\n");
            }
        }
        else if (context.diffStrategy.strategy === "file-by-file" &&
            context.fileDiffs) {
            // Try all possible path variations
            const pathVariations = this.generatePathVariations(violation.file);
            for (const path of pathVariations) {
                fileDiff = context.fileDiffs.get(path);
                if (fileDiff) {
                    logger.debug(`Found diff for ${violation.file} using variation: ${path}`);
                    break;
                }
            }
            // If still not found, try to find by partial match
            if (!fileDiff) {
                for (const [key, value] of context.fileDiffs.entries()) {
                    if (key.endsWith(violation.file) || violation.file.endsWith(key)) {
                        fileDiff = value;
                        logger.debug(`Found diff for ${violation.file} using partial match: ${key}`);
                        break;
                    }
                }
            }
        }
        if (fileDiff) {
            // Delegate the snippet -> line-number search to the helper.
            const lineInfo = this.extractLineNumberFromDiff(fileDiff, violation.code_snippet);
            if (lineInfo) {
                logger.debug(`Extracted line info for ${violation.file}: line ${lineInfo.lineNumber}, type ${lineInfo.lineType}`);
            }
            return lineInfo;
        }
        else {
            logger.debug(`No diff found for file: ${violation.file}`);
        }
    }
    catch (error) {
        logger.debug(`Error extracting line info: ${error.message}`);
    }
    return null;
}
1627
- /**
1628
- * Detect programming language from file extension
1629
- */
1630
- detectLanguageFromFile(filePath) {
1631
- const ext = filePath.split(".").pop()?.toLowerCase();
1632
- const languageMap = {
1633
- js: "javascript",
1634
- jsx: "javascript",
1635
- ts: "typescript",
1636
- tsx: "typescript",
1637
- py: "python",
1638
- java: "java",
1639
- cpp: "cpp",
1640
- c: "c",
1641
- cs: "csharp",
1642
- php: "php",
1643
- rb: "ruby",
1644
- go: "go",
1645
- rs: "rust",
1646
- res: "rescript",
1647
- kt: "kotlin",
1648
- swift: "swift",
1649
- scala: "scala",
1650
- sh: "bash",
1651
- sql: "sql",
1652
- json: "json",
1653
- yaml: "yaml",
1654
- yml: "yaml",
1655
- xml: "xml",
1656
- html: "html",
1657
- css: "css",
1658
- scss: "scss",
1659
- sass: "sass",
1660
- md: "markdown",
1661
- };
1662
- return languageMap[ext || ""] || "text";
1663
- }
1664
- /**
1665
- * Generate all possible path variations for a file
1666
- */
1667
- generatePathVariations(filePath) {
1668
- const variations = new Set();
1669
- // Add original path
1670
- variations.add(filePath);
1671
- // Add with a/ and b/ prefixes
1672
- variations.add(`a/${filePath}`);
1673
- variations.add(`b/${filePath}`);
1674
- // Handle nested paths
1675
- if (filePath.includes("/")) {
1676
- const parts = filePath.split("/");
1677
- // Try removing first directory
1678
- if (parts.length > 1) {
1679
- variations.add(parts.slice(1).join("/"));
1680
- }
1681
- // Try removing first two directories
1682
- if (parts.length > 2) {
1683
- variations.add(parts.slice(2).join("/"));
1684
- }
1685
- // Try with just the filename
1686
- variations.add(parts[parts.length - 1]);
1687
- }
1688
- // Remove app/ prefix variations
1689
- if (filePath.startsWith("app/")) {
1690
- const withoutApp = filePath.substring(4);
1691
- variations.add(withoutApp);
1692
- variations.add(`a/${withoutApp}`);
1693
- variations.add(`b/${withoutApp}`);
1694
- }
1695
- // Add app/ prefix variations
1696
- if (!filePath.startsWith("app/")) {
1697
- variations.add(`app/${filePath}`);
1698
- variations.add(`a/app/${filePath}`);
1699
- variations.add(`b/app/${filePath}`);
1700
- }
1701
- return Array.from(variations);
1702
- }
1703
- }
1704
/**
 * Factory for CodeReviewer instances.
 *
 * @param bitbucketProvider - Provider used for PR/diff access.
 * @param aiConfig - AI provider configuration.
 * @param reviewConfig - Review behavior configuration.
 * @returns A freshly constructed CodeReviewer.
 */
export function createCodeReviewer(bitbucketProvider, aiConfig, reviewConfig) {
    const reviewer = new CodeReviewer(bitbucketProvider, aiConfig, reviewConfig);
    return reviewer;
}
1707
- //# sourceMappingURL=CodeReviewer.js.map