@juspay/yama 1.3.0 → 1.4.1

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,215 @@
+ /**
+  * Content Similarity Service for Semantic Deduplication
+  * Uses AI to compare violations with existing PR comments for semantic similarity
+  */
+ import { logger } from "./Logger.js";
+ /**
+  * Service for calculating semantic similarity between violations and PR comments
+  */
+ export class ContentSimilarityService {
+     neurolink;
+     aiConfig;
+     constructor(aiConfig) {
+         this.aiConfig = aiConfig;
+     }
+     /**
+      * Calculate similarity scores between violations and comments in batches
+      */
+     async batchCalculateSimilarity(violations, comments, batchSize = 15) {
+         const startTime = Date.now();
+         logger.debug(`🔍 Starting semantic similarity analysis: ${violations.length} violations vs ${comments.length} comments`);
+         if (violations.length === 0 || comments.length === 0) {
+             logger.debug("⏭️ No violations or comments to compare, skipping similarity analysis");
+             return [];
+         }
+         // Prepare violation and comment content for AI analysis
+         const violationData = this.prepareViolationContent(violations);
+         const commentData = this.prepareCommentContent(comments);
+         logger.debug(`📝 Prepared ${violationData.length} violations and ${commentData.length} comments for analysis`);
+         // Process in batches to manage token limits
+         const allResults = [];
+         const totalBatches = Math.ceil(violations.length / batchSize);
+         for (let i = 0; i < violations.length; i += batchSize) {
+             const batchIndex = Math.floor(i / batchSize) + 1;
+             const violationBatch = violationData.slice(i, i + batchSize);
+             logger.debug(`🔄 Processing batch ${batchIndex}/${totalBatches} (${violationBatch.length} violations)`);
+             try {
+                 const batchResults = await this.processSimilarityBatch(violationBatch, commentData);
+                 allResults.push(...batchResults);
+                 logger.debug(`✅ Batch ${batchIndex} completed: ${batchResults.length} similarity scores calculated`);
+                 // Add delay between batches to avoid rate limiting
+                 if (batchIndex < totalBatches) {
+                     await this.delay(1000);
+                 }
+             }
+             catch (error) {
+                 logger.error(`❌ Batch ${batchIndex} failed: ${error.message}`);
+                 // Continue with next batch instead of failing entirely
+             }
+         }
+         const processingTime = Date.now() - startTime;
+         logger.success(`✅ Semantic similarity analysis completed: ${allResults.length} comparisons in ${processingTime}ms`);
+         return allResults;
+     }
+     /**
+      * Prepare violation content for AI analysis
+      */
+     prepareViolationContent(violations) {
+         return violations.map((violation, index) => ({
+             index,
+             id: `violation_${index}`,
+             content: this.extractViolationContent(violation),
+         }));
+     }
+     /**
+      * Prepare comment content for AI analysis
+      */
+     prepareCommentContent(comments) {
+         return comments.map((comment, index) => ({
+             index,
+             id: comment.id,
+             content: this.extractCommentContent(comment),
+         }));
+     }
+     /**
+      * Extract meaningful content from violation for comparison
+      */
+     extractViolationContent(violation) {
+         const parts = [
+             `Issue: ${violation.issue}`,
+             `Message: ${violation.message}`,
+             violation.file ? `File: ${violation.file}` : "",
+             violation.code_snippet ? `Code: ${violation.code_snippet}` : "",
+             `Severity: ${violation.severity}`,
+             `Category: ${violation.category}`,
+         ].filter(Boolean);
+         return parts.join(" | ");
+     }
+     /**
+      * Extract meaningful content from comment for comparison
+      */
+     extractCommentContent(comment) {
+         const parts = [
+             `Comment: ${comment.text}`,
+             comment.anchor?.filePath ? `File: ${comment.anchor.filePath}` : "",
+             `Author: ${comment.author.displayName || comment.author.name}`,
+         ].filter(Boolean);
+         return parts.join(" | ");
+     }
+     /**
+      * Process a single batch of violations against all comments
+      */
+     async processSimilarityBatch(violationBatch, commentData) {
+         const prompt = this.createSimilarityPrompt(violationBatch, commentData);
+         try {
+             // Initialize NeuroLink if not already done
+             if (!this.neurolink) {
+                 const { NeuroLink } = await import("@juspay/neurolink");
+                 this.neurolink = new NeuroLink();
+             }
+             // Use NeuroLink for AI analysis
+             const result = await this.neurolink.generate({
+                 input: { text: prompt },
+                 systemPrompt: "You are an expert code reviewer analyzing semantic similarity between violations and comments. Provide accurate similarity scores based on content analysis.",
+                 provider: this.aiConfig.provider || "auto",
+                 model: this.aiConfig.model || "best",
+                 temperature: 0.1, // Low temperature for consistent similarity scoring
+                 maxTokens: this.aiConfig.maxTokens || 4000,
+                 timeout: "5m",
+                 enableAnalytics: this.aiConfig.enableAnalytics || false,
+                 enableEvaluation: false,
+             });
+             return this.parseSimilarityResponse(result.content, violationBatch, commentData);
+         }
+         catch (error) {
+             logger.error(`Failed to process similarity batch: ${error.message}`);
+             throw error;
+         }
+     }
+     /**
+      * Create AI prompt for similarity analysis
+      */
+     createSimilarityPrompt(violations, comments) {
+         const violationList = violations
+             .map((v, i) => `${i + 1}. ${v.content}`)
+             .join("\n");
+         const commentList = comments
+             .map((c, i) => `${i + 1}. ${c.content.substring(0, 300)}${c.content.length > 300 ? "..." : ""}`)
+             .join("\n");
+         return `
+ Analyze the semantic similarity between these code review violations and existing PR comments.
+
+ NEW VIOLATIONS TO CHECK:
+ ${violationList}
+
+ EXISTING PR COMMENTS:
+ ${commentList}
+
+ For each violation, determine if it's semantically similar to any existing comment. Consider:
+ - Same or similar issues being reported
+ - Same file or code area being discussed
+ - Similar concerns or suggestions
+ - Related security, performance, or code quality topics
+
+ Return a JSON array with similarity scores (0-100) for each violation-comment pair that has meaningful similarity (score >= 70).
+
+ Format: [{"violation": 1, "comment": 2, "score": 85, "reasoning": "Both discuss the same security vulnerability in authentication"}, ...]
+
+ Only include pairs with scores >= 70. If no meaningful similarities exist, return an empty array [].
+ `.trim();
+     }
+     /**
+      * Parse AI response to extract similarity results
+      */
+     parseSimilarityResponse(response, violationBatch, commentData) {
+         try {
+             // Extract JSON from response
+             const jsonMatch = response.match(/\[[\s\S]*\]/);
+             if (!jsonMatch) {
+                 logger.debug("No JSON array found in AI response, assuming no similarities");
+                 return [];
+             }
+             const similarities = JSON.parse(jsonMatch[0]);
+             const results = [];
+             for (const similarity of similarities) {
+                 const violationIndex = similarity.violation - 1; // Convert from 1-based to 0-based
+                 const commentIndex = similarity.comment - 1;
+                 if (violationIndex >= 0 &&
+                     violationIndex < violationBatch.length &&
+                     commentIndex >= 0 &&
+                     commentIndex < commentData.length) {
+                     const violation = violationBatch[violationIndex];
+                     const comment = commentData[commentIndex];
+                     results.push({
+                         violationIndex: violation.index,
+                         commentIndex: comment.index,
+                         violationId: violation.id,
+                         commentId: comment.id,
+                         similarityScore: similarity.score,
+                         reasoning: similarity.reasoning,
+                     });
+                 }
+             }
+             logger.debug(`📊 Parsed ${results.length} similarity results from AI response`);
+             return results;
+         }
+         catch (error) {
+             logger.error(`Failed to parse similarity response: ${error.message}`);
+             logger.debug(`Raw response: ${response}`);
+             return [];
+         }
+     }
+     /**
+      * Simple delay utility for rate limiting
+      */
+     delay(ms) {
+         return new Promise((resolve) => setTimeout(resolve, ms));
+     }
+ }
+ /**
+  * Factory function to create ContentSimilarityService
+  */
+ export function createContentSimilarityService(aiConfig) {
+     return new ContentSimilarityService(aiConfig);
+ }
+ //# sourceMappingURL=ContentSimilarityService.js.map
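
The hunk above adds a new file, ContentSimilarityService.js (per its sourceMappingURL). The service takes an AI provider config, lazily imports @juspay/neurolink on first use, scores violation/comment pairs in batches of 15 with a one-second pause between batches, and prompts the model to return only pairs scoring 70 or higher. A minimal usage sketch follows; the violation and comment field shapes are inferred from extractViolationContent and extractCommentContent, and the concrete values are hypothetical:

import { createContentSimilarityService } from "./ContentSimilarityService.js";

const service = createContentSimilarityService({ provider: "auto", model: "best" });

// Hypothetical inputs, shaped after what the extract* helpers read:
const violations = [{
    issue: "Hardcoded API key",
    message: "Secret committed to source",
    file: "src/auth.js",
    code_snippet: 'const KEY = "abc123";',
    severity: "CRITICAL",
    category: "security",
}];
const comments = [{
    id: 42,
    text: "Please move this API key into an environment variable",
    anchor: { filePath: "src/auth.js" },
    author: { displayName: "Reviewer" },
}];

// Each result carries violationIndex/commentIndex plus the model's
// 0-100 score and its reasoning string.
const results = await service.batchCalculateSimilarity(violations, comments);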
@@ -0,0 +1,77 @@
+ /**
+  * Exact Duplicate Removal Utility for Multi-Instance Processing
+  * Handles deduplication of violations from multiple Neurolink SDK instances
+  */
+ import { Violation, DeduplicationResult, InstanceResult, PRComment, CommentDeduplicationResult, AIProviderConfig } from "../types/index.js";
+ /**
+  * Exact Duplicate Remover for Multi-Instance Results
+  * Implements multi-level deduplication strategy
+  */
+ export declare class ExactDuplicateRemover {
+     /**
+      * Remove exact duplicates from multiple instance results
+      */
+     removeDuplicates(instanceResults: InstanceResult[]): DeduplicationResult;
+     /**
+      * Flatten violations from all instances with source tracking
+      */
+     private flattenViolationsWithSource;
+     /**
+      * Remove exact hash duplicates (Level 1)
+      */
+     private removeExactHashDuplicates;
+     /**
+      * Remove normalized duplicates (Level 2)
+      */
+     private removeNormalizedDuplicates;
+     /**
+      * Remove same file+line duplicates (Level 3)
+      */
+     private removeSameLineDuplicates;
+     /**
+      * Create hash for exact violation matching
+      */
+     private createViolationHash;
+     /**
+      * Create hash for normalized violation matching
+      */
+     private createNormalizedViolationHash;
+     /**
+      * Normalize code snippet for comparison
+      */
+     private normalizeCodeSnippet;
+     /**
+      * Normalize text for comparison
+      */
+     private normalizeText;
+     /**
+      * Resolve duplicate by severity (and potentially other factors)
+      */
+     private resolveDuplicateBySeverity;
+     /**
+      * Track which instance contributed how many violations
+      */
+     private trackContributions;
+     /**
+      * Remove source tracking information from violation
+      */
+     private stripSourceInfo;
+     /**
+      * Remove violations that duplicate existing PR comments using semantic similarity
+      * Uses AI-powered ContentSimilarityService for intelligent deduplication
+      */
+     removeAgainstExistingComments(newViolations: Violation[], existingComments: PRComment[], aiConfig: AIProviderConfig, similarityThreshold?: number): Promise<CommentDeduplicationResult>;
+     /**
+      * Get detailed deduplication statistics
+      */
+     getDeduplicationStats(result: DeduplicationResult): string;
+     /**
+      * Get detailed comment deduplication statistics
+      */
+     getCommentDeduplicationStats(result: CommentDeduplicationResult): string;
+ }
+ /**
+  * Factory function to create ExactDuplicateRemover
+  */
+ export declare function createExactDuplicateRemover(): ExactDuplicateRemover;
+ //# sourceMappingURL=ExactDuplicateRemover.d.ts.map
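
This declaration file (ExactDuplicateRemover.d.ts) defines the public surface: removeDuplicates merges multi-instance results through three deduplication levels, while the new removeAgainstExistingComments adds the AI-backed semantic pass. A sketch of the semantic pass against the declared signature; the input arrays are placeholders, the aiConfig values are illustrative, and the default threshold of 85 comes from the implementation shown below:

import { createExactDuplicateRemover } from "./ExactDuplicateRemover.js";

const remover = createExactDuplicateRemover();

// Violations scoring >= 85% similarity against an existing PR comment are
// dropped; on AI failure the method falls back to returning all violations.
const { uniqueViolations, duplicatesRemoved, semanticMatches } =
    await remover.removeAgainstExistingComments(
        newViolations,        // Violation[] (placeholder)
        existingComments,     // PRComment[] (placeholder)
        { provider: "auto" }, // AIProviderConfig (illustrative)
        85,                   // optional; 85 is the default
    );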
@@ -0,0 +1,361 @@
+ /**
+  * Exact Duplicate Removal Utility for Multi-Instance Processing
+  * Handles deduplication of violations from multiple Neurolink SDK instances
+  */
+ import { createHash } from "crypto";
+ import { logger } from "./Logger.js";
+ /**
+  * Exact Duplicate Remover for Multi-Instance Results
+  * Implements multi-level deduplication strategy
+  */
+ export class ExactDuplicateRemover {
+     /**
+      * Remove exact duplicates from multiple instance results
+      */
+     removeDuplicates(instanceResults) {
+         const startTime = Date.now();
+         logger.debug("🔍 Starting exact duplicate removal process");
+         // Step 1: Flatten all violations with source tracking
+         const allViolations = this.flattenViolationsWithSource(instanceResults);
+         logger.debug(`📊 Total violations from all instances: ${allViolations.length}`);
+         // Step 2: Remove exact hash duplicates
+         const exactDuplicates = this.removeExactHashDuplicates(allViolations);
+         logger.debug(`🎯 Exact duplicates removed: ${exactDuplicates.removed}`);
+         // Step 3: Remove normalized duplicates
+         const normalizedDuplicates = this.removeNormalizedDuplicates(exactDuplicates.unique);
+         logger.debug(`📝 Normalized duplicates removed: ${normalizedDuplicates.removed}`);
+         // Step 4: Remove same file+line duplicates
+         const finalResult = this.removeSameLineDuplicates(normalizedDuplicates.unique);
+         logger.debug(`📍 Same-line duplicates removed: ${finalResult.removed}`);
+         // Step 5: Track contributions and create metrics
+         const instanceContributions = this.trackContributions(finalResult.unique);
+         const processingTime = Date.now() - startTime;
+         const metrics = {
+             totalViolationsInput: allViolations.length,
+             exactDuplicatesRemoved: exactDuplicates.removed,
+             normalizedDuplicatesRemoved: normalizedDuplicates.removed,
+             sameLineDuplicatesRemoved: finalResult.removed,
+             finalUniqueViolations: finalResult.unique.length,
+             deduplicationRate: ((allViolations.length - finalResult.unique.length) /
+                 allViolations.length) *
+                 100,
+             instanceContributions: Object.fromEntries(instanceContributions),
+             processingTimeMs: processingTime,
+         };
+         logger.success(`✅ Deduplication completed: ${allViolations.length} → ${finalResult.unique.length} violations ` +
+             `(${metrics.deduplicationRate.toFixed(1)}% reduction) in ${processingTime}ms`);
+         return {
+             uniqueViolations: finalResult.unique.map((v) => this.stripSourceInfo(v)),
+             duplicatesRemoved: {
+                 exactDuplicates: exactDuplicates.removed,
+                 normalizedDuplicates: normalizedDuplicates.removed,
+                 sameLineDuplicates: finalResult.removed,
+             },
+             instanceContributions,
+             processingMetrics: metrics,
+         };
+     }
+     /**
+      * Flatten violations from all instances with source tracking
+      */
+     flattenViolationsWithSource(instanceResults) {
+         const allViolations = [];
+         for (const result of instanceResults) {
+             if (!result.success || !result.violations) {
+                 logger.debug(`⚠️ Skipping failed instance: ${result.instanceName}`);
+                 continue;
+             }
+             result.violations.forEach((violation, index) => {
+                 allViolations.push({
+                     ...violation,
+                     source: result.instanceName,
+                     originalIndex: index,
+                 });
+             });
+         }
+         return allViolations;
+     }
+     /**
+      * Remove exact hash duplicates (Level 1)
+      */
+     removeExactHashDuplicates(violations) {
+         const seenHashes = new Set();
+         const unique = [];
+         let removed = 0;
+         for (const violation of violations) {
+             const hash = this.createViolationHash(violation);
+             if (!seenHashes.has(hash)) {
+                 seenHashes.add(hash);
+                 unique.push(violation);
+             }
+             else {
+                 removed++;
+                 logger.debug(`🔄 Exact duplicate removed: ${violation.issue} (${violation.source})`);
+             }
+         }
+         return { unique, removed };
+     }
+     /**
+      * Remove normalized duplicates (Level 2)
+      */
+     removeNormalizedDuplicates(violations) {
+         const seenNormalizedHashes = new Set();
+         const unique = [];
+         let removed = 0;
+         for (const violation of violations) {
+             const normalizedHash = this.createNormalizedViolationHash(violation);
+             if (!seenNormalizedHashes.has(normalizedHash)) {
+                 seenNormalizedHashes.add(normalizedHash);
+                 unique.push(violation);
+             }
+             else {
+                 removed++;
+                 logger.debug(`📝 Normalized duplicate removed: ${violation.issue} (${violation.source})`);
+             }
+         }
+         return { unique, removed };
+     }
+     /**
+      * Remove same file+line duplicates (Level 3)
+      */
+     removeSameLineDuplicates(violations) {
+         const fileLineMap = new Map();
+         const uniqueMap = new Map();
+         let removed = 0;
+         for (const violation of violations) {
+             if (!violation.file || !violation.code_snippet) {
+                 uniqueMap.set(`${violation.file}_${violation.originalIndex}`, violation);
+                 continue;
+             }
+             const fileKey = violation.file;
+             const lineKey = this.normalizeCodeSnippet(violation.code_snippet);
+             const uniqueKey = `${violation.file}_${violation.originalIndex}`;
+             if (!fileLineMap.has(fileKey)) {
+                 fileLineMap.set(fileKey, new Map());
+             }
+             const linesInFile = fileLineMap.get(fileKey);
+             if (linesInFile.has(lineKey)) {
+                 // Duplicate found - resolve by severity and instance quality
+                 const existing = linesInFile.get(lineKey);
+                 const better = this.resolveDuplicateBySeverity([existing, violation]);
+                 if (better === violation) {
+                     // Replace existing with current
+                     linesInFile.set(lineKey, violation);
+                     // Remove existing from unique map and add current
+                     const existingKey = `${existing.file}_${existing.originalIndex}`;
+                     uniqueMap.delete(existingKey);
+                     uniqueMap.set(uniqueKey, violation);
+                 }
+                 removed++;
+                 logger.debug(`📍 Same-line duplicate resolved: ${violation.issue} (${violation.source})`);
+             }
+             else {
+                 linesInFile.set(lineKey, violation);
+                 uniqueMap.set(uniqueKey, violation);
+             }
+         }
+         return { unique: Array.from(uniqueMap.values()), removed };
+     }
+     /**
+      * Create hash for exact violation matching
+      */
+     createViolationHash(violation) {
+         const key = {
+             file: violation.file?.trim(),
+             code_snippet: violation.code_snippet?.trim(),
+             severity: violation.severity,
+             category: violation.category,
+             issue: violation.issue.trim(),
+             message: violation.message.trim(),
+         };
+         return createHash("sha256").update(JSON.stringify(key)).digest("hex");
+     }
+     /**
+      * Create hash for normalized violation matching
+      */
+     createNormalizedViolationHash(violation) {
+         const normalized = {
+             file: violation.file?.toLowerCase().trim(),
+             code_snippet: this.normalizeCodeSnippet(violation.code_snippet || ""),
+             severity: violation.severity,
+             category: violation.category,
+             issue: this.normalizeText(violation.issue),
+             message: this.normalizeText(violation.message),
+         };
+         return createHash("sha256")
+             .update(JSON.stringify(normalized))
+             .digest("hex");
+     }
+     /**
+      * Normalize code snippet for comparison
+      */
+     normalizeCodeSnippet(snippet) {
+         return snippet
+             .replace(/\s+/g, " ") // Normalize whitespace
+             .replace(/['"]/g, '"') // Normalize quotes
+             .replace(/;+$/, "") // Remove trailing semicolons
+             .replace(/[{}]/g, "") // Remove braces for comparison
+             .trim()
+             .toLowerCase();
+     }
+     /**
+      * Normalize text for comparison
+      */
+     normalizeText(text) {
+         return text
+             .toLowerCase()
+             .replace(/[^\w\s]/g, "") // Remove punctuation
+             .replace(/\s+/g, " ") // Normalize whitespace
+             .trim();
+     }
+     /**
+      * Resolve duplicate by severity (and potentially other factors)
+      */
+     resolveDuplicateBySeverity(duplicates) {
+         const severityOrder = {
+             CRITICAL: 4,
+             MAJOR: 3,
+             MINOR: 2,
+             SUGGESTION: 1,
+         };
+         return duplicates.reduce((best, current) => {
+             const bestScore = severityOrder[best.severity] || 0;
+             const currentScore = severityOrder[current.severity] || 0;
+             if (currentScore > bestScore) {
+                 return current;
+             }
+             else if (currentScore === bestScore) {
+                 // Same severity - could add more sophisticated logic here
+                 // For now, prefer the first one (could be based on instance quality)
+                 return best;
+             }
+             return best;
+         });
+     }
+     /**
+      * Track which instance contributed how many violations
+      */
+     trackContributions(violations) {
+         const contributions = new Map();
+         for (const violation of violations) {
+             const current = contributions.get(violation.source) || 0;
+             contributions.set(violation.source, current + 1);
+         }
+         return contributions;
+     }
+     /**
+      * Remove source tracking information from violation
+      */
+     stripSourceInfo(violation) {
+         const { source, originalIndex, ...cleanViolation } = violation;
+         return cleanViolation;
+     }
+     /**
+      * Remove violations that duplicate existing PR comments using semantic similarity
+      * Uses AI-powered ContentSimilarityService for intelligent deduplication
+      */
+     async removeAgainstExistingComments(newViolations, existingComments, aiConfig, similarityThreshold = 85) {
+         const startTime = Date.now();
+         logger.debug("🔍 Starting semantic comment deduplication process");
+         logger.debug(`📊 New violations: ${newViolations.length}, Existing comments: ${existingComments.length}`);
+         logger.debug(`🎯 Similarity threshold: ${similarityThreshold}%`);
+         if (newViolations.length === 0 || existingComments.length === 0) {
+             logger.debug("⏭️ No violations or comments to compare, skipping deduplication");
+             return {
+                 uniqueViolations: newViolations,
+                 duplicatesRemoved: 0,
+                 semanticMatches: [],
+             };
+         }
+         try {
+             // Use ContentSimilarityService for semantic analysis
+             const { ContentSimilarityService } = await import("./ContentSimilarityService.js");
+             const similarityService = new ContentSimilarityService(aiConfig);
+             // Get similarity results
+             const similarityResults = await similarityService.batchCalculateSimilarity(newViolations, existingComments, 15);
+             // Filter violations based on similarity threshold
+             const duplicateViolationIndices = new Set();
+             const semanticMatches = [];
+             for (const result of similarityResults) {
+                 if (result.similarityScore >= similarityThreshold) {
+                     duplicateViolationIndices.add(result.violationIndex);
+                     const violation = newViolations[result.violationIndex];
+                     const comment = existingComments[result.commentIndex];
+                     semanticMatches.push({
+                         violation: violation.issue,
+                         comment: `Comment ${comment.id}`,
+                         similarityScore: result.similarityScore,
+                         reasoning: result.reasoning,
+                     });
+                     logger.debug(`🎯 Semantic duplicate found: "${violation.issue}" matches comment ${comment.id} ` +
+                         `(${result.similarityScore}% similarity)`);
+                 }
+             }
+             // Create final list of unique violations
+             const uniqueViolations = newViolations.filter((_, index) => !duplicateViolationIndices.has(index));
+             const processingTime = Date.now() - startTime;
+             const duplicatesRemoved = duplicateViolationIndices.size;
+             logger.success(`✅ Semantic deduplication completed: ${newViolations.length} → ${uniqueViolations.length} violations ` +
+                 `(${duplicatesRemoved} duplicates removed) in ${processingTime}ms`);
+             return {
+                 uniqueViolations,
+                 duplicatesRemoved,
+                 semanticMatches,
+             };
+         }
+         catch (error) {
+             logger.error(`❌ Semantic deduplication failed: ${error.message}`);
+             logger.warn("⚠️ Falling back to no deduplication - returning all violations");
+             // Graceful fallback: return all violations if AI analysis fails
+             return {
+                 uniqueViolations: newViolations,
+                 duplicatesRemoved: 0,
+                 semanticMatches: [],
+             };
+         }
+     }
+     /**
+      * Get detailed deduplication statistics
+      */
+     getDeduplicationStats(result) {
+         const metrics = result.processingMetrics;
+         const contributions = Array.from(result.instanceContributions.entries())
+             .map(([instance, count]) => `${instance}: ${count}`)
+             .join(", ");
+         return `
+ 📊 Deduplication Statistics:
+ • Input violations: ${metrics.totalViolationsInput}
+ • Exact duplicates removed: ${metrics.exactDuplicatesRemoved}
+ • Normalized duplicates removed: ${metrics.normalizedDuplicatesRemoved}
+ • Same-line duplicates removed: ${metrics.sameLineDuplicatesRemoved}
+ • Final unique violations: ${metrics.finalUniqueViolations}
+ • Deduplication rate: ${metrics.deduplicationRate.toFixed(1)}%
+ • Processing time: ${metrics.processingTimeMs}ms
+ • Instance contributions: ${contributions}
+ `.trim();
+     }
+     /**
+      * Get detailed comment deduplication statistics
+      */
+     getCommentDeduplicationStats(result) {
+         const averageSimilarity = result.semanticMatches.length > 0
+             ? result.semanticMatches.reduce((sum, match) => sum + match.similarityScore, 0) / result.semanticMatches.length
+             : 0;
+         return `
+ 📊 Comment Deduplication Statistics:
+ • Input violations: ${result.uniqueViolations.length + result.duplicatesRemoved}
+ • Unique violations: ${result.uniqueViolations.length}
+ • Duplicates removed: ${result.duplicatesRemoved}
+ • Deduplication rate: ${((result.duplicatesRemoved / (result.uniqueViolations.length + result.duplicatesRemoved)) * 100).toFixed(1)}%
+ • Semantic matches: ${result.semanticMatches.length}
+ • Average similarity score: ${averageSimilarity.toFixed(1)}%
+ `.trim();
+     }
+ }
+ /**
+  * Factory function to create ExactDuplicateRemover
+  */
+ export function createExactDuplicateRemover() {
+     return new ExactDuplicateRemover();
+ }
+ //# sourceMappingURL=ExactDuplicateRemover.js.map
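
The implementation makes the pipeline order concrete: flatten with source tracking, Level 1 exact SHA-256 hashing, Level 2 normalized hashing, Level 3 same-file/same-snippet resolution by severity, then metrics. A sketch of driving it with multi-instance results; removeDuplicates reads only the success, instanceName, and violations fields of each entry, and the run data here is hypothetical:

const remover = createExactDuplicateRemover();

const result = remover.removeDuplicates([
    { instanceName: "instance-1", success: true, violations: runOneViolations },
    { instanceName: "instance-2", success: true, violations: runTwoViolations },
    { instanceName: "instance-3", success: false }, // skipped with a debug log
]);

// uniqueViolations has the internal source/originalIndex tracking stripped;
// processingMetrics feeds the human-readable summary.
console.log(remover.getDeduplicationStats(result));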
@@ -5,7 +5,8 @@
  import { Logger as ILogger, LogLevel, LoggerOptions } from "../types/index.js";
  export declare class Logger implements ILogger {
      private options;
-     constructor(options?: Partial<LoggerOptions>);
+     private showBanner;
+     constructor(options?: Partial<LoggerOptions>, showBanner?: boolean);
      private shouldLog;
      private formatMessage;
      private colorize;
@@ -26,5 +27,5 @@ export declare class Logger implements ILogger {
      getConfig(): LoggerOptions;
  }
  export declare const logger: Logger;
- export declare function createLogger(options?: Partial<LoggerOptions>): Logger;
+ export declare function createLogger(options?: Partial<LoggerOptions>, showBanner?: boolean): Logger;
  //# sourceMappingURL=Logger.d.ts.map
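
The only Logger change is a new optional showBanner flag, threaded through both the constructor and the createLogger factory, so existing call sites remain source-compatible. A brief sketch; the options object is illustrative, since the real fields are declared by LoggerOptions in types/index.js:

import { createLogger } from "./Logger.js";

// Unchanged call sites still work: showBanner stays undefined.
const defaultLogger = createLogger({ level: "debug" }); // options shape assumed

// New in 1.4.x: pass the banner flag explicitly.
const quietLogger = createLogger({ level: "info" }, false);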