@juspay/yama 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
+ /**
+ * Multi-Instance Processor for Parallel Code Review
+ * Orchestrates multiple Neurolink SDK instances for diverse code analysis
+ */
+ import { MultiInstanceConfig, MultiInstanceResult, CodeReviewConfig, ReviewOptions } from "../types/index.js";
+ import { UnifiedContext } from "../core/ContextGatherer.js";
+ import { BitbucketProvider } from "../core/providers/BitbucketProvider.js";
+ /**
+ * Multi-Instance Processor
+ * Manages parallel execution of multiple CodeReviewer instances
+ */
+ export declare class MultiInstanceProcessor {
+ private bitbucketProvider;
+ private baseReviewConfig;
+ private duplicateRemover;
+ constructor(bitbucketProvider: BitbucketProvider, baseReviewConfig: CodeReviewConfig);
+ /**
+ * Process code review using multiple instances
+ */
+ processWithMultipleInstances(context: UnifiedContext, options: ReviewOptions, multiInstanceConfig: MultiInstanceConfig): Promise<MultiInstanceResult>;
+ /**
+ * Execute all instances in parallel with concurrency control
+ */
+ private executeInstancesInParallel;
+ /**
+ * Execute a single instance with concurrency control
+ */
+ private executeInstanceWithConcurrency;
+ /**
+ * Validate provider string against allowed provider types
+ */
+ private validateProvider;
+ /**
+ * Execute a single instance
+ */
+ private executeInstance;
+ /**
+ * Validate multi-instance configuration
+ */
+ private validateMultiInstanceConfig;
+ /**
+ * Estimate tokens per instance based on context
+ */
+ private estimateTokensPerInstance;
+ /**
+ * Calculate total token budget for all instances
+ */
+ private calculateTotalTokenBudget;
+ /**
+ * Extract token usage from review result (if available)
+ */
+ private extractTokenUsage;
+ /**
+ * Create non-deduplicated result (when deduplication is disabled)
+ */
+ private createNonDeduplicatedResult;
+ /**
+ * Apply final filtering based on configuration
+ */
+ private applyFinalFiltering;
+ /**
+ * Prioritize violations based on strategy
+ */
+ private prioritizeViolations;
+ /**
+ * Create summary of multi-instance processing
+ */
+ private createSummary;
+ }
+ /**
+ * Factory function to create MultiInstanceProcessor
+ */
+ export declare function createMultiInstanceProcessor(bitbucketProvider: BitbucketProvider, baseReviewConfig: CodeReviewConfig): MultiInstanceProcessor;
+ //# sourceMappingURL=MultiInstanceProcessor.d.ts.map
@@ -0,0 +1,359 @@
+ /**
+ * Multi-Instance Processor for Parallel Code Review
+ * Orchestrates multiple Neurolink SDK instances for diverse code analysis
+ */
+ import { createCodeReviewer } from "./CodeReviewer.js";
+ import { createExactDuplicateRemover, } from "../utils/ExactDuplicateRemover.js";
+ import { Semaphore, TokenBudgetManager, calculateOptimalConcurrency, } from "../utils/ParallelProcessing.js";
+ import { getProviderTokenLimit } from "../utils/ProviderLimits.js";
+ import { logger } from "../utils/Logger.js";
+ /**
+ * Multi-Instance Processor
+ * Manages parallel execution of multiple CodeReviewer instances
+ */
+ export class MultiInstanceProcessor {
+ bitbucketProvider;
+ baseReviewConfig;
+ duplicateRemover;
+ constructor(bitbucketProvider, baseReviewConfig) {
+ this.bitbucketProvider = bitbucketProvider;
+ this.baseReviewConfig = baseReviewConfig;
+ this.duplicateRemover = createExactDuplicateRemover();
+ }
+ /**
+ * Process code review using multiple instances
+ */
+ async processWithMultipleInstances(context, options, multiInstanceConfig) {
+ const startTime = Date.now();
+ try {
+ logger.phase("🚀 Starting multi-instance code review processing");
+ logger.info(`🔄 Launching ${multiInstanceConfig.instanceCount} instances: ${multiInstanceConfig.instances.map((i) => i.name).join(", ")}`);
+ // Step 1: Validate configuration
+ this.validateMultiInstanceConfig(multiInstanceConfig);
+ // Step 2: Execute instances in parallel
+ const instanceResults = await this.executeInstancesInParallel(context, options, multiInstanceConfig);
+ // Step 3: Deduplicate results
+ const deduplicationResult = multiInstanceConfig.deduplication.enabled
+ ? this.duplicateRemover.removeDuplicates(instanceResults)
+ : this.createNonDeduplicatedResult(instanceResults);
+ // Step 4: Apply final filtering if configured
+ const finalViolations = this.applyFinalFiltering(deduplicationResult.uniqueViolations, multiInstanceConfig.deduplication);
+ // Step 5: Create summary
+ const totalProcessingTime = Date.now() - startTime;
+ const summary = this.createSummary(instanceResults, deduplicationResult, finalViolations, totalProcessingTime);
+ logger.success(`✅ Multi-instance processing completed: ${summary.totalViolationsFound} → ${summary.uniqueViolationsAfterDedup} violations ` +
+ `(${summary.deduplicationRate.toFixed(1)}% reduction) in ${Math.round(totalProcessingTime / 1000)}s`);
+ // Step 6: Log detailed statistics
+ if (logger.getConfig().verbose) {
+ logger.info(this.duplicateRemover.getDeduplicationStats(deduplicationResult));
+ }
+ return {
+ instances: instanceResults,
+ deduplication: deduplicationResult,
+ finalViolations,
+ summary,
+ };
+ }
+ catch (error) {
+ logger.error(`Multi-instance processing failed: ${error.message}`);
+ throw error;
+ }
+ }
+ /**
+ * Execute all instances in parallel with concurrency control
+ */
+ async executeInstancesInParallel(context, options, multiInstanceConfig) {
+ const instances = multiInstanceConfig.instances;
+ // Calculate optimal concurrency
+ const avgTokensPerInstance = this.estimateTokensPerInstance(context);
+ const totalTokenBudget = this.calculateTotalTokenBudget(instances);
+ const optimalConcurrency = calculateOptimalConcurrency(instances.length, Math.min(instances.length, 3), // Max 3 concurrent instances by default
+ avgTokensPerInstance, totalTokenBudget);
+ // Initialize concurrency control
+ const semaphore = new Semaphore(optimalConcurrency);
+ const tokenBudget = new TokenBudgetManager(totalTokenBudget);
+ logger.info(`🎯 Parallel execution: ${optimalConcurrency} concurrent instances, ${totalTokenBudget} total token budget`);
+ // Execute instances with controlled concurrency
+ const instancePromises = instances.map((instanceConfig, index) => this.executeInstanceWithConcurrency(instanceConfig, context, options, semaphore, tokenBudget, index, instances.length));
+ // Wait for all instances to complete
+ const results = await Promise.allSettled(instancePromises);
+ // Process results and handle failures
+ const instanceResults = [];
+ for (let i = 0; i < results.length; i++) {
+ const result = results[i];
+ const instanceConfig = instances[i];
+ if (result.status === "fulfilled") {
+ instanceResults.push(result.value);
+ }
+ else {
+ logger.error(`❌ Instance ${instanceConfig.name} failed: ${result.reason.message}`);
+ instanceResults.push({
+ instanceName: instanceConfig.name,
+ violations: [],
+ processingTime: 0,
+ error: result.reason.message,
+ success: false,
+ });
+ }
+ }
+ return instanceResults;
+ }
+ /**
+ * Execute a single instance with concurrency control
+ */
+ async executeInstanceWithConcurrency(instanceConfig, context, options, semaphore, tokenBudget, instanceIndex, totalInstances) {
+ // Acquire semaphore permit
+ await semaphore.acquire();
+ try {
+ const estimatedTokens = this.estimateTokensPerInstance(context);
+ // Check token budget
+ if (!tokenBudget.allocateForBatch(instanceIndex, estimatedTokens)) {
+ throw new Error(`Insufficient token budget for instance ${instanceConfig.name}`);
+ }
+ logger.info(`🔄 Processing instance ${instanceIndex + 1}/${totalInstances}: ${instanceConfig.name} ` +
+ `(${instanceConfig.provider}, temp: ${instanceConfig.temperature || "default"})`);
+ // Execute the instance
+ const result = await this.executeInstance(instanceConfig, context, options);
+ logger.info(`✅ Instance ${instanceConfig.name} completed: ${result.violations.length} violations ` +
+ `in ${Math.round(result.processingTime / 1000)}s`);
+ return result;
+ }
+ finally {
+ // Always release resources
+ tokenBudget.releaseBatch(instanceIndex);
+ semaphore.release();
+ }
+ }
+ /**
+ * Validate provider string against allowed provider types
+ */
+ validateProvider(provider) {
+ const validProviders = [
+ "auto",
+ "google-ai",
+ "openai",
+ "anthropic",
+ "azure",
+ "bedrock",
+ "vertex",
+ ];
+ if (!validProviders.includes(provider)) {
+ logger.warn(`Unknown provider '${provider}', falling back to 'auto'`);
+ return "auto";
+ }
+ return provider;
+ }
+ /**
+ * Execute a single instance
+ */
+ async executeInstance(instanceConfig, context, options) {
+ const startTime = Date.now();
+ try {
+ // Create instance-specific AI config
+ const aiConfig = {
+ provider: this.validateProvider(instanceConfig.provider),
+ model: instanceConfig.model,
+ temperature: instanceConfig.temperature,
+ maxTokens: instanceConfig.maxTokens,
+ timeout: instanceConfig.timeout,
+ enableAnalytics: true,
+ enableEvaluation: false,
+ };
+ // Create CodeReviewer for this instance
+ const codeReviewer = createCodeReviewer(this.bitbucketProvider, aiConfig, this.baseReviewConfig);
+ // Execute review with dry run to get violations without posting
+ const instanceOptions = { ...options, dryRun: true };
+ const reviewResult = await codeReviewer.reviewCodeWithContext(context, instanceOptions);
+ const processingTime = Date.now() - startTime;
+ return {
+ instanceName: instanceConfig.name,
+ violations: reviewResult.violations,
+ processingTime,
+ tokenUsage: this.extractTokenUsage(reviewResult),
+ success: true,
+ };
+ }
+ catch (error) {
+ const processingTime = Date.now() - startTime;
+ return {
+ instanceName: instanceConfig.name,
+ violations: [],
+ processingTime,
+ error: error.message,
+ success: false,
+ };
+ }
+ }
+ /**
+ * Validate multi-instance configuration
+ */
+ validateMultiInstanceConfig(config) {
+ if (!config.enabled) {
+ throw new Error("Multi-instance processing is not enabled");
+ }
+ if (config.instances.length === 0) {
+ throw new Error("No instances configured for multi-instance processing");
+ }
+ if (config.instances.length !== config.instanceCount) {
+ logger.warn(`Instance count mismatch: configured ${config.instanceCount}, found ${config.instances.length} instances`);
+ }
+ // Validate each instance
+ for (const instance of config.instances) {
+ if (!instance.name || !instance.provider) {
+ throw new Error(`Invalid instance configuration: name and provider are required`);
+ }
+ }
+ // Validate deduplication config
+ if (config.deduplication.enabled) {
+ if (config.deduplication.similarityThreshold < 0 ||
+ config.deduplication.similarityThreshold > 100) {
+ throw new Error("Similarity threshold must be between 0 and 100");
+ }
+ if (config.deduplication.maxCommentsToPost <= 0) {
+ throw new Error("Max comments to post must be greater than 0");
+ }
+ }
+ }
+ /**
+ * Estimate tokens per instance based on context
+ */
+ estimateTokensPerInstance(context) {
+ // Base estimation: context size + overhead
+ const contextSize = JSON.stringify(context).length;
+ const estimatedTokens = Math.ceil(contextSize / 4); // ~4 chars per token
+ // Add overhead for prompts and response
+ const overhead = 5000;
+ return estimatedTokens + overhead;
+ }
+ /**
+ * Calculate total token budget for all instances
+ */
+ calculateTotalTokenBudget(instances) {
+ // Use the most restrictive provider limit among all instances
+ let minLimit = Infinity;
+ for (const instance of instances) {
+ const providerLimit = getProviderTokenLimit(instance.provider, true);
+ const instanceLimit = instance.maxTokens || providerLimit;
+ minLimit = Math.min(minLimit, instanceLimit);
+ }
+ // Total budget is the sum of all instance limits, but with safety margin
+ const totalBudget = instances.length * minLimit * 0.8; // 80% safety margin
+ logger.debug(`Calculated total token budget: ${totalBudget} (${instances.length} instances × ${minLimit} × 0.8)`);
+ return totalBudget;
+ }
+ /**
+ * Extract token usage from review result (if available)
+ */
+ extractTokenUsage(reviewResult) {
+ // This would need to be implemented based on how NeuroLink returns usage data
+ // For now, return undefined as we don't have access to this data
+ return undefined;
+ }
+ /**
+ * Create non-deduplicated result (when deduplication is disabled)
+ */
+ createNonDeduplicatedResult(instanceResults) {
+ const allViolations = [];
+ const instanceContributions = new Map();
+ for (const result of instanceResults) {
+ if (result.success && result.violations) {
+ allViolations.push(...result.violations);
+ instanceContributions.set(result.instanceName, result.violations.length);
+ }
+ }
+ return {
+ uniqueViolations: allViolations,
+ duplicatesRemoved: {
+ exactDuplicates: 0,
+ normalizedDuplicates: 0,
+ sameLineDuplicates: 0,
+ },
+ instanceContributions,
+ processingMetrics: {
+ totalViolationsInput: allViolations.length,
+ exactDuplicatesRemoved: 0,
+ normalizedDuplicatesRemoved: 0,
+ sameLineDuplicatesRemoved: 0,
+ finalUniqueViolations: allViolations.length,
+ deduplicationRate: 0,
+ instanceContributions: Object.fromEntries(instanceContributions),
+ processingTimeMs: 0,
+ },
+ };
+ }
+ /**
+ * Apply final filtering based on configuration
+ */
+ applyFinalFiltering(violations, deduplicationConfig) {
+ if (!deduplicationConfig.maxCommentsToPost ||
+ violations.length <= deduplicationConfig.maxCommentsToPost) {
+ return violations;
+ }
+ logger.info(`📊 Applying final filtering: ${violations.length} → ${deduplicationConfig.maxCommentsToPost} violations`);
+ // Sort by priority based on configuration
+ const prioritized = this.prioritizeViolations(violations, deduplicationConfig.prioritizeBy);
+ // Take only the top N violations
+ const filtered = prioritized.slice(0, deduplicationConfig.maxCommentsToPost);
+ logger.info(`🎯 Final filtering applied: kept top ${filtered.length} violations prioritized by ${deduplicationConfig.prioritizeBy}`);
+ return filtered;
+ }
+ /**
+ * Prioritize violations based on strategy
+ */
+ prioritizeViolations(violations, strategy) {
+ const severityOrder = {
+ CRITICAL: 4,
+ MAJOR: 3,
+ MINOR: 2,
+ SUGGESTION: 1,
+ };
+ switch (strategy) {
+ case "severity":
+ return violations.sort((a, b) => {
+ const aScore = severityOrder[a.severity] || 0;
+ const bScore = severityOrder[b.severity] || 0;
+ return bScore - aScore; // Higher severity first
+ });
+ case "similarity":
+ case "confidence":
+ // For now, fall back to severity-based sorting
+ // These could be implemented with more sophisticated algorithms
+ logger.debug(`Prioritization strategy '${strategy}' not fully implemented, using severity`);
+ return this.prioritizeViolations(violations, "severity");
+ default:
+ logger.warn(`Unknown prioritization strategy: ${strategy}, using severity`);
+ return this.prioritizeViolations(violations, "severity");
+ }
+ }
+ /**
+ * Create summary of multi-instance processing
+ */
+ createSummary(instanceResults, deduplicationResult, finalViolations, totalProcessingTime) {
+ const successfulInstances = instanceResults.filter((r) => r.success).length;
+ const failedInstances = instanceResults.length - successfulInstances;
+ const totalViolationsFound = instanceResults
+ .filter((r) => r.success)
+ .reduce((sum, r) => sum + r.violations.length, 0);
+ const deduplicationRate = totalViolationsFound > 0
+ ? ((totalViolationsFound - finalViolations.length) /
+ totalViolationsFound) *
+ 100
+ : 0;
+ return {
+ totalInstances: instanceResults.length,
+ successfulInstances,
+ failedInstances,
+ totalViolationsFound,
+ uniqueViolationsAfterDedup: finalViolations.length,
+ deduplicationRate,
+ totalProcessingTime,
+ };
+ }
+ }
+ /**
+ * Factory function to create MultiInstanceProcessor
+ */
+ export function createMultiInstanceProcessor(bitbucketProvider, baseReviewConfig) {
+ return new MultiInstanceProcessor(bitbucketProvider, baseReviewConfig);
+ }
+ //# sourceMappingURL=MultiInstanceProcessor.js.map
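
Note: a minimal usage sketch of the API added above, not part of the published diff. The import path, the untyped parameters, and all config values (instance names, providers, temperatures, thresholds) are illustrative assumptions; only the factory, the method signature, and the MultiInstanceConfig shape come from this release.

// Hedged usage sketch — paths and values below are assumptions.
import { createMultiInstanceProcessor } from "./processors/MultiInstanceProcessor.js"; // path assumed

async function runMultiInstanceReview(
  bitbucketProvider: any, // assumed: an already-configured BitbucketProvider
  baseReviewConfig: any,  // assumed: the CodeReviewConfig loaded from yama's config
  context: any,           // assumed: UnifiedContext from ContextGatherer
  options: any,           // assumed: ReviewOptions
) {
  const processor = createMultiInstanceProcessor(bitbucketProvider, baseReviewConfig);

  // Shape follows the MultiInstanceConfig interface added in this release;
  // names, providers, and thresholds are illustrative values.
  const result = await processor.processWithMultipleInstances(context, options, {
    enabled: true,
    instanceCount: 2,
    instances: [
      { name: "security-pass", provider: "openai", temperature: 0.2 },
      { name: "style-pass", provider: "anthropic", temperature: 0.7 },
    ],
    deduplication: {
      enabled: true,
      similarityThreshold: 85,
      maxCommentsToPost: 25,
      prioritizeBy: "severity",
    },
  });

  console.log(`${result.summary.uniqueViolationsAfterDedup} violations after dedup`);
}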
@@ -163,7 +163,7 @@ export interface ReviewStatistics {
  minorCount: number;
  suggestionCount: number;
  batchCount?: number;
- processingStrategy?: "single-request" | "batch-processing";
+ processingStrategy?: "single-request" | "batch-processing" | "multi-instance";
  averageBatchSize?: number;
  totalProcessingTime?: number;
  }
@@ -280,6 +280,16 @@ export interface CodeReviewConfig {
  analysisTemplate?: string;
  focusAreas?: string[];
  batchProcessing?: BatchProcessingConfig;
+ multiInstance?: MultiInstanceConfig;
+ semanticDeduplication?: SemanticDeduplicationConfig;
+ }
+ export interface SemanticDeduplicationConfig {
+ enabled: boolean;
+ similarityThreshold: number;
+ batchSize: number;
+ timeout: string;
+ fallbackOnError: boolean;
+ logMatches: boolean;
  }
  export interface BatchProcessingConfig {
  enabled: boolean;
@@ -288,6 +298,93 @@ export interface BatchProcessingConfig {
  parallelBatches: boolean;
  batchDelayMs: number;
  singleRequestThreshold: number;
+ parallel?: {
+ enabled: boolean;
+ maxConcurrentBatches: number;
+ rateLimitStrategy: "fixed" | "adaptive";
+ tokenBudgetDistribution: "equal" | "weighted";
+ failureHandling: "stop-all" | "continue";
+ };
+ }
+ export interface MultiInstanceConfig {
+ enabled: boolean;
+ instanceCount: number;
+ instances: InstanceConfig[];
+ deduplication: DeduplicationConfig;
+ }
+ export interface InstanceConfig {
+ name: string;
+ provider: string;
+ model?: string;
+ temperature?: number;
+ maxTokens?: number;
+ weight?: number;
+ timeout?: string;
+ }
+ export interface DeduplicationConfig {
+ enabled: boolean;
+ similarityThreshold: number;
+ aiProvider?: string;
+ maxCommentsToPost: number;
+ prioritizeBy: "severity" | "similarity" | "confidence";
+ }
+ export interface InstanceResult {
+ instanceName: string;
+ violations: Violation[];
+ processingTime: number;
+ tokenUsage?: {
+ input: number;
+ output: number;
+ total: number;
+ };
+ error?: string;
+ success: boolean;
+ }
+ export interface DeduplicationResult {
+ uniqueViolations: Violation[];
+ duplicatesRemoved: {
+ exactDuplicates: number;
+ normalizedDuplicates: number;
+ sameLineDuplicates: number;
+ semanticDuplicates?: number;
+ };
+ instanceContributions: Map<string, number>;
+ processingMetrics: DeduplicationMetrics;
+ }
+ export interface DeduplicationMetrics {
+ totalViolationsInput: number;
+ exactDuplicatesRemoved: number;
+ normalizedDuplicatesRemoved: number;
+ sameLineDuplicatesRemoved: number;
+ semanticDuplicatesRemoved?: number;
+ finalUniqueViolations: number;
+ deduplicationRate: number;
+ instanceContributions: Record<string, number>;
+ processingTimeMs: number;
+ }
+ export interface CommentDeduplicationResult {
+ uniqueViolations: Violation[];
+ duplicatesRemoved: number;
+ semanticMatches: Array<{
+ violation: string;
+ comment: string;
+ similarityScore: number;
+ reasoning?: string;
+ }>;
+ }
+ export interface MultiInstanceResult {
+ instances: InstanceResult[];
+ deduplication: DeduplicationResult;
+ finalViolations: Violation[];
+ summary: {
+ totalInstances: number;
+ successfulInstances: number;
+ failedInstances: number;
+ totalViolationsFound: number;
+ uniqueViolationsAfterDedup: number;
+ deduplicationRate: number;
+ totalProcessingTime: number;
+ };
  }
  export interface DescriptionEnhancementConfig {
  enabled: boolean;
@@ -446,6 +543,28 @@ export interface Cache {
  size: number;
  };
  }
+ export interface ParallelProcessingMetrics {
+ totalBatches: number;
+ concurrentBatches: number;
+ parallelSpeedup: number;
+ tokenEfficiency: number;
+ failedBatches: number;
+ averageBatchTime: number;
+ totalProcessingTime: number;
+ serialProcessingTime?: number;
+ }
+ export interface SemaphoreInterface {
+ acquire(): Promise<void>;
+ release(): void;
+ getAvailablePermits(): number;
+ }
+ export interface TokenBudgetManagerInterface {
+ allocateForBatch(batchIndex: number, estimatedTokens: number): boolean;
+ releaseBatch(batchIndex: number): void;
+ getAvailableBudget(): number;
+ getTotalBudget(): number;
+ getUsedTokens(): number;
+ }
  export declare class GuardianError extends Error {
  code: string;
  context?: any | undefined;
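
Note: a sketch of the acquire/allocate/release pattern the new SemaphoreInterface and TokenBudgetManagerInterface imply, mirroring executeInstanceWithConcurrency above. The import path, the runTask helper, and the wrapper itself are assumptions for illustration, not package code; only the interface methods are taken from the diff.

// Hedged sketch: uses only the methods declared by the interfaces above.
import type { SemaphoreInterface, TokenBudgetManagerInterface } from "../types/index.js"; // path assumed

async function runWithLimits<T>(
  semaphore: SemaphoreInterface,
  budget: TokenBudgetManagerInterface,
  batchIndex: number,
  estimatedTokens: number,
  runTask: () => Promise<T>, // hypothetical: the AI call to guard
): Promise<T> {
  await semaphore.acquire(); // wait for a free concurrency slot
  try {
    if (!budget.allocateForBatch(batchIndex, estimatedTokens)) {
      throw new Error(`No token budget left (available: ${budget.getAvailableBudget()})`);
    }
    return await runTask(); // perform the guarded work
  } finally {
    budget.releaseBatch(batchIndex); // always return the token allocation
    semaphore.release();             // and the concurrency slot
  }
}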
@@ -0,0 +1,74 @@
+ /**
+ * Content Similarity Service for Semantic Deduplication
+ * Uses AI to compare violations with existing PR comments for semantic similarity
+ */
+ import { Violation, PRComment, AIProviderConfig } from "../types/index.js";
+ export interface SimilarityResult {
+ violationIndex: number;
+ commentIndex: number;
+ violationId: string;
+ commentId: number;
+ similarityScore: number;
+ reasoning?: string;
+ }
+ export interface SimilarityBatch {
+ violations: Array<{
+ index: number;
+ id: string;
+ content: string;
+ }>;
+ comments: Array<{
+ index: number;
+ id: number;
+ content: string;
+ }>;
+ }
+ /**
+ * Service for calculating semantic similarity between violations and PR comments
+ */
+ export declare class ContentSimilarityService {
+ private neurolink;
+ private aiConfig;
+ constructor(aiConfig: AIProviderConfig);
+ /**
+ * Calculate similarity scores between violations and comments in batches
+ */
+ batchCalculateSimilarity(violations: Violation[], comments: PRComment[], batchSize?: number): Promise<SimilarityResult[]>;
+ /**
+ * Prepare violation content for AI analysis
+ */
+ private prepareViolationContent;
+ /**
+ * Prepare comment content for AI analysis
+ */
+ private prepareCommentContent;
+ /**
+ * Extract meaningful content from violation for comparison
+ */
+ private extractViolationContent;
+ /**
+ * Extract meaningful content from comment for comparison
+ */
+ private extractCommentContent;
+ /**
+ * Process a single batch of violations against all comments
+ */
+ private processSimilarityBatch;
+ /**
+ * Create AI prompt for similarity analysis
+ */
+ private createSimilarityPrompt;
+ /**
+ * Parse AI response to extract similarity results
+ */
+ private parseSimilarityResponse;
+ /**
+ * Simple delay utility for rate limiting
+ */
+ private delay;
+ }
+ /**
+ * Factory function to create ContentSimilarityService
+ */
+ export declare function createContentSimilarityService(aiConfig: AIProviderConfig): ContentSimilarityService;
+ //# sourceMappingURL=ContentSimilarityService.d.ts.map
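
Note: a sketch of how batchCalculateSimilarity might be consumed to drop violations that already have a semantically similar PR comment. The import paths, the helper function, the threshold value, and the assumption that similarityScore uses the same 0-100 scale as similarityThreshold are illustrative; this is not the package's own semantic-deduplication logic.

// Hedged consumer sketch — only the declared factory, method, and result fields are from the diff.
import { createContentSimilarityService } from "./ContentSimilarityService.js"; // path assumed
import type { Violation, PRComment, AIProviderConfig } from "../types/index.js"; // path assumed

async function dropAlreadyCommented(
  aiConfig: AIProviderConfig,
  violations: Violation[],
  comments: PRComment[],
  similarityThreshold = 85, // assumed 0-100 scale, matching DeduplicationConfig validation
): Promise<Violation[]> {
  const service = createContentSimilarityService(aiConfig);
  const matches = await service.batchCalculateSimilarity(violations, comments);

  // Collect violation indices whose match clears the threshold, then filter them out.
  const duplicateIndices = new Set(
    matches
      .filter((m) => m.similarityScore >= similarityThreshold)
      .map((m) => m.violationIndex),
  );
  return violations.filter((_, index) => !duplicateIndices.has(index));
}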