npm - @sentry/warden - Versions diffs - 0.6.0 → 0.8.0 - Mend

@sentry/warden 0.6.0 → 0.8.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (89)

package/README.md +1 -1
package/dist/cli/commands/add.js +1 -1
package/dist/cli/commands/add.js.map +1 -1
package/dist/cli/commands/init.d.ts.map +1 -1
package/dist/cli/commands/init.js +5 -2
package/dist/cli/commands/init.js.map +1 -1
package/dist/cli/commands/setup-app/browser.d.ts +1 -0
package/dist/cli/commands/setup-app/browser.d.ts.map +1 -1
package/dist/cli/commands/setup-app/browser.js +10 -5
package/dist/cli/commands/setup-app/browser.js.map +1 -1
package/dist/cli/git.js +24 -24
package/dist/cli/git.js.map +1 -1
package/dist/cli/index.js +5 -1
package/dist/cli/index.js.map +1 -1
package/dist/cli/main.d.ts.map +1 -1
package/dist/cli/main.js +29 -27
package/dist/cli/main.js.map +1 -1
package/dist/cli/output/ink-runner.d.ts.map +1 -1
package/dist/cli/output/ink-runner.js +5 -7
package/dist/cli/output/ink-runner.js.map +1 -1
package/dist/cli/output/tasks.d.ts +1 -1
package/dist/cli/output/tasks.d.ts.map +1 -1
package/dist/cli/output/tasks.js +194 -161
package/dist/cli/output/tasks.js.map +1 -1
package/dist/config/loader.d.ts +4 -0
package/dist/config/loader.d.ts.map +1 -1
package/dist/config/loader.js +41 -34
package/dist/config/loader.js.map +1 -1
package/dist/config/schema.d.ts +14 -0
package/dist/config/schema.d.ts.map +1 -1
package/dist/config/schema.js +12 -0
package/dist/config/schema.js.map +1 -1
package/dist/diff/context.d.ts.map +1 -1
package/dist/diff/context.js +5 -1
package/dist/diff/context.js.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -1
package/dist/index.js.map +1 -1
package/dist/output/dedup.d.ts +22 -0
package/dist/output/dedup.d.ts.map +1 -1
package/dist/output/dedup.js +180 -0
package/dist/output/dedup.js.map +1 -1
package/dist/output/github-checks.d.ts +3 -1
package/dist/output/github-checks.d.ts.map +1 -1
package/dist/output/github-checks.js +3 -3
package/dist/output/github-checks.js.map +1 -1
package/dist/output/github-issues.d.ts.map +1 -1
package/dist/output/github-issues.js +8 -2
package/dist/output/github-issues.js.map +1 -1
package/dist/output/renderer.d.ts +3 -1
package/dist/output/renderer.d.ts.map +1 -1
package/dist/output/renderer.js +39 -9
package/dist/output/renderer.js.map +1 -1
package/dist/output/stale.d.ts +6 -2
package/dist/output/stale.d.ts.map +1 -1
package/dist/output/stale.js +4 -4
package/dist/output/stale.js.map +1 -1
package/dist/output/types.d.ts +2 -0
package/dist/output/types.d.ts.map +1 -1
package/dist/sdk/analyze.d.ts.map +1 -1
package/dist/sdk/analyze.js +294 -205
package/dist/sdk/analyze.js.map +1 -1
package/dist/sentry.d.ts +17 -0
package/dist/sentry.d.ts.map +1 -0
package/dist/sentry.js +119 -0
package/dist/sentry.js.map +1 -0
package/dist/skills/index.d.ts +4 -4
package/dist/skills/index.d.ts.map +1 -1
package/dist/skills/index.js +2 -2
package/dist/skills/index.js.map +1 -1
package/dist/skills/loader.d.ts +48 -6
package/dist/skills/loader.d.ts.map +1 -1
package/dist/skills/loader.js +134 -57
package/dist/skills/loader.js.map +1 -1
package/dist/skills/remote.d.ts +12 -0
package/dist/skills/remote.d.ts.map +1 -1
package/dist/skills/remote.js +81 -32
package/dist/skills/remote.js.map +1 -1
package/dist/utils/async.d.ts +14 -1
package/dist/utils/async.d.ts.map +1 -1
package/dist/utils/async.js +29 -7
package/dist/utils/async.js.map +1 -1
package/dist/utils/index.d.ts +1 -1
package/dist/utils/index.d.ts.map +1 -1
package/dist/utils/index.js +1 -1
package/dist/utils/index.js.map +1 -1
package/package.json +3 -2
package/plugins/warden/skills/warden/references/creating-skills.md +2 -3

package/dist/sdk/analyze.js (changed)

@@ -1,4 +1,5 @@
 import { query } from '@anthropic-ai/claude-agent-sdk';
+import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from '../sentry.js';
 import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js';
 import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js';
 import { extractUsage, aggregateUsage, emptyUsage, estimateTokens, aggregateAuxiliaryUsage } from './usage.js';
@@ -6,6 +7,7 @@ import { buildHunkSystemPrompt, buildHunkUserPrompt } from './prompt.js';
 import { extractFindingsJson, extractFindingsWithLLM, validateFindings, deduplicateFindings } from './extract.js';
 import { LARGE_PROMPT_THRESHOLD_CHARS, DEFAULT_FILE_CONCURRENCY, } from './types.js';
 import { prepareFiles } from './prepare.js';
+import { runPool } from '../utils/index.js';
 /**
  * Parse findings from a hunk analysis result.
  * Uses a two-tier extraction strategy:
@@ -43,180 +45,259 @@ async function parseHunkOutput(result, filename, apiKey) {
  */
 async function executeQuery(systemPrompt, userPrompt, repoPath, options) {
     const { maxTurns = 50, model, abortController, pathToClaudeCodeExecutable } = options;
-    // Capture stderr output for better error diagnostics
-    const stderrChunks = [];
-    const stream = query({
-        prompt: userPrompt,
-        options: {
-            maxTurns,
-            cwd: repoPath,
-            systemPrompt,
-            // Only allow read-only tools - context is already provided in the prompt
-            allowedTools: ['Read', 'Grep'],
-            // Explicitly block modification/side-effect tools as defense-in-depth
-            disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
-            permissionMode: 'bypassPermissions',
-            model,
-            abortController,
-            pathToClaudeCodeExecutable,
-            stderr: (data) => {
-                stderrChunks.push(data);
-            },
+    const modelId = model ?? 'unknown';
+    return Sentry.startSpan({
+        op: 'gen_ai.invoke_agent',
+        name: `invoke_agent ${modelId}`,
+        attributes: {
+            'gen_ai.operation.name': 'invoke_agent',
+            'gen_ai.system': 'anthropic',
+            'gen_ai.provider.name': 'anthropic',
+            'gen_ai.agent.name': modelId,
+            'gen_ai.request.model': modelId,
+            'gen_ai.request.max_turns': maxTurns,
         },
-    });
-    let resultMessage;
-    let authError;
-    try {
-        for await (const message of stream) {
-            if (message.type === 'result') {
-                resultMessage = message;
+    }, async (span) => {
+        // Capture stderr output for better error diagnostics
+        const stderrChunks = [];
+        const stream = query({
+            prompt: userPrompt,
+            options: {
+                maxTurns,
+                cwd: repoPath,
+                systemPrompt,
+                // Only allow read-only tools - context is already provided in the prompt
+                allowedTools: ['Read', 'Grep'],
+                // Explicitly block modification/side-effect tools as defense-in-depth
+                disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
+                permissionMode: 'bypassPermissions',
+                model,
+                abortController,
+                pathToClaudeCodeExecutable,
+                stderr: (data) => {
+                    stderrChunks.push(data);
+                },
+            },
+        });
+        let resultMessage;
+        let authError;
+        try {
+            for await (const message of stream) {
+                if (message.type === 'result') {
+                    resultMessage = message;
+                }
+                else if (message.type === 'auth_status' && message.error) {
+                    // Capture authentication errors from auth_status messages
+                    authError = message.error;
+                }
             }
-            else if (message.type === 'auth_status' && message.error) {
-                // Capture authentication errors from auth_status messages
-                authError = message.error;
+        }
+        catch (error) {
+            // Re-throw with stderr info if available
+            const stderr = stderrChunks.join('').trim();
+            if (stderr) {
+                const originalMessage = error instanceof Error ? error.message : String(error);
+                const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
+                enhancedError.cause = error;
+                throw enhancedError;
             }
+            throw error;
         }
-    }
-    catch (error) {
-        // Re-throw with stderr info if available
-        const stderr = stderrChunks.join('').trim();
-        if (stderr) {
-            const originalMessage = error instanceof Error ? error.message : String(error);
-            const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
-            enhancedError.cause = error;
-            throw enhancedError;
+        // Set response attributes from SDK result
+        if (resultMessage) {
+            const usage = resultMessage.usage;
+            if (usage) {
+                const inputTokens = usage.input_tokens ?? 0;
+                const outputTokens = usage.output_tokens ?? 0;
+                const cacheRead = usage.cache_read_input_tokens ?? 0;
+                const cacheWrite = usage.cache_creation_input_tokens ?? 0;
+                // Anthropic API's input_tokens is only the non-cached portion.
+                // OpenTelemetry gen_ai.usage.input_tokens expects the total input tokens.
+                const totalInputTokens = inputTokens + cacheRead + cacheWrite;
+                span.setAttribute('gen_ai.usage.input_tokens', totalInputTokens);
+                span.setAttribute('gen_ai.usage.output_tokens', outputTokens);
+                span.setAttribute('gen_ai.usage.input_tokens.cached', cacheRead);
+                span.setAttribute('gen_ai.usage.input_tokens.cache_write', cacheWrite);
+                span.setAttribute('gen_ai.usage.total_tokens', totalInputTokens + outputTokens);
+            }
+            if (resultMessage.total_cost_usd !== undefined) {
+                span.setAttribute('gen_ai.cost.total_tokens', resultMessage.total_cost_usd);
+            }
+            if (resultMessage.uuid) {
+                span.setAttribute('gen_ai.response.id', resultMessage.uuid);
+            }
+            if (resultMessage.modelUsage) {
+                const models = Object.keys(resultMessage.modelUsage);
+                if (models[0]) {
+                    span.setAttribute('gen_ai.response.model', models[0]);
+                }
+            }
+            // Optional SDK metadata attributes
+            const optionalAttrs = {
+                'sdk.session_id': resultMessage.session_id,
+                'sdk.duration_ms': resultMessage.duration_ms,
+                'sdk.duration_api_ms': resultMessage.duration_api_ms,
+                'sdk.num_turns': resultMessage.num_turns,
+            };
+            for (const [key, value] of Object.entries(optionalAttrs)) {
+                if (value !== undefined) {
+                    span.setAttribute(key, value);
+                }
+            }
         }
-        throw error;
-    }
-    const stderr = stderrChunks.join('').trim() || undefined;
-    return { result: resultMessage, authError, stderr };
+        const stderr = stderrChunks.join('').trim() || undefined;
+        return { result: resultMessage, authError, stderr };
+    });
 }
 /**
  * Analyze a single hunk with retry logic for transient failures.
  */
 async function analyzeHunk(skill, hunkCtx, repoPath, options, callbacks, prContext) {
-    const { apiKey, abortController, retry } = options;
-    const systemPrompt = buildHunkSystemPrompt(skill);
-    const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
-    // Report prompt size information
-    const systemChars = systemPrompt.length;
-    const userChars = userPrompt.length;
-    const totalChars = systemChars + userChars;
-    const estimatedTokensCount = estimateTokens(totalChars);
-    // Always call onPromptSize if provided (for debug mode)
-    callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
-    // Warn about large prompts
-    if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
-        callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
-    }
-    // Merge retry config with defaults
-    const retryConfig = {
-        ...DEFAULT_RETRY_CONFIG,
-        ...retry,
-    };
-    let lastError;
-    // Track accumulated usage across retry attempts for accurate cost reporting
-    const accumulatedUsage = [];
-    for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
-        // Check for abort before each attempt
-        if (abortController?.signal.aborted) {
-            return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
+    const lineRange = callbacks?.lineRange ?? getHunkLineRange(hunkCtx);
+    return Sentry.startSpan({
+        op: 'skill.analyze_hunk',
+        name: `analyze hunk ${hunkCtx.filename}:${lineRange}`,
+        attributes: {
+            'code.filepath': hunkCtx.filename,
+            'hunk.line_range': lineRange,
+        },
+    }, async (span) => {
+        const { apiKey, abortController, retry } = options;
+        const systemPrompt = buildHunkSystemPrompt(skill);
+        const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
+        // Report prompt size information
+        const systemChars = systemPrompt.length;
+        const userChars = userPrompt.length;
+        const totalChars = systemChars + userChars;
+        const estimatedTokensCount = estimateTokens(totalChars);
+        // Always call onPromptSize if provided (for debug mode)
+        callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
+        // Warn about large prompts
+        if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
+            callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
         }
-        try {
-            const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
-            // Check for authentication errors from auth_status messages
-            // auth_status errors are always auth-related - throw immediately
-            if (authError) {
-                throw new WardenAuthenticationError(authError);
-            }
-            if (!resultMessage) {
-                console.error('SDK returned no result');
+        // Merge retry config with defaults
+        const retryConfig = {
+            ...DEFAULT_RETRY_CONFIG,
+            ...retry,
+        };
+        let lastError;
+        // Track accumulated usage across retry attempts for accurate cost reporting
+        const accumulatedUsage = [];
+        for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
+            // Check for abort before each attempt
+            if (abortController?.signal.aborted) {
                 return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
             }
-            // Extract usage from the result, regardless of success/error status
-            const usage = extractUsage(resultMessage);
-            accumulatedUsage.push(usage);
-            // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
-            const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
-            if (isError) {
-                // Extract error messages from SDK result
-                const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
-                // Check if any error indicates authentication failure
-                for (const err of errorMessages) {
-                    if (isAuthenticationErrorMessage(err)) {
-                        throw new WardenAuthenticationError();
+            try {
+                const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
+                // Check for authentication errors from auth_status messages
+                // auth_status errors are always auth-related - throw immediately
+                if (authError) {
+                    throw new WardenAuthenticationError(authError);
+                }
+                if (!resultMessage) {
+                    console.error('SDK returned no result');
+                    return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
+                }
+                // Extract usage from the result, regardless of success/error status
+                const usage = extractUsage(resultMessage);
+                accumulatedUsage.push(usage);
+                // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
+                const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
+                if (isError) {
+                    // Extract error messages from SDK result
+                    const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
+                    // Check if any error indicates authentication failure
+                    for (const err of errorMessages) {
+                        if (isAuthenticationErrorMessage(err)) {
+                            throw new WardenAuthenticationError();
+                        }
                     }
+                    // SDK error - log and return failure with error details
+                    const errorSummary = errorMessages.length > 0
+                        ? errorMessages.join('; ')
+                        : `SDK error: ${resultMessage.subtype}`;
+                    console.error(`SDK execution failed: ${errorSummary}`);
+                    return {
+                        findings: [],
+                        usage: aggregateUsage(accumulatedUsage),
+                        failed: true,
+                        extractionFailed: false,
+                    };
+                }
+                const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
+                // Emit extraction metrics
+                emitExtractionMetrics(skill.name, parseResult.extractionMethod, parseResult.findings.length);
+                // Notify about extraction result (debug mode)
+                callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
+                // Notify about extraction failure if callback provided
+                if (parseResult.extractionFailed) {
+                    callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
                 }
-                // SDK error - log and return failure with error details
-                const errorSummary = errorMessages.length > 0
-                    ? errorMessages.join('; ')
-                    : `SDK error: ${resultMessage.subtype}`;
-                console.error(`SDK execution failed: ${errorSummary}`);
+                span.setAttribute('hunk.failed', false);
+                span.setAttribute('finding.count', parseResult.findings.length);
                 return {
-                    findings: [],
+                    findings: parseResult.findings,
                     usage: aggregateUsage(accumulatedUsage),
-                    failed: true,
-                    extractionFailed: false,
+                    failed: false,
+                    extractionFailed: parseResult.extractionFailed,
+                    extractionError: parseResult.extractionError,
+                    extractionPreview: parseResult.extractionPreview,
+                    auxiliaryUsage: parseResult.extractionUsage
+                        ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
+                        : undefined,
                 };
             }
-            const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
-            // Notify about extraction result (debug mode)
-            callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
-            // Notify about extraction failure if callback provided
-            if (parseResult.extractionFailed) {
-                callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
+            catch (error) {
+                lastError = error;
+                // Re-throw authentication errors (they shouldn't be retried)
+                if (error instanceof WardenAuthenticationError) {
+                    throw error;
+                }
+                // Authentication errors should surface immediately with helpful guidance
+                if (isAuthenticationError(error)) {
+                    throw new WardenAuthenticationError();
+                }
+                // Don't retry if not a retryable error or we've exhausted retries
+                if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
+                    break;
+                }
+                // Calculate delay and wait before retry
+                const delayMs = calculateRetryDelay(attempt, retryConfig);
+                const errorMessage = error instanceof Error ? error.message : String(error);
+                Sentry.addBreadcrumb({
+                    category: 'retry',
+                    message: `Retrying hunk analysis`,
+                    data: { attempt: attempt + 1, error: errorMessage, delayMs },
+                    level: 'warning',
+                });
+                emitRetryMetric(skill.name, attempt + 1);
+                // Notify about retry in verbose mode
+                callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
+                try {
+                    await sleep(delayMs, abortController?.signal);
+                }
+                catch {
+                    // Aborted during sleep
+                    return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
+                }
             }
-            return {
-                findings: parseResult.findings,
-                usage: aggregateUsage(accumulatedUsage),
-                failed: false,
-                extractionFailed: parseResult.extractionFailed,
-                extractionError: parseResult.extractionError,
-                extractionPreview: parseResult.extractionPreview,
-                auxiliaryUsage: parseResult.extractionUsage
-                    ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
-                    : undefined,
-            };
         }
-        catch (error) {
-            lastError = error;
-            // Re-throw authentication errors (they shouldn't be retried)
-            if (error instanceof WardenAuthenticationError) {
-                throw error;
-            }
-            // Authentication errors should surface immediately with helpful guidance
-            if (isAuthenticationError(error)) {
-                throw new WardenAuthenticationError();
-            }
-            // Don't retry if not a retryable error or we've exhausted retries
-            if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
-                break;
-            }
-            // Calculate delay and wait before retry
-            const delayMs = calculateRetryDelay(attempt, retryConfig);
-            const errorMessage = error instanceof Error ? error.message : String(error);
-            // Notify about retry in verbose mode
-            callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
-            try {
-                await sleep(delayMs, abortController?.signal);
-            }
-            catch {
-                // Aborted during sleep
-                return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
-            }
+        // All attempts failed - return failure with any accumulated usage
+        const finalError = lastError instanceof Error ? lastError.message : String(lastError);
+        // Log the final error
+        if (lastError) {
+            console.error(`All retry attempts failed: ${finalError}`);
         }
-    }
-    // All attempts failed - return failure with any accumulated usage
-    const finalError = lastError instanceof Error ? lastError.message : String(lastError);
-    // Log the final error
-    if (lastError) {
-        console.error(`All retry attempts failed: ${finalError}`);
-    }
-    // Also notify via callback if verbose
-    if (options.verbose) {
-        callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
-    }
-    return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
+        // Also notify via callback if verbose
+        if (options.verbose) {
+            callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
+        }
+        span.setAttribute('hunk.failed', true);
+        span.setAttribute('finding.count', 0);
+        return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
+    });
 }
 /**
  * Get line range string for a hunk.
@@ -241,50 +322,62 @@ function attachElapsedTime(findings, skillStartTime) {
  * Analyze a single prepared file's hunks.
  */
 export async function analyzeFile(skill, file, repoPath, options = {}, callbacks, prContext) {
-    const { abortController } = options;
-    const fileFindings = [];
-    const fileUsage = [];
-    const fileAuxiliaryUsage = [];
-    let failedHunks = 0;
-    let failedExtractions = 0;
-    for (const [hunkIndex, hunk] of file.hunks.entries()) {
-        if (abortController?.signal.aborted)
-            break;
-        const lineRange = getHunkLineRange(hunk);
-        callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
-        const hunkCallbacks = callbacks
-            ? {
-                lineRange,
-                onLargePrompt: callbacks.onLargePrompt,
-                onPromptSize: callbacks.onPromptSize,
-                onRetry: callbacks.onRetry,
-                onExtractionFailure: callbacks.onExtractionFailure,
-                onExtractionResult: callbacks.onExtractionResult,
+    return Sentry.startSpan({
+        op: 'skill.analyze_file',
+        name: `analyze file ${file.filename}`,
+        attributes: {
+            'code.filepath': file.filename,
+            'hunk.count': file.hunks.length,
+        },
+    }, async (span) => {
+        const { abortController } = options;
+        const fileFindings = [];
+        const fileUsage = [];
+        const fileAuxiliaryUsage = [];
+        let failedHunks = 0;
+        let failedExtractions = 0;
+        for (const [hunkIndex, hunk] of file.hunks.entries()) {
+            if (abortController?.signal.aborted)
+                break;
+            const lineRange = getHunkLineRange(hunk);
+            callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
+            const hunkCallbacks = callbacks
+                ? {
+                    lineRange,
+                    onLargePrompt: callbacks.onLargePrompt,
+                    onPromptSize: callbacks.onPromptSize,
+                    onRetry: callbacks.onRetry,
+                    onExtractionFailure: callbacks.onExtractionFailure,
+                    onExtractionResult: callbacks.onExtractionResult,
+                }
+                : undefined;
+            const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
+            if (result.failed) {
+                failedHunks++;
+            }
+            if (result.extractionFailed) {
+                failedExtractions++;
+            }
+            attachElapsedTime(result.findings, callbacks?.skillStartTime);
+            callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
+            fileFindings.push(...result.findings);
+            fileUsage.push(result.usage);
+            if (result.auxiliaryUsage) {
+                fileAuxiliaryUsage.push(...result.auxiliaryUsage);
             }
-            : undefined;
-        const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
-        if (result.failed) {
-            failedHunks++;
-        }
-        if (result.extractionFailed) {
-            failedExtractions++;
-        }
-        attachElapsedTime(result.findings, callbacks?.skillStartTime);
-        callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
-        fileFindings.push(...result.findings);
-        fileUsage.push(result.usage);
-        if (result.auxiliaryUsage) {
-            fileAuxiliaryUsage.push(...result.auxiliaryUsage);
         }
-    }
-    return {
-        filename: file.filename,
-        findings: fileFindings,
-        usage: aggregateUsage(fileUsage),
-        failedHunks,
-        failedExtractions,
-        auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
-    };
+        span.setAttribute('finding.count', fileFindings.length);
+        span.setAttribute('hunk.failed_count', failedHunks);
+        span.setAttribute('extraction.failed_count', failedExtractions);
+        return {
+            filename: file.filename,
+            findings: fileFindings,
+            usage: aggregateUsage(fileUsage),
+            failedHunks,
+            failedExtractions,
+            auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
+        };
+    });
 }
 /**
  * Generate a summary of findings.
@@ -408,21 +501,16 @@ export async function runSkill(skill, context, options = {}) {
     const fileResults = [];
     // Process files - parallel or sequential based on options
     if (parallel) {
-        // Process files in parallel with concurrency limit
+        // Process files with sliding-window concurrency pool
         const fileConcurrency = options.concurrency ?? DEFAULT_FILE_CONCURRENCY;
         const batchDelayMs = options.batchDelayMs ?? 0;
-        for (let i = 0; i < fileHunks.length; i += fileConcurrency) {
-            // Check for abort before starting new batch
-            if (abortController?.signal.aborted)
-                break;
-            // Apply rate limiting delay between batches (not before the first batch)
-            if (i > 0 && batchDelayMs > 0) {
+        fileResults.push(...await runPool(fileHunks, fileConcurrency, async (fileHunkEntry, index) => {
+            // Rate-limit: delay items beyond the first concurrent wave
+            if (index >= fileConcurrency && batchDelayMs > 0) {
                 await new Promise((resolve) => setTimeout(resolve, batchDelayMs));
             }
-            const batch = fileHunks.slice(i, i + fileConcurrency);
-            const batchResults = await Promise.all(batch.map((fileHunkEntry, batchIndex) => processFileWithTiming(fileHunkEntry, i + batchIndex)));
-            fileResults.push(...batchResults);
-        }
+            return processFileWithTiming(fileHunkEntry, index);
+        }, { shouldAbort: () => abortController?.signal.aborted ?? false }));
     }
     else {
         // Process files sequentially
@@ -451,6 +539,7 @@ export async function runSkill(skill, context, options = {}) {
     }
     // Deduplicate findings
     const uniqueFindings = deduplicateFindings(allFindings);
+    emitDedupMetrics(allFindings.length, uniqueFindings.length);
     // Generate summary
     const summary = generateSummary(skill.name, uniqueFindings);
     // Aggregate usage across all hunks