@sentry/warden 0.6.0 → 0.7.0

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.md +1 -1
  2. package/dist/cli/commands/add.js +1 -1
  3. package/dist/cli/commands/add.js.map +1 -1
  4. package/dist/cli/commands/init.js +1 -1
  5. package/dist/cli/commands/setup-app/browser.d.ts +1 -0
  6. package/dist/cli/commands/setup-app/browser.d.ts.map +1 -1
  7. package/dist/cli/commands/setup-app/browser.js +10 -5
  8. package/dist/cli/commands/setup-app/browser.js.map +1 -1
  9. package/dist/cli/git.js +24 -24
  10. package/dist/cli/git.js.map +1 -1
  11. package/dist/cli/index.js +5 -1
  12. package/dist/cli/index.js.map +1 -1
  13. package/dist/cli/main.d.ts.map +1 -1
  14. package/dist/cli/main.js +29 -27
  15. package/dist/cli/main.js.map +1 -1
  16. package/dist/cli/output/ink-runner.d.ts.map +1 -1
  17. package/dist/cli/output/ink-runner.js +5 -7
  18. package/dist/cli/output/ink-runner.js.map +1 -1
  19. package/dist/cli/output/tasks.d.ts +1 -1
  20. package/dist/cli/output/tasks.d.ts.map +1 -1
  21. package/dist/cli/output/tasks.js +194 -161
  22. package/dist/cli/output/tasks.js.map +1 -1
  23. package/dist/config/loader.d.ts +4 -0
  24. package/dist/config/loader.d.ts.map +1 -1
  25. package/dist/config/loader.js +41 -34
  26. package/dist/config/loader.js.map +1 -1
  27. package/dist/config/schema.d.ts +14 -0
  28. package/dist/config/schema.d.ts.map +1 -1
  29. package/dist/config/schema.js +12 -0
  30. package/dist/config/schema.js.map +1 -1
  31. package/dist/diff/context.d.ts.map +1 -1
  32. package/dist/diff/context.js +5 -1
  33. package/dist/diff/context.js.map +1 -1
  34. package/dist/index.d.ts +2 -1
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +1 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/output/dedup.d.ts +22 -0
  39. package/dist/output/dedup.d.ts.map +1 -1
  40. package/dist/output/dedup.js +180 -0
  41. package/dist/output/dedup.js.map +1 -1
  42. package/dist/output/github-checks.d.ts +3 -1
  43. package/dist/output/github-checks.d.ts.map +1 -1
  44. package/dist/output/github-checks.js +3 -3
  45. package/dist/output/github-checks.js.map +1 -1
  46. package/dist/output/github-issues.d.ts.map +1 -1
  47. package/dist/output/github-issues.js +8 -2
  48. package/dist/output/github-issues.js.map +1 -1
  49. package/dist/output/renderer.d.ts +3 -1
  50. package/dist/output/renderer.d.ts.map +1 -1
  51. package/dist/output/renderer.js +39 -9
  52. package/dist/output/renderer.js.map +1 -1
  53. package/dist/output/stale.d.ts +6 -2
  54. package/dist/output/stale.d.ts.map +1 -1
  55. package/dist/output/stale.js +4 -4
  56. package/dist/output/stale.js.map +1 -1
  57. package/dist/output/types.d.ts +2 -0
  58. package/dist/output/types.d.ts.map +1 -1
  59. package/dist/sdk/analyze.d.ts.map +1 -1
  60. package/dist/sdk/analyze.js +287 -205
  61. package/dist/sdk/analyze.js.map +1 -1
  62. package/dist/sentry.d.ts +17 -0
  63. package/dist/sentry.d.ts.map +1 -0
  64. package/dist/sentry.js +119 -0
  65. package/dist/sentry.js.map +1 -0
  66. package/dist/skills/index.d.ts +4 -4
  67. package/dist/skills/index.d.ts.map +1 -1
  68. package/dist/skills/index.js +2 -2
  69. package/dist/skills/index.js.map +1 -1
  70. package/dist/skills/loader.d.ts +48 -6
  71. package/dist/skills/loader.d.ts.map +1 -1
  72. package/dist/skills/loader.js +134 -57
  73. package/dist/skills/loader.js.map +1 -1
  74. package/dist/skills/remote.d.ts +12 -0
  75. package/dist/skills/remote.d.ts.map +1 -1
  76. package/dist/skills/remote.js +81 -32
  77. package/dist/skills/remote.js.map +1 -1
  78. package/dist/utils/async.d.ts +14 -1
  79. package/dist/utils/async.d.ts.map +1 -1
  80. package/dist/utils/async.js +29 -7
  81. package/dist/utils/async.js.map +1 -1
  82. package/dist/utils/index.d.ts +1 -1
  83. package/dist/utils/index.d.ts.map +1 -1
  84. package/dist/utils/index.js +1 -1
  85. package/dist/utils/index.js.map +1 -1
  86. package/package.json +3 -2
  87. package/plugins/warden/skills/warden/references/creating-skills.md +2 -3
@@ -1,4 +1,5 @@
1
1
  import { query } from '@anthropic-ai/claude-agent-sdk';
2
+ import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from '../sentry.js';
2
3
  import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js';
3
4
  import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js';
4
5
  import { extractUsage, aggregateUsage, emptyUsage, estimateTokens, aggregateAuxiliaryUsage } from './usage.js';
@@ -6,6 +7,7 @@ import { buildHunkSystemPrompt, buildHunkUserPrompt } from './prompt.js';
6
7
  import { extractFindingsJson, extractFindingsWithLLM, validateFindings, deduplicateFindings } from './extract.js';
7
8
  import { LARGE_PROMPT_THRESHOLD_CHARS, DEFAULT_FILE_CONCURRENCY, } from './types.js';
8
9
  import { prepareFiles } from './prepare.js';
10
+ import { runPool } from '../utils/index.js';
9
11
  /**
10
12
  * Parse findings from a hunk analysis result.
11
13
  * Uses a two-tier extraction strategy:
@@ -43,180 +45,252 @@ async function parseHunkOutput(result, filename, apiKey) {
43
45
  */
44
46
  async function executeQuery(systemPrompt, userPrompt, repoPath, options) {
45
47
  const { maxTurns = 50, model, abortController, pathToClaudeCodeExecutable } = options;
46
- // Capture stderr output for better error diagnostics
47
- const stderrChunks = [];
48
- const stream = query({
49
- prompt: userPrompt,
50
- options: {
51
- maxTurns,
52
- cwd: repoPath,
53
- systemPrompt,
54
- // Only allow read-only tools - context is already provided in the prompt
55
- allowedTools: ['Read', 'Grep'],
56
- // Explicitly block modification/side-effect tools as defense-in-depth
57
- disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
58
- permissionMode: 'bypassPermissions',
59
- model,
60
- abortController,
61
- pathToClaudeCodeExecutable,
62
- stderr: (data) => {
63
- stderrChunks.push(data);
64
- },
48
+ const modelId = model ?? 'unknown';
49
+ return Sentry.startSpan({
50
+ op: 'gen_ai.invoke_agent',
51
+ name: `invoke_agent ${modelId}`,
52
+ attributes: {
53
+ 'gen_ai.operation.name': 'invoke_agent',
54
+ 'gen_ai.system': 'anthropic',
55
+ 'gen_ai.provider.name': 'anthropic',
56
+ 'gen_ai.request.model': modelId,
57
+ 'gen_ai.request.max_turns': maxTurns,
65
58
  },
66
- });
67
- let resultMessage;
68
- let authError;
69
- try {
70
- for await (const message of stream) {
71
- if (message.type === 'result') {
72
- resultMessage = message;
59
+ }, async (span) => {
60
+ // Capture stderr output for better error diagnostics
61
+ const stderrChunks = [];
62
+ const stream = query({
63
+ prompt: userPrompt,
64
+ options: {
65
+ maxTurns,
66
+ cwd: repoPath,
67
+ systemPrompt,
68
+ // Only allow read-only tools - context is already provided in the prompt
69
+ allowedTools: ['Read', 'Grep'],
70
+ // Explicitly block modification/side-effect tools as defense-in-depth
71
+ disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
72
+ permissionMode: 'bypassPermissions',
73
+ model,
74
+ abortController,
75
+ pathToClaudeCodeExecutable,
76
+ stderr: (data) => {
77
+ stderrChunks.push(data);
78
+ },
79
+ },
80
+ });
81
+ let resultMessage;
82
+ let authError;
83
+ try {
84
+ for await (const message of stream) {
85
+ if (message.type === 'result') {
86
+ resultMessage = message;
87
+ }
88
+ else if (message.type === 'auth_status' && message.error) {
89
+ // Capture authentication errors from auth_status messages
90
+ authError = message.error;
91
+ }
73
92
  }
74
- else if (message.type === 'auth_status' && message.error) {
75
- // Capture authentication errors from auth_status messages
76
- authError = message.error;
93
+ }
94
+ catch (error) {
95
+ // Re-throw with stderr info if available
96
+ const stderr = stderrChunks.join('').trim();
97
+ if (stderr) {
98
+ const originalMessage = error instanceof Error ? error.message : String(error);
99
+ const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
100
+ enhancedError.cause = error;
101
+ throw enhancedError;
77
102
  }
103
+ throw error;
78
104
  }
79
- }
80
- catch (error) {
81
- // Re-throw with stderr info if available
82
- const stderr = stderrChunks.join('').trim();
83
- if (stderr) {
84
- const originalMessage = error instanceof Error ? error.message : String(error);
85
- const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
86
- enhancedError.cause = error;
87
- throw enhancedError;
105
+ // Set response attributes from SDK result
106
+ if (resultMessage) {
107
+ const usage = resultMessage.usage;
108
+ if (usage) {
109
+ const inputTokens = usage.input_tokens ?? 0;
110
+ const outputTokens = usage.output_tokens ?? 0;
111
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
112
+ const cacheWrite = usage.cache_creation_input_tokens ?? 0;
113
+ span.setAttribute('gen_ai.usage.input_tokens', inputTokens);
114
+ span.setAttribute('gen_ai.usage.output_tokens', outputTokens);
115
+ span.setAttribute('gen_ai.usage.input_tokens.cached', cacheRead);
116
+ span.setAttribute('gen_ai.usage.input_tokens.cache_write', cacheWrite);
117
+ span.setAttribute('gen_ai.usage.total_tokens', inputTokens + outputTokens + cacheRead + cacheWrite);
118
+ }
119
+ if (resultMessage.uuid) {
120
+ span.setAttribute('gen_ai.response.id', resultMessage.uuid);
121
+ }
122
+ if (resultMessage.modelUsage) {
123
+ const models = Object.keys(resultMessage.modelUsage);
124
+ if (models[0]) {
125
+ span.setAttribute('gen_ai.response.model', models[0]);
126
+ }
127
+ }
128
+ // Optional SDK metadata attributes
129
+ const optionalAttrs = {
130
+ 'sdk.session_id': resultMessage.session_id,
131
+ 'sdk.duration_ms': resultMessage.duration_ms,
132
+ 'sdk.duration_api_ms': resultMessage.duration_api_ms,
133
+ 'sdk.num_turns': resultMessage.num_turns,
134
+ };
135
+ for (const [key, value] of Object.entries(optionalAttrs)) {
136
+ if (value !== undefined) {
137
+ span.setAttribute(key, value);
138
+ }
139
+ }
88
140
  }
89
- throw error;
90
- }
91
- const stderr = stderrChunks.join('').trim() || undefined;
92
- return { result: resultMessage, authError, stderr };
141
+ const stderr = stderrChunks.join('').trim() || undefined;
142
+ return { result: resultMessage, authError, stderr };
143
+ });
93
144
  }
94
145
  /**
95
146
  * Analyze a single hunk with retry logic for transient failures.
96
147
  */
97
148
  async function analyzeHunk(skill, hunkCtx, repoPath, options, callbacks, prContext) {
98
- const { apiKey, abortController, retry } = options;
99
- const systemPrompt = buildHunkSystemPrompt(skill);
100
- const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
101
- // Report prompt size information
102
- const systemChars = systemPrompt.length;
103
- const userChars = userPrompt.length;
104
- const totalChars = systemChars + userChars;
105
- const estimatedTokensCount = estimateTokens(totalChars);
106
- // Always call onPromptSize if provided (for debug mode)
107
- callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
108
- // Warn about large prompts
109
- if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
110
- callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
111
- }
112
- // Merge retry config with defaults
113
- const retryConfig = {
114
- ...DEFAULT_RETRY_CONFIG,
115
- ...retry,
116
- };
117
- let lastError;
118
- // Track accumulated usage across retry attempts for accurate cost reporting
119
- const accumulatedUsage = [];
120
- for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
121
- // Check for abort before each attempt
122
- if (abortController?.signal.aborted) {
123
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
149
+ const lineRange = callbacks?.lineRange ?? getHunkLineRange(hunkCtx);
150
+ return Sentry.startSpan({
151
+ op: 'skill.analyze_hunk',
152
+ name: `analyze hunk ${hunkCtx.filename}:${lineRange}`,
153
+ attributes: {
154
+ 'code.filepath': hunkCtx.filename,
155
+ 'hunk.line_range': lineRange,
156
+ },
157
+ }, async (span) => {
158
+ const { apiKey, abortController, retry } = options;
159
+ const systemPrompt = buildHunkSystemPrompt(skill);
160
+ const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
161
+ // Report prompt size information
162
+ const systemChars = systemPrompt.length;
163
+ const userChars = userPrompt.length;
164
+ const totalChars = systemChars + userChars;
165
+ const estimatedTokensCount = estimateTokens(totalChars);
166
+ // Always call onPromptSize if provided (for debug mode)
167
+ callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
168
+ // Warn about large prompts
169
+ if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
170
+ callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
124
171
  }
125
- try {
126
- const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
127
- // Check for authentication errors from auth_status messages
128
- // auth_status errors are always auth-related - throw immediately
129
- if (authError) {
130
- throw new WardenAuthenticationError(authError);
131
- }
132
- if (!resultMessage) {
133
- console.error('SDK returned no result');
172
+ // Merge retry config with defaults
173
+ const retryConfig = {
174
+ ...DEFAULT_RETRY_CONFIG,
175
+ ...retry,
176
+ };
177
+ let lastError;
178
+ // Track accumulated usage across retry attempts for accurate cost reporting
179
+ const accumulatedUsage = [];
180
+ for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
181
+ // Check for abort before each attempt
182
+ if (abortController?.signal.aborted) {
134
183
  return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
135
184
  }
136
- // Extract usage from the result, regardless of success/error status
137
- const usage = extractUsage(resultMessage);
138
- accumulatedUsage.push(usage);
139
- // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
140
- const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
141
- if (isError) {
142
- // Extract error messages from SDK result
143
- const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
144
- // Check if any error indicates authentication failure
145
- for (const err of errorMessages) {
146
- if (isAuthenticationErrorMessage(err)) {
147
- throw new WardenAuthenticationError();
185
+ try {
186
+ const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
187
+ // Check for authentication errors from auth_status messages
188
+ // auth_status errors are always auth-related - throw immediately
189
+ if (authError) {
190
+ throw new WardenAuthenticationError(authError);
191
+ }
192
+ if (!resultMessage) {
193
+ console.error('SDK returned no result');
194
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
195
+ }
196
+ // Extract usage from the result, regardless of success/error status
197
+ const usage = extractUsage(resultMessage);
198
+ accumulatedUsage.push(usage);
199
+ // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
200
+ const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
201
+ if (isError) {
202
+ // Extract error messages from SDK result
203
+ const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
204
+ // Check if any error indicates authentication failure
205
+ for (const err of errorMessages) {
206
+ if (isAuthenticationErrorMessage(err)) {
207
+ throw new WardenAuthenticationError();
208
+ }
148
209
  }
210
+ // SDK error - log and return failure with error details
211
+ const errorSummary = errorMessages.length > 0
212
+ ? errorMessages.join('; ')
213
+ : `SDK error: ${resultMessage.subtype}`;
214
+ console.error(`SDK execution failed: ${errorSummary}`);
215
+ return {
216
+ findings: [],
217
+ usage: aggregateUsage(accumulatedUsage),
218
+ failed: true,
219
+ extractionFailed: false,
220
+ };
221
+ }
222
+ const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
223
+ // Emit extraction metrics
224
+ emitExtractionMetrics(skill.name, parseResult.extractionMethod, parseResult.findings.length);
225
+ // Notify about extraction result (debug mode)
226
+ callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
227
+ // Notify about extraction failure if callback provided
228
+ if (parseResult.extractionFailed) {
229
+ callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
149
230
  }
150
- // SDK error - log and return failure with error details
151
- const errorSummary = errorMessages.length > 0
152
- ? errorMessages.join('; ')
153
- : `SDK error: ${resultMessage.subtype}`;
154
- console.error(`SDK execution failed: ${errorSummary}`);
231
+ span.setAttribute('hunk.failed', false);
232
+ span.setAttribute('finding.count', parseResult.findings.length);
155
233
  return {
156
- findings: [],
234
+ findings: parseResult.findings,
157
235
  usage: aggregateUsage(accumulatedUsage),
158
- failed: true,
159
- extractionFailed: false,
236
+ failed: false,
237
+ extractionFailed: parseResult.extractionFailed,
238
+ extractionError: parseResult.extractionError,
239
+ extractionPreview: parseResult.extractionPreview,
240
+ auxiliaryUsage: parseResult.extractionUsage
241
+ ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
242
+ : undefined,
160
243
  };
161
244
  }
162
- const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
163
- // Notify about extraction result (debug mode)
164
- callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
165
- // Notify about extraction failure if callback provided
166
- if (parseResult.extractionFailed) {
167
- callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
245
+ catch (error) {
246
+ lastError = error;
247
+ // Re-throw authentication errors (they shouldn't be retried)
248
+ if (error instanceof WardenAuthenticationError) {
249
+ throw error;
250
+ }
251
+ // Authentication errors should surface immediately with helpful guidance
252
+ if (isAuthenticationError(error)) {
253
+ throw new WardenAuthenticationError();
254
+ }
255
+ // Don't retry if not a retryable error or we've exhausted retries
256
+ if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
257
+ break;
258
+ }
259
+ // Calculate delay and wait before retry
260
+ const delayMs = calculateRetryDelay(attempt, retryConfig);
261
+ const errorMessage = error instanceof Error ? error.message : String(error);
262
+ Sentry.addBreadcrumb({
263
+ category: 'retry',
264
+ message: `Retrying hunk analysis`,
265
+ data: { attempt: attempt + 1, error: errorMessage, delayMs },
266
+ level: 'warning',
267
+ });
268
+ emitRetryMetric(skill.name, attempt + 1);
269
+ // Notify about retry in verbose mode
270
+ callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
271
+ try {
272
+ await sleep(delayMs, abortController?.signal);
273
+ }
274
+ catch {
275
+ // Aborted during sleep
276
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
277
+ }
168
278
  }
169
- return {
170
- findings: parseResult.findings,
171
- usage: aggregateUsage(accumulatedUsage),
172
- failed: false,
173
- extractionFailed: parseResult.extractionFailed,
174
- extractionError: parseResult.extractionError,
175
- extractionPreview: parseResult.extractionPreview,
176
- auxiliaryUsage: parseResult.extractionUsage
177
- ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
178
- : undefined,
179
- };
180
279
  }
181
- catch (error) {
182
- lastError = error;
183
- // Re-throw authentication errors (they shouldn't be retried)
184
- if (error instanceof WardenAuthenticationError) {
185
- throw error;
186
- }
187
- // Authentication errors should surface immediately with helpful guidance
188
- if (isAuthenticationError(error)) {
189
- throw new WardenAuthenticationError();
190
- }
191
- // Don't retry if not a retryable error or we've exhausted retries
192
- if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
193
- break;
194
- }
195
- // Calculate delay and wait before retry
196
- const delayMs = calculateRetryDelay(attempt, retryConfig);
197
- const errorMessage = error instanceof Error ? error.message : String(error);
198
- // Notify about retry in verbose mode
199
- callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
200
- try {
201
- await sleep(delayMs, abortController?.signal);
202
- }
203
- catch {
204
- // Aborted during sleep
205
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
206
- }
280
+ // All attempts failed - return failure with any accumulated usage
281
+ const finalError = lastError instanceof Error ? lastError.message : String(lastError);
282
+ // Log the final error
283
+ if (lastError) {
284
+ console.error(`All retry attempts failed: ${finalError}`);
207
285
  }
208
- }
209
- // All attempts failed - return failure with any accumulated usage
210
- const finalError = lastError instanceof Error ? lastError.message : String(lastError);
211
- // Log the final error
212
- if (lastError) {
213
- console.error(`All retry attempts failed: ${finalError}`);
214
- }
215
- // Also notify via callback if verbose
216
- if (options.verbose) {
217
- callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
218
- }
219
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
286
+ // Also notify via callback if verbose
287
+ if (options.verbose) {
288
+ callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
289
+ }
290
+ span.setAttribute('hunk.failed', true);
291
+ span.setAttribute('finding.count', 0);
292
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
293
+ });
220
294
  }
221
295
  /**
222
296
  * Get line range string for a hunk.
@@ -241,50 +315,62 @@ function attachElapsedTime(findings, skillStartTime) {
241
315
  * Analyze a single prepared file's hunks.
242
316
  */
243
317
  export async function analyzeFile(skill, file, repoPath, options = {}, callbacks, prContext) {
244
- const { abortController } = options;
245
- const fileFindings = [];
246
- const fileUsage = [];
247
- const fileAuxiliaryUsage = [];
248
- let failedHunks = 0;
249
- let failedExtractions = 0;
250
- for (const [hunkIndex, hunk] of file.hunks.entries()) {
251
- if (abortController?.signal.aborted)
252
- break;
253
- const lineRange = getHunkLineRange(hunk);
254
- callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
255
- const hunkCallbacks = callbacks
256
- ? {
257
- lineRange,
258
- onLargePrompt: callbacks.onLargePrompt,
259
- onPromptSize: callbacks.onPromptSize,
260
- onRetry: callbacks.onRetry,
261
- onExtractionFailure: callbacks.onExtractionFailure,
262
- onExtractionResult: callbacks.onExtractionResult,
318
+ return Sentry.startSpan({
319
+ op: 'skill.analyze_file',
320
+ name: `analyze file ${file.filename}`,
321
+ attributes: {
322
+ 'code.filepath': file.filename,
323
+ 'hunk.count': file.hunks.length,
324
+ },
325
+ }, async (span) => {
326
+ const { abortController } = options;
327
+ const fileFindings = [];
328
+ const fileUsage = [];
329
+ const fileAuxiliaryUsage = [];
330
+ let failedHunks = 0;
331
+ let failedExtractions = 0;
332
+ for (const [hunkIndex, hunk] of file.hunks.entries()) {
333
+ if (abortController?.signal.aborted)
334
+ break;
335
+ const lineRange = getHunkLineRange(hunk);
336
+ callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
337
+ const hunkCallbacks = callbacks
338
+ ? {
339
+ lineRange,
340
+ onLargePrompt: callbacks.onLargePrompt,
341
+ onPromptSize: callbacks.onPromptSize,
342
+ onRetry: callbacks.onRetry,
343
+ onExtractionFailure: callbacks.onExtractionFailure,
344
+ onExtractionResult: callbacks.onExtractionResult,
345
+ }
346
+ : undefined;
347
+ const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
348
+ if (result.failed) {
349
+ failedHunks++;
350
+ }
351
+ if (result.extractionFailed) {
352
+ failedExtractions++;
353
+ }
354
+ attachElapsedTime(result.findings, callbacks?.skillStartTime);
355
+ callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
356
+ fileFindings.push(...result.findings);
357
+ fileUsage.push(result.usage);
358
+ if (result.auxiliaryUsage) {
359
+ fileAuxiliaryUsage.push(...result.auxiliaryUsage);
263
360
  }
264
- : undefined;
265
- const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
266
- if (result.failed) {
267
- failedHunks++;
268
- }
269
- if (result.extractionFailed) {
270
- failedExtractions++;
271
- }
272
- attachElapsedTime(result.findings, callbacks?.skillStartTime);
273
- callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
274
- fileFindings.push(...result.findings);
275
- fileUsage.push(result.usage);
276
- if (result.auxiliaryUsage) {
277
- fileAuxiliaryUsage.push(...result.auxiliaryUsage);
278
361
  }
279
- }
280
- return {
281
- filename: file.filename,
282
- findings: fileFindings,
283
- usage: aggregateUsage(fileUsage),
284
- failedHunks,
285
- failedExtractions,
286
- auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
287
- };
362
+ span.setAttribute('finding.count', fileFindings.length);
363
+ span.setAttribute('hunk.failed_count', failedHunks);
364
+ span.setAttribute('extraction.failed_count', failedExtractions);
365
+ return {
366
+ filename: file.filename,
367
+ findings: fileFindings,
368
+ usage: aggregateUsage(fileUsage),
369
+ failedHunks,
370
+ failedExtractions,
371
+ auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
372
+ };
373
+ });
288
374
  }
289
375
  /**
290
376
  * Generate a summary of findings.
@@ -408,21 +494,16 @@ export async function runSkill(skill, context, options = {}) {
408
494
  const fileResults = [];
409
495
  // Process files - parallel or sequential based on options
410
496
  if (parallel) {
411
- // Process files in parallel with concurrency limit
497
+ // Process files with sliding-window concurrency pool
412
498
  const fileConcurrency = options.concurrency ?? DEFAULT_FILE_CONCURRENCY;
413
499
  const batchDelayMs = options.batchDelayMs ?? 0;
414
- for (let i = 0; i < fileHunks.length; i += fileConcurrency) {
415
- // Check for abort before starting new batch
416
- if (abortController?.signal.aborted)
417
- break;
418
- // Apply rate limiting delay between batches (not before the first batch)
419
- if (i > 0 && batchDelayMs > 0) {
500
+ fileResults.push(...await runPool(fileHunks, fileConcurrency, async (fileHunkEntry, index) => {
501
+ // Rate-limit: delay items beyond the first concurrent wave
502
+ if (index >= fileConcurrency && batchDelayMs > 0) {
420
503
  await new Promise((resolve) => setTimeout(resolve, batchDelayMs));
421
504
  }
422
- const batch = fileHunks.slice(i, i + fileConcurrency);
423
- const batchResults = await Promise.all(batch.map((fileHunkEntry, batchIndex) => processFileWithTiming(fileHunkEntry, i + batchIndex)));
424
- fileResults.push(...batchResults);
425
- }
505
+ return processFileWithTiming(fileHunkEntry, index);
506
+ }, { shouldAbort: () => abortController?.signal.aborted ?? false }));
426
507
  }
427
508
  else {
428
509
  // Process files sequentially
@@ -451,6 +532,7 @@ export async function runSkill(skill, context, options = {}) {
451
532
  }
452
533
  // Deduplicate findings
453
534
  const uniqueFindings = deduplicateFindings(allFindings);
535
+ emitDedupMetrics(allFindings.length, uniqueFindings.length);
454
536
  // Generate summary
455
537
  const summary = generateSummary(skill.name, uniqueFindings);
456
538
  // Aggregate usage across all hunks