@sentry/warden 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +1 -1
  2. package/dist/cli/commands/add.js +1 -1
  3. package/dist/cli/commands/add.js.map +1 -1
  4. package/dist/cli/commands/init.d.ts.map +1 -1
  5. package/dist/cli/commands/init.js +5 -2
  6. package/dist/cli/commands/init.js.map +1 -1
  7. package/dist/cli/commands/setup-app/browser.d.ts +1 -0
  8. package/dist/cli/commands/setup-app/browser.d.ts.map +1 -1
  9. package/dist/cli/commands/setup-app/browser.js +10 -5
  10. package/dist/cli/commands/setup-app/browser.js.map +1 -1
  11. package/dist/cli/git.js +24 -24
  12. package/dist/cli/git.js.map +1 -1
  13. package/dist/cli/index.js +5 -1
  14. package/dist/cli/index.js.map +1 -1
  15. package/dist/cli/main.d.ts.map +1 -1
  16. package/dist/cli/main.js +29 -27
  17. package/dist/cli/main.js.map +1 -1
  18. package/dist/cli/output/ink-runner.d.ts.map +1 -1
  19. package/dist/cli/output/ink-runner.js +5 -7
  20. package/dist/cli/output/ink-runner.js.map +1 -1
  21. package/dist/cli/output/tasks.d.ts +1 -1
  22. package/dist/cli/output/tasks.d.ts.map +1 -1
  23. package/dist/cli/output/tasks.js +194 -161
  24. package/dist/cli/output/tasks.js.map +1 -1
  25. package/dist/config/loader.d.ts +4 -0
  26. package/dist/config/loader.d.ts.map +1 -1
  27. package/dist/config/loader.js +41 -34
  28. package/dist/config/loader.js.map +1 -1
  29. package/dist/config/schema.d.ts +14 -0
  30. package/dist/config/schema.d.ts.map +1 -1
  31. package/dist/config/schema.js +12 -0
  32. package/dist/config/schema.js.map +1 -1
  33. package/dist/diff/context.d.ts.map +1 -1
  34. package/dist/diff/context.js +5 -1
  35. package/dist/diff/context.js.map +1 -1
  36. package/dist/index.d.ts +2 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +1 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/output/dedup.d.ts +22 -0
  41. package/dist/output/dedup.d.ts.map +1 -1
  42. package/dist/output/dedup.js +180 -0
  43. package/dist/output/dedup.js.map +1 -1
  44. package/dist/output/github-checks.d.ts +3 -1
  45. package/dist/output/github-checks.d.ts.map +1 -1
  46. package/dist/output/github-checks.js +3 -3
  47. package/dist/output/github-checks.js.map +1 -1
  48. package/dist/output/github-issues.d.ts.map +1 -1
  49. package/dist/output/github-issues.js +8 -2
  50. package/dist/output/github-issues.js.map +1 -1
  51. package/dist/output/renderer.d.ts +3 -1
  52. package/dist/output/renderer.d.ts.map +1 -1
  53. package/dist/output/renderer.js +39 -9
  54. package/dist/output/renderer.js.map +1 -1
  55. package/dist/output/stale.d.ts +6 -2
  56. package/dist/output/stale.d.ts.map +1 -1
  57. package/dist/output/stale.js +4 -4
  58. package/dist/output/stale.js.map +1 -1
  59. package/dist/output/types.d.ts +2 -0
  60. package/dist/output/types.d.ts.map +1 -1
  61. package/dist/sdk/analyze.d.ts.map +1 -1
  62. package/dist/sdk/analyze.js +294 -205
  63. package/dist/sdk/analyze.js.map +1 -1
  64. package/dist/sentry.d.ts +17 -0
  65. package/dist/sentry.d.ts.map +1 -0
  66. package/dist/sentry.js +119 -0
  67. package/dist/sentry.js.map +1 -0
  68. package/dist/skills/index.d.ts +4 -4
  69. package/dist/skills/index.d.ts.map +1 -1
  70. package/dist/skills/index.js +2 -2
  71. package/dist/skills/index.js.map +1 -1
  72. package/dist/skills/loader.d.ts +48 -6
  73. package/dist/skills/loader.d.ts.map +1 -1
  74. package/dist/skills/loader.js +134 -57
  75. package/dist/skills/loader.js.map +1 -1
  76. package/dist/skills/remote.d.ts +12 -0
  77. package/dist/skills/remote.d.ts.map +1 -1
  78. package/dist/skills/remote.js +81 -32
  79. package/dist/skills/remote.js.map +1 -1
  80. package/dist/utils/async.d.ts +14 -1
  81. package/dist/utils/async.d.ts.map +1 -1
  82. package/dist/utils/async.js +29 -7
  83. package/dist/utils/async.js.map +1 -1
  84. package/dist/utils/index.d.ts +1 -1
  85. package/dist/utils/index.d.ts.map +1 -1
  86. package/dist/utils/index.js +1 -1
  87. package/dist/utils/index.js.map +1 -1
  88. package/package.json +3 -2
  89. package/plugins/warden/skills/warden/references/creating-skills.md +2 -3
@@ -1,4 +1,5 @@
1
1
  import { query } from '@anthropic-ai/claude-agent-sdk';
2
+ import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from '../sentry.js';
2
3
  import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js';
3
4
  import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js';
4
5
  import { extractUsage, aggregateUsage, emptyUsage, estimateTokens, aggregateAuxiliaryUsage } from './usage.js';
@@ -6,6 +7,7 @@ import { buildHunkSystemPrompt, buildHunkUserPrompt } from './prompt.js';
6
7
  import { extractFindingsJson, extractFindingsWithLLM, validateFindings, deduplicateFindings } from './extract.js';
7
8
  import { LARGE_PROMPT_THRESHOLD_CHARS, DEFAULT_FILE_CONCURRENCY, } from './types.js';
8
9
  import { prepareFiles } from './prepare.js';
10
+ import { runPool } from '../utils/index.js';
9
11
  /**
10
12
  * Parse findings from a hunk analysis result.
11
13
  * Uses a two-tier extraction strategy:
@@ -43,180 +45,259 @@ async function parseHunkOutput(result, filename, apiKey) {
43
45
  */
44
46
  async function executeQuery(systemPrompt, userPrompt, repoPath, options) {
45
47
  const { maxTurns = 50, model, abortController, pathToClaudeCodeExecutable } = options;
46
- // Capture stderr output for better error diagnostics
47
- const stderrChunks = [];
48
- const stream = query({
49
- prompt: userPrompt,
50
- options: {
51
- maxTurns,
52
- cwd: repoPath,
53
- systemPrompt,
54
- // Only allow read-only tools - context is already provided in the prompt
55
- allowedTools: ['Read', 'Grep'],
56
- // Explicitly block modification/side-effect tools as defense-in-depth
57
- disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
58
- permissionMode: 'bypassPermissions',
59
- model,
60
- abortController,
61
- pathToClaudeCodeExecutable,
62
- stderr: (data) => {
63
- stderrChunks.push(data);
64
- },
48
+ const modelId = model ?? 'unknown';
49
+ return Sentry.startSpan({
50
+ op: 'gen_ai.invoke_agent',
51
+ name: `invoke_agent ${modelId}`,
52
+ attributes: {
53
+ 'gen_ai.operation.name': 'invoke_agent',
54
+ 'gen_ai.system': 'anthropic',
55
+ 'gen_ai.provider.name': 'anthropic',
56
+ 'gen_ai.agent.name': modelId,
57
+ 'gen_ai.request.model': modelId,
58
+ 'gen_ai.request.max_turns': maxTurns,
65
59
  },
66
- });
67
- let resultMessage;
68
- let authError;
69
- try {
70
- for await (const message of stream) {
71
- if (message.type === 'result') {
72
- resultMessage = message;
60
+ }, async (span) => {
61
+ // Capture stderr output for better error diagnostics
62
+ const stderrChunks = [];
63
+ const stream = query({
64
+ prompt: userPrompt,
65
+ options: {
66
+ maxTurns,
67
+ cwd: repoPath,
68
+ systemPrompt,
69
+ // Only allow read-only tools - context is already provided in the prompt
70
+ allowedTools: ['Read', 'Grep'],
71
+ // Explicitly block modification/side-effect tools as defense-in-depth
72
+ disallowedTools: ['Write', 'Edit', 'Bash', 'WebFetch', 'WebSearch', 'Task', 'TodoWrite'],
73
+ permissionMode: 'bypassPermissions',
74
+ model,
75
+ abortController,
76
+ pathToClaudeCodeExecutable,
77
+ stderr: (data) => {
78
+ stderrChunks.push(data);
79
+ },
80
+ },
81
+ });
82
+ let resultMessage;
83
+ let authError;
84
+ try {
85
+ for await (const message of stream) {
86
+ if (message.type === 'result') {
87
+ resultMessage = message;
88
+ }
89
+ else if (message.type === 'auth_status' && message.error) {
90
+ // Capture authentication errors from auth_status messages
91
+ authError = message.error;
92
+ }
73
93
  }
74
- else if (message.type === 'auth_status' && message.error) {
75
- // Capture authentication errors from auth_status messages
76
- authError = message.error;
94
+ }
95
+ catch (error) {
96
+ // Re-throw with stderr info if available
97
+ const stderr = stderrChunks.join('').trim();
98
+ if (stderr) {
99
+ const originalMessage = error instanceof Error ? error.message : String(error);
100
+ const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
101
+ enhancedError.cause = error;
102
+ throw enhancedError;
77
103
  }
104
+ throw error;
78
105
  }
79
- }
80
- catch (error) {
81
- // Re-throw with stderr info if available
82
- const stderr = stderrChunks.join('').trim();
83
- if (stderr) {
84
- const originalMessage = error instanceof Error ? error.message : String(error);
85
- const enhancedError = new Error(`${originalMessage}\nClaude Code stderr: ${stderr}`);
86
- enhancedError.cause = error;
87
- throw enhancedError;
106
+ // Set response attributes from SDK result
107
+ if (resultMessage) {
108
+ const usage = resultMessage.usage;
109
+ if (usage) {
110
+ const inputTokens = usage.input_tokens ?? 0;
111
+ const outputTokens = usage.output_tokens ?? 0;
112
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
113
+ const cacheWrite = usage.cache_creation_input_tokens ?? 0;
114
+ // Anthropic API's input_tokens is only the non-cached portion.
115
+ // OpenTelemetry gen_ai.usage.input_tokens expects the total input tokens.
116
+ const totalInputTokens = inputTokens + cacheRead + cacheWrite;
117
+ span.setAttribute('gen_ai.usage.input_tokens', totalInputTokens);
118
+ span.setAttribute('gen_ai.usage.output_tokens', outputTokens);
119
+ span.setAttribute('gen_ai.usage.input_tokens.cached', cacheRead);
120
+ span.setAttribute('gen_ai.usage.input_tokens.cache_write', cacheWrite);
121
+ span.setAttribute('gen_ai.usage.total_tokens', totalInputTokens + outputTokens);
122
+ }
123
+ if (resultMessage.total_cost_usd !== undefined) {
124
+ span.setAttribute('gen_ai.cost.total_tokens', resultMessage.total_cost_usd);
125
+ }
126
+ if (resultMessage.uuid) {
127
+ span.setAttribute('gen_ai.response.id', resultMessage.uuid);
128
+ }
129
+ if (resultMessage.modelUsage) {
130
+ const models = Object.keys(resultMessage.modelUsage);
131
+ if (models[0]) {
132
+ span.setAttribute('gen_ai.response.model', models[0]);
133
+ }
134
+ }
135
+ // Optional SDK metadata attributes
136
+ const optionalAttrs = {
137
+ 'sdk.session_id': resultMessage.session_id,
138
+ 'sdk.duration_ms': resultMessage.duration_ms,
139
+ 'sdk.duration_api_ms': resultMessage.duration_api_ms,
140
+ 'sdk.num_turns': resultMessage.num_turns,
141
+ };
142
+ for (const [key, value] of Object.entries(optionalAttrs)) {
143
+ if (value !== undefined) {
144
+ span.setAttribute(key, value);
145
+ }
146
+ }
88
147
  }
89
- throw error;
90
- }
91
- const stderr = stderrChunks.join('').trim() || undefined;
92
- return { result: resultMessage, authError, stderr };
148
+ const stderr = stderrChunks.join('').trim() || undefined;
149
+ return { result: resultMessage, authError, stderr };
150
+ });
93
151
  }
94
152
  /**
95
153
  * Analyze a single hunk with retry logic for transient failures.
96
154
  */
97
155
  async function analyzeHunk(skill, hunkCtx, repoPath, options, callbacks, prContext) {
98
- const { apiKey, abortController, retry } = options;
99
- const systemPrompt = buildHunkSystemPrompt(skill);
100
- const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
101
- // Report prompt size information
102
- const systemChars = systemPrompt.length;
103
- const userChars = userPrompt.length;
104
- const totalChars = systemChars + userChars;
105
- const estimatedTokensCount = estimateTokens(totalChars);
106
- // Always call onPromptSize if provided (for debug mode)
107
- callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
108
- // Warn about large prompts
109
- if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
110
- callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
111
- }
112
- // Merge retry config with defaults
113
- const retryConfig = {
114
- ...DEFAULT_RETRY_CONFIG,
115
- ...retry,
116
- };
117
- let lastError;
118
- // Track accumulated usage across retry attempts for accurate cost reporting
119
- const accumulatedUsage = [];
120
- for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
121
- // Check for abort before each attempt
122
- if (abortController?.signal.aborted) {
123
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
156
+ const lineRange = callbacks?.lineRange ?? getHunkLineRange(hunkCtx);
157
+ return Sentry.startSpan({
158
+ op: 'skill.analyze_hunk',
159
+ name: `analyze hunk ${hunkCtx.filename}:${lineRange}`,
160
+ attributes: {
161
+ 'code.filepath': hunkCtx.filename,
162
+ 'hunk.line_range': lineRange,
163
+ },
164
+ }, async (span) => {
165
+ const { apiKey, abortController, retry } = options;
166
+ const systemPrompt = buildHunkSystemPrompt(skill);
167
+ const userPrompt = buildHunkUserPrompt(skill, hunkCtx, prContext);
168
+ // Report prompt size information
169
+ const systemChars = systemPrompt.length;
170
+ const userChars = userPrompt.length;
171
+ const totalChars = systemChars + userChars;
172
+ const estimatedTokensCount = estimateTokens(totalChars);
173
+ // Always call onPromptSize if provided (for debug mode)
174
+ callbacks?.onPromptSize?.(callbacks.lineRange, systemChars, userChars, totalChars, estimatedTokensCount);
175
+ // Warn about large prompts
176
+ if (totalChars > LARGE_PROMPT_THRESHOLD_CHARS) {
177
+ callbacks?.onLargePrompt?.(callbacks.lineRange, totalChars, estimatedTokensCount);
124
178
  }
125
- try {
126
- const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
127
- // Check for authentication errors from auth_status messages
128
- // auth_status errors are always auth-related - throw immediately
129
- if (authError) {
130
- throw new WardenAuthenticationError(authError);
131
- }
132
- if (!resultMessage) {
133
- console.error('SDK returned no result');
179
+ // Merge retry config with defaults
180
+ const retryConfig = {
181
+ ...DEFAULT_RETRY_CONFIG,
182
+ ...retry,
183
+ };
184
+ let lastError;
185
+ // Track accumulated usage across retry attempts for accurate cost reporting
186
+ const accumulatedUsage = [];
187
+ for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) {
188
+ // Check for abort before each attempt
189
+ if (abortController?.signal.aborted) {
134
190
  return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
135
191
  }
136
- // Extract usage from the result, regardless of success/error status
137
- const usage = extractUsage(resultMessage);
138
- accumulatedUsage.push(usage);
139
- // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
140
- const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
141
- if (isError) {
142
- // Extract error messages from SDK result
143
- const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
144
- // Check if any error indicates authentication failure
145
- for (const err of errorMessages) {
146
- if (isAuthenticationErrorMessage(err)) {
147
- throw new WardenAuthenticationError();
192
+ try {
193
+ const { result: resultMessage, authError } = await executeQuery(systemPrompt, userPrompt, repoPath, options);
194
+ // Check for authentication errors from auth_status messages
195
+ // auth_status errors are always auth-related - throw immediately
196
+ if (authError) {
197
+ throw new WardenAuthenticationError(authError);
198
+ }
199
+ if (!resultMessage) {
200
+ console.error('SDK returned no result');
201
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
202
+ }
203
+ // Extract usage from the result, regardless of success/error status
204
+ const usage = extractUsage(resultMessage);
205
+ accumulatedUsage.push(usage);
206
+ // Check if the SDK returned an error result (e.g., max turns, budget exceeded)
207
+ const isError = resultMessage.is_error || resultMessage.subtype !== 'success';
208
+ if (isError) {
209
+ // Extract error messages from SDK result
210
+ const errorMessages = 'errors' in resultMessage ? resultMessage.errors : [];
211
+ // Check if any error indicates authentication failure
212
+ for (const err of errorMessages) {
213
+ if (isAuthenticationErrorMessage(err)) {
214
+ throw new WardenAuthenticationError();
215
+ }
148
216
  }
217
+ // SDK error - log and return failure with error details
218
+ const errorSummary = errorMessages.length > 0
219
+ ? errorMessages.join('; ')
220
+ : `SDK error: ${resultMessage.subtype}`;
221
+ console.error(`SDK execution failed: ${errorSummary}`);
222
+ return {
223
+ findings: [],
224
+ usage: aggregateUsage(accumulatedUsage),
225
+ failed: true,
226
+ extractionFailed: false,
227
+ };
228
+ }
229
+ const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
230
+ // Emit extraction metrics
231
+ emitExtractionMetrics(skill.name, parseResult.extractionMethod, parseResult.findings.length);
232
+ // Notify about extraction result (debug mode)
233
+ callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
234
+ // Notify about extraction failure if callback provided
235
+ if (parseResult.extractionFailed) {
236
+ callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
149
237
  }
150
- // SDK error - log and return failure with error details
151
- const errorSummary = errorMessages.length > 0
152
- ? errorMessages.join('; ')
153
- : `SDK error: ${resultMessage.subtype}`;
154
- console.error(`SDK execution failed: ${errorSummary}`);
238
+ span.setAttribute('hunk.failed', false);
239
+ span.setAttribute('finding.count', parseResult.findings.length);
155
240
  return {
156
- findings: [],
241
+ findings: parseResult.findings,
157
242
  usage: aggregateUsage(accumulatedUsage),
158
- failed: true,
159
- extractionFailed: false,
243
+ failed: false,
244
+ extractionFailed: parseResult.extractionFailed,
245
+ extractionError: parseResult.extractionError,
246
+ extractionPreview: parseResult.extractionPreview,
247
+ auxiliaryUsage: parseResult.extractionUsage
248
+ ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
249
+ : undefined,
160
250
  };
161
251
  }
162
- const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
163
- // Notify about extraction result (debug mode)
164
- callbacks?.onExtractionResult?.(callbacks.lineRange, parseResult.findings.length, parseResult.extractionMethod);
165
- // Notify about extraction failure if callback provided
166
- if (parseResult.extractionFailed) {
167
- callbacks?.onExtractionFailure?.(callbacks.lineRange, parseResult.extractionError ?? 'unknown_error', parseResult.extractionPreview ?? '');
252
+ catch (error) {
253
+ lastError = error;
254
+ // Re-throw authentication errors (they shouldn't be retried)
255
+ if (error instanceof WardenAuthenticationError) {
256
+ throw error;
257
+ }
258
+ // Authentication errors should surface immediately with helpful guidance
259
+ if (isAuthenticationError(error)) {
260
+ throw new WardenAuthenticationError();
261
+ }
262
+ // Don't retry if not a retryable error or we've exhausted retries
263
+ if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
264
+ break;
265
+ }
266
+ // Calculate delay and wait before retry
267
+ const delayMs = calculateRetryDelay(attempt, retryConfig);
268
+ const errorMessage = error instanceof Error ? error.message : String(error);
269
+ Sentry.addBreadcrumb({
270
+ category: 'retry',
271
+ message: `Retrying hunk analysis`,
272
+ data: { attempt: attempt + 1, error: errorMessage, delayMs },
273
+ level: 'warning',
274
+ });
275
+ emitRetryMetric(skill.name, attempt + 1);
276
+ // Notify about retry in verbose mode
277
+ callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
278
+ try {
279
+ await sleep(delayMs, abortController?.signal);
280
+ }
281
+ catch {
282
+ // Aborted during sleep
283
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
284
+ }
168
285
  }
169
- return {
170
- findings: parseResult.findings,
171
- usage: aggregateUsage(accumulatedUsage),
172
- failed: false,
173
- extractionFailed: parseResult.extractionFailed,
174
- extractionError: parseResult.extractionError,
175
- extractionPreview: parseResult.extractionPreview,
176
- auxiliaryUsage: parseResult.extractionUsage
177
- ? [{ agent: 'extraction', usage: parseResult.extractionUsage }]
178
- : undefined,
179
- };
180
286
  }
181
- catch (error) {
182
- lastError = error;
183
- // Re-throw authentication errors (they shouldn't be retried)
184
- if (error instanceof WardenAuthenticationError) {
185
- throw error;
186
- }
187
- // Authentication errors should surface immediately with helpful guidance
188
- if (isAuthenticationError(error)) {
189
- throw new WardenAuthenticationError();
190
- }
191
- // Don't retry if not a retryable error or we've exhausted retries
192
- if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) {
193
- break;
194
- }
195
- // Calculate delay and wait before retry
196
- const delayMs = calculateRetryDelay(attempt, retryConfig);
197
- const errorMessage = error instanceof Error ? error.message : String(error);
198
- // Notify about retry in verbose mode
199
- callbacks?.onRetry?.(callbacks.lineRange, attempt + 1, retryConfig.maxRetries, errorMessage, delayMs);
200
- try {
201
- await sleep(delayMs, abortController?.signal);
202
- }
203
- catch {
204
- // Aborted during sleep
205
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
206
- }
287
+ // All attempts failed - return failure with any accumulated usage
288
+ const finalError = lastError instanceof Error ? lastError.message : String(lastError);
289
+ // Log the final error
290
+ if (lastError) {
291
+ console.error(`All retry attempts failed: ${finalError}`);
207
292
  }
208
- }
209
- // All attempts failed - return failure with any accumulated usage
210
- const finalError = lastError instanceof Error ? lastError.message : String(lastError);
211
- // Log the final error
212
- if (lastError) {
213
- console.error(`All retry attempts failed: ${finalError}`);
214
- }
215
- // Also notify via callback if verbose
216
- if (options.verbose) {
217
- callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
218
- }
219
- return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
293
+ // Also notify via callback if verbose
294
+ if (options.verbose) {
295
+ callbacks?.onRetry?.(callbacks.lineRange, retryConfig.maxRetries + 1, retryConfig.maxRetries, `Final failure: ${finalError}`, 0);
296
+ }
297
+ span.setAttribute('hunk.failed', true);
298
+ span.setAttribute('finding.count', 0);
299
+ return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false };
300
+ });
220
301
  }
221
302
  /**
222
303
  * Get line range string for a hunk.
@@ -241,50 +322,62 @@ function attachElapsedTime(findings, skillStartTime) {
241
322
  * Analyze a single prepared file's hunks.
242
323
  */
243
324
  export async function analyzeFile(skill, file, repoPath, options = {}, callbacks, prContext) {
244
- const { abortController } = options;
245
- const fileFindings = [];
246
- const fileUsage = [];
247
- const fileAuxiliaryUsage = [];
248
- let failedHunks = 0;
249
- let failedExtractions = 0;
250
- for (const [hunkIndex, hunk] of file.hunks.entries()) {
251
- if (abortController?.signal.aborted)
252
- break;
253
- const lineRange = getHunkLineRange(hunk);
254
- callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
255
- const hunkCallbacks = callbacks
256
- ? {
257
- lineRange,
258
- onLargePrompt: callbacks.onLargePrompt,
259
- onPromptSize: callbacks.onPromptSize,
260
- onRetry: callbacks.onRetry,
261
- onExtractionFailure: callbacks.onExtractionFailure,
262
- onExtractionResult: callbacks.onExtractionResult,
325
+ return Sentry.startSpan({
326
+ op: 'skill.analyze_file',
327
+ name: `analyze file ${file.filename}`,
328
+ attributes: {
329
+ 'code.filepath': file.filename,
330
+ 'hunk.count': file.hunks.length,
331
+ },
332
+ }, async (span) => {
333
+ const { abortController } = options;
334
+ const fileFindings = [];
335
+ const fileUsage = [];
336
+ const fileAuxiliaryUsage = [];
337
+ let failedHunks = 0;
338
+ let failedExtractions = 0;
339
+ for (const [hunkIndex, hunk] of file.hunks.entries()) {
340
+ if (abortController?.signal.aborted)
341
+ break;
342
+ const lineRange = getHunkLineRange(hunk);
343
+ callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
344
+ const hunkCallbacks = callbacks
345
+ ? {
346
+ lineRange,
347
+ onLargePrompt: callbacks.onLargePrompt,
348
+ onPromptSize: callbacks.onPromptSize,
349
+ onRetry: callbacks.onRetry,
350
+ onExtractionFailure: callbacks.onExtractionFailure,
351
+ onExtractionResult: callbacks.onExtractionResult,
352
+ }
353
+ : undefined;
354
+ const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
355
+ if (result.failed) {
356
+ failedHunks++;
357
+ }
358
+ if (result.extractionFailed) {
359
+ failedExtractions++;
360
+ }
361
+ attachElapsedTime(result.findings, callbacks?.skillStartTime);
362
+ callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
363
+ fileFindings.push(...result.findings);
364
+ fileUsage.push(result.usage);
365
+ if (result.auxiliaryUsage) {
366
+ fileAuxiliaryUsage.push(...result.auxiliaryUsage);
263
367
  }
264
- : undefined;
265
- const result = await analyzeHunk(skill, hunk, repoPath, options, hunkCallbacks, prContext);
266
- if (result.failed) {
267
- failedHunks++;
268
- }
269
- if (result.extractionFailed) {
270
- failedExtractions++;
271
- }
272
- attachElapsedTime(result.findings, callbacks?.skillStartTime);
273
- callbacks?.onHunkComplete?.(hunkIndex + 1, result.findings);
274
- fileFindings.push(...result.findings);
275
- fileUsage.push(result.usage);
276
- if (result.auxiliaryUsage) {
277
- fileAuxiliaryUsage.push(...result.auxiliaryUsage);
278
368
  }
279
- }
280
- return {
281
- filename: file.filename,
282
- findings: fileFindings,
283
- usage: aggregateUsage(fileUsage),
284
- failedHunks,
285
- failedExtractions,
286
- auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
287
- };
369
+ span.setAttribute('finding.count', fileFindings.length);
370
+ span.setAttribute('hunk.failed_count', failedHunks);
371
+ span.setAttribute('extraction.failed_count', failedExtractions);
372
+ return {
373
+ filename: file.filename,
374
+ findings: fileFindings,
375
+ usage: aggregateUsage(fileUsage),
376
+ failedHunks,
377
+ failedExtractions,
378
+ auxiliaryUsage: fileAuxiliaryUsage.length > 0 ? fileAuxiliaryUsage : undefined,
379
+ };
380
+ });
288
381
  }
289
382
  /**
290
383
  * Generate a summary of findings.
@@ -408,21 +501,16 @@ export async function runSkill(skill, context, options = {}) {
408
501
  const fileResults = [];
409
502
  // Process files - parallel or sequential based on options
410
503
  if (parallel) {
411
- // Process files in parallel with concurrency limit
504
+ // Process files with sliding-window concurrency pool
412
505
  const fileConcurrency = options.concurrency ?? DEFAULT_FILE_CONCURRENCY;
413
506
  const batchDelayMs = options.batchDelayMs ?? 0;
414
- for (let i = 0; i < fileHunks.length; i += fileConcurrency) {
415
- // Check for abort before starting new batch
416
- if (abortController?.signal.aborted)
417
- break;
418
- // Apply rate limiting delay between batches (not before the first batch)
419
- if (i > 0 && batchDelayMs > 0) {
507
+ fileResults.push(...await runPool(fileHunks, fileConcurrency, async (fileHunkEntry, index) => {
508
+ // Rate-limit: delay items beyond the first concurrent wave
509
+ if (index >= fileConcurrency && batchDelayMs > 0) {
420
510
  await new Promise((resolve) => setTimeout(resolve, batchDelayMs));
421
511
  }
422
- const batch = fileHunks.slice(i, i + fileConcurrency);
423
- const batchResults = await Promise.all(batch.map((fileHunkEntry, batchIndex) => processFileWithTiming(fileHunkEntry, i + batchIndex)));
424
- fileResults.push(...batchResults);
425
- }
512
+ return processFileWithTiming(fileHunkEntry, index);
513
+ }, { shouldAbort: () => abortController?.signal.aborted ?? false }));
426
514
  }
427
515
  else {
428
516
  // Process files sequentially
@@ -451,6 +539,7 @@ export async function runSkill(skill, context, options = {}) {
451
539
  }
452
540
  // Deduplicate findings
453
541
  const uniqueFindings = deduplicateFindings(allFindings);
542
+ emitDedupMetrics(allFindings.length, uniqueFindings.length);
454
543
  // Generate summary
455
544
  const summary = generateSummary(skill.name, uniqueFindings);
456
545
  // Aggregate usage across all hunks