@compilr-dev/agents 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -2499,6 +2499,13 @@ export class Agent {
2499
2499
  // Capture usage data from done chunk
2500
2500
  usage = chunk.usage;
2501
2501
  model = chunk.model;
2502
+ // Handle special stop reasons (Claude 4.5+)
2503
+ if (chunk.stopReason === 'refusal') {
2504
+ text += '\n\n[Model refused this request]';
2505
+ }
2506
+ else if (chunk.stopReason === 'context_window_exceeded') {
2507
+ text += '\n\n[Response truncated — context window limit reached]';
2508
+ }
2502
2509
  break;
2503
2510
  }
2504
2511
  }
@@ -87,6 +87,10 @@ export declare class ClaudeProvider implements LLMProvider {
87
87
  * Count tokens in messages using tiktoken (cl100k_base encoding)
88
88
  */
89
89
  countTokens(messages: Message[]): Promise<number>;
90
+ /**
91
+ * Check if a model is Claude 4+ (token-efficient tools are built-in, no header needed).
92
+ */
93
+ private isClaude4Plus;
90
94
  /**
91
95
  * Build request options with optional abort signal and beta headers.
92
96
  * Combines multiple beta features (comma-separated per Anthropic API spec).
@@ -105,7 +109,10 @@ export declare class ClaudeProvider implements LLMProvider {
105
109
  */
106
110
  private convertTools;
107
111
  /**
108
- * Convert thinking config to Anthropic API format
112
+ * Convert thinking config to Anthropic API format.
113
+ *
114
+ * Supports both legacy (enabled + budget_tokens) and adaptive (Claude 4.6+) modes.
115
+ * Adaptive mode uses output_config.effort instead of budget_tokens.
109
116
  */
110
117
  private convertThinking;
111
118
  /**
@@ -79,13 +79,16 @@ export class ClaudeProvider {
79
79
  temperature: options?.temperature,
80
80
  stop_sequences: options?.stopSequences,
81
81
  };
82
- // Add thinking if enabled (Claude-specific)
82
+ // Add thinking + effort if configured (Claude-specific)
83
83
  // Note: Extended thinking types not yet in SDK, using Object.assign
84
84
  if (thinking) {
85
- Object.assign(params, { thinking });
85
+ Object.assign(params, { thinking: thinking.thinking });
86
+ if (thinking.outputConfig) {
87
+ Object.assign(params, { output_config: thinking.outputConfig });
88
+ }
86
89
  }
87
90
  // Pass abort signal and optional beta header to SDK
88
- const requestOptions = this.buildRequestOptions(options?.signal, tools.length > 0);
91
+ const requestOptions = this.buildRequestOptions(options?.signal, tools.length > 0, options?.model);
89
92
  const stream = this.client.messages.stream(params, requestOptions);
90
93
  const model = options?.model ?? this.defaultModel;
91
94
  let currentToolId = '';
@@ -118,9 +121,16 @@ export class ClaudeProvider {
118
121
  // Access optional cache token fields via type coercion
119
122
  // These fields are present in newer SDK versions but not in the type definitions
120
123
  const usageWithCache = usage;
124
+ // Map Anthropic stop_reason to our stopReason
125
+ // Cast to string — SDK types may not include newer stop reasons (refusal, model_context_window_exceeded)
126
+ const rawStopReason = finalMessage.stop_reason;
127
+ const stopReason = rawStopReason === 'model_context_window_exceeded'
128
+ ? 'context_window_exceeded'
129
+ : (rawStopReason ?? 'end_turn');
121
130
  yield {
122
131
  type: 'done',
123
132
  model,
133
+ stopReason,
124
134
  usage: {
125
135
  inputTokens: usage.input_tokens,
126
136
  outputTokens: usage.output_tokens,
@@ -140,13 +150,22 @@ export class ClaudeProvider {
140
150
  countTokens(messages) {
141
151
  return Promise.resolve(countMessageTokens(messages));
142
152
  }
153
+ /**
154
+ * Check if a model is Claude 4+ (token-efficient tools are built-in, no header needed).
155
+ */
156
+ isClaude4Plus(model) {
157
+ // Claude 4+ model IDs: claude-opus-4-*, claude-sonnet-4-*, claude-haiku-4-*
158
+ return /^claude-(opus|sonnet|haiku)-4/.test(model);
159
+ }
143
160
  /**
144
161
  * Build request options with optional abort signal and beta headers.
145
162
  * Combines multiple beta features (comma-separated per Anthropic API spec).
146
163
  */
147
- buildRequestOptions(signal, hasTools) {
164
+ buildRequestOptions(signal, hasTools, model) {
165
+ const resolvedModel = model ?? this.defaultModel;
148
166
  const betas = [];
149
- if (this.enableTokenEfficientTools && hasTools) {
167
+ // token-efficient-tools is built-in for Claude 4+ — only send for older models
168
+ if (this.enableTokenEfficientTools && hasTools && !this.isClaude4Plus(resolvedModel)) {
150
169
  betas.push('token-efficient-tools-2025-02-19');
151
170
  }
152
171
  if (this.enableExtendedContext) {
@@ -235,19 +254,30 @@ export class ClaudeProvider {
235
254
  }));
236
255
  }
237
256
  /**
238
- * Convert thinking config to Anthropic API format
257
+ * Convert thinking config to Anthropic API format.
258
+ *
259
+ * Supports both legacy (enabled + budget_tokens) and adaptive (Claude 4.6+) modes.
260
+ * Adaptive mode uses output_config.effort instead of budget_tokens.
239
261
  */
240
262
  convertThinking(thinking) {
241
263
  if (!thinking || thinking.type === 'disabled') {
242
264
  return undefined;
243
265
  }
244
- // Validate budget_tokens minimum (1024)
245
- if (thinking.budgetTokens < 1024) {
246
- throw new ProviderError(`Extended thinking budget_tokens must be at least 1024, got ${String(thinking.budgetTokens)}`, 'claude');
266
+ if (thinking.type === 'adaptive') {
267
+ return {
268
+ thinking: { type: 'adaptive' },
269
+ outputConfig: thinking.effort ? { effort: thinking.effort } : undefined,
270
+ };
271
+ }
272
+ // Legacy enabled mode (deprecated on Claude 4.6, still works on older)
273
+ if (!thinking.budgetTokens || thinking.budgetTokens < 1024) {
274
+ throw new ProviderError(`Extended thinking budget_tokens must be at least 1024, got ${String(thinking.budgetTokens ?? 0)}`, 'claude');
247
275
  }
248
276
  return {
249
- type: thinking.type,
250
- budget_tokens: thinking.budgetTokens,
277
+ thinking: {
278
+ type: 'enabled',
279
+ budget_tokens: thinking.budgetTokens,
280
+ },
251
281
  };
252
282
  }
253
283
  /**
@@ -110,6 +110,15 @@ export interface StreamChunk {
110
110
  * Model that generated this response (only present on 'done' chunks)
111
111
  */
112
112
  model?: string;
113
+ /**
114
+ * Stop reason (only present on 'done' chunks).
115
+ * - 'end_turn': Normal completion
116
+ * - 'max_tokens': Hit max_tokens limit
117
+ * - 'refusal': Model refused the request (Claude 4.5+)
118
+ * - 'context_window_exceeded': Hit context window limit (Claude 4.5+)
119
+ * - 'tool_use': Model wants to call a tool
120
+ */
121
+ stopReason?: string;
113
122
  }
114
123
  /**
115
124
  * Extended thinking configuration
@@ -118,13 +127,26 @@ export interface StreamChunk {
118
127
  */
119
128
  export interface ThinkingConfig {
120
129
  /**
121
- * Enable or disable extended thinking
130
+ * Thinking mode:
131
+ * - 'enabled': Manual budget (legacy, deprecated on Claude 4.6)
132
+ * - 'adaptive': Model decides when/how much to think (Claude 4.6+)
133
+ * - 'disabled': No thinking
134
+ */
135
+ type: 'enabled' | 'adaptive' | 'disabled';
136
+ /**
137
+ * Token budget for thinking (minimum 1024, must be less than maxTokens).
138
+ * Only used with type: 'enabled'. Ignored for 'adaptive'.
122
139
  */
123
- type: 'enabled' | 'disabled';
140
+ budgetTokens?: number;
124
141
  /**
125
- * Token budget for thinking (minimum 1024, must be less than maxTokens)
142
+ * Effort level for adaptive thinking (Claude 4.6+).
143
+ * Controls how much the model thinks before responding.
144
+ * - 'low': Minimal thinking, fastest response
145
+ * - 'medium': Balanced
146
+ * - 'high': Deep thinking, most thorough (Sonnet 4.6 default)
147
+ * Only used with type: 'adaptive'. Ignored for 'enabled'.
126
148
  */
127
- budgetTokens: number;
149
+ effort?: 'low' | 'medium' | 'high';
128
150
  }
129
151
  /**
130
152
  * Options for chat requests
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@compilr-dev/agents",
3
- "version": "0.4.1",
3
+ "version": "0.5.0",
4
4
  "description": "Lightweight multi-LLM agent library for building CLI AI assistants",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",