aws-cli-agent 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -1,6 +1,8 @@
1
1
  import { streamText, stepCountIs } from 'ai';
2
+ import { getActiveModel } from './config.js';
2
3
  import { createModel } from './providers.js';
3
4
  import { createTools } from './tools/index.js';
5
+ import { FatalAwsCliError, UserCancelledError } from './errors.js';
4
6
  const SYSTEM_PROMPT = `You are aws-cli-agent (aca), an agentic assistant that translates natural-language requests into AWS CLI commands and executes them locally on the user's machine.
5
7
 
6
8
  Capabilities (via tools):
@@ -37,7 +39,8 @@ Operating rules:
37
39
  8. Interactive commands: some AWS CLI commands require a real terminal — SSM Session Manager shells (\`ssm start-session\`), port-forwarding sessions (the same command with --document-name AWS-StartPortForwardingSession*), ECS Exec (\`ecs execute-command\`), log tails with --follow. For these, set \`interactive: true\` on the execute_aws_command call. The host will connect the user's terminal directly to the command and you will receive no stdout — DO NOT try to summarize or describe the output afterwards, since you can't see it. Common patterns auto-detect, but setting the flag explicitly is safer.
38
40
  9. The final action of a successful run MUST be either execute_aws_command (the user-requested action) or execute_bash_script. If the user cancels via prompt_user, stop gracefully and explain in one sentence.
39
41
  10. NEVER include credentials, API keys, secrets, or session tokens in commands or scripts. AWS credentials come from the user's existing profile.
40
- 11. Keep your reasoning concise one or two sentences per step. DO NOT summarize, restate, reformat, or describe the output of the AWS CLI. The CLI's stdout is shown to the user directly by the host program. Your only post-execution job is to stop. If anything went wrong, say so briefly; if it succeeded, you may stop without further commentary.`;
42
+ 11. Handling AWS CLI errors: if execute_aws_command returns a result with \`ok: false\` (and a non-zero exitCode), you may retry ONCE with a different approach if it's clearly worth trying wrong region, wrong profile, missing flag, fixable typo. Don't loop trying minor variations. The host caps total run length via maxSteps; respect it. Note: unrecoverable errors (auth failure, missing credentials, permission denied, malformed request, AWS service errors) terminate the run before you'd see them, so you don't need to handle those cases — they're handled for you.
43
+ 12. Keep your reasoning concise — one or two sentences per step. DO NOT summarize, restate, reformat, or describe the output of the AWS CLI. The CLI's stdout is shown to the user directly by the host program. Your only post-execution job is to stop. If anything went wrong, say so briefly; if it succeeded, you may stop without further commentary.`;
41
44
  export async function runAgent(opts) {
42
45
  const { input, config, logger, history, audit, reasoning, usage } = opts;
43
46
  const executions = [];
@@ -58,8 +61,9 @@ export async function runAgent(opts) {
58
61
  // the tools array is sent at full cost on every request.
59
62
  const useCaching = config.caching && (config.provider === 'anthropic' || config.provider === 'bedrock');
60
63
  const tools = createTools({ logger, config, history, audit, record });
61
- const model = createModel(config);
62
- logger.info(`Starting agent (provider=${config.provider}, model=${config.model})`);
64
+ const model = createModel(config, logger);
65
+ const activeModel = getActiveModel(config);
66
+ logger.info(`Starting agent (provider=${config.provider}, model=${activeModel})`);
63
67
  logger.debug('User input', input);
64
68
  reasoning.beginRun(input);
65
69
  // Inline a small recent-history hint so the model has soft context even
@@ -144,61 +148,159 @@ export async function runAgent(opts) {
144
148
  // Two execution sites collaborate to print one step:
145
149
  // 1. text-end (here) → reasoning text line
146
150
  // 2. onToolCallStart (callback above) → tool: line, then execute()
147
- for await (const part of result.fullStream) {
148
- switch (part.type) {
149
- case 'start-step': {
150
- stepCounter += 1;
151
- toolCallStepNumber = stepCounter; // visible to onToolCallStart
152
- currentReasoning = '';
153
- currentToolCalls = [];
154
- reasoningEchoed = false;
155
- break;
156
- }
157
- case 'text-delta': {
158
- currentReasoning += part.text;
159
- break;
160
- }
161
- case 'text-end': {
162
- if (!reasoningEchoed) {
163
- reasoning.echoReasoning(stepCounter, currentReasoning);
164
- reasoningEchoed = true;
151
+ // Terminal state for the run. It stays 'completed' (the default) unless
152
+ // the loop sees an unrecoverable AWS CLI failure, which sets 'fatal'.
153
+ // Ctrl-C is not recorded here: UserCancelledError is rethrown instead.
154
+ // cli.ts uses endReason to pick the exit code and the user-facing message.
155
+ let endReason = 'completed';
156
+ try {
157
+ for await (const part of result.fullStream) {
158
+ switch (part.type) {
159
+ case 'start-step': {
160
+ stepCounter += 1;
161
+ toolCallStepNumber = stepCounter; // visible to onToolCallStart
162
+ currentReasoning = '';
163
+ currentToolCalls = [];
164
+ reasoningEchoed = false;
165
+ break;
165
166
  }
166
- break;
167
- }
168
- case 'tool-call': {
169
- // Backup echo path: if text-end didn't fire (provider variant or
170
- // text-less step), echo whatever reasoning we have when we see
171
- // tool-call. The tool-call LINE itself is NOT printed here — it's
172
- // printed by experimental_onToolCallStart, which fires
173
- // synchronously before execute() and guarantees ordering above
174
- // any approval prompt.
175
- if (!reasoningEchoed) {
176
- reasoning.echoReasoning(stepCounter, currentReasoning);
177
- reasoningEchoed = true;
167
+ case 'text-delta': {
168
+ currentReasoning += part.text;
169
+ break;
170
+ }
171
+ case 'text-end': {
172
+ if (!reasoningEchoed) {
173
+ reasoning.echoReasoning(stepCounter, currentReasoning);
174
+ reasoningEchoed = true;
175
+ }
176
+ break;
178
177
  }
179
- break;
178
+ case 'tool-call': {
179
+ // Backup echo path: if text-end didn't fire (provider variant or
180
+ // text-less step), echo whatever reasoning we have when we see
181
+ // tool-call. The tool-call LINE itself is NOT printed here — it's
182
+ // printed by experimental_onToolCallStart, which fires
183
+ // synchronously before execute() and guarantees ordering above
184
+ // any approval prompt.
185
+ if (!reasoningEchoed) {
186
+ reasoning.echoReasoning(stepCounter, currentReasoning);
187
+ reasoningEchoed = true;
188
+ }
189
+ break;
190
+ }
191
+ case 'tool-error': {
192
+ // The SDK catches errors thrown from tool.execute() and emits
193
+ // them as tool-error events instead of rejecting the stream. So
194
+ // we inspect every tool-error for our sentinels:
195
+ //
196
+ // - UserCancelledError → throw out of the loop so the outer
197
+ // catch propagates it to cli.ts for "cancelled by user" + exit 130.
198
+ // - FatalAwsCliError → set endReason='fatal' and let the stream drain.
199
+ // The failed call has already been recorded in executions[]
200
+ // by the tool (audit + record fire before the throw), so
201
+ // finalError further down will pick up the stderr naturally.
202
+ // - Anything else: ignore. Soft failures shouldn't be thrown
203
+ // (tools return them as results), and any other thrown error
204
+ // is treated as a tool-level failure the model can decide
205
+ // how to handle.
206
+ if (part.error instanceof UserCancelledError) {
207
+ throw part.error;
208
+ }
209
+ if (part.error instanceof FatalAwsCliError) {
210
+ endReason = 'fatal';
211
+ logger.warn(`Run ended on fatal AWS CLI error (exit ${part.error.exitCode}).`);
212
+ // Flush this step's reasoning to the file log; the tool-call
213
+ // event for this step already fired, so currentToolCalls is
214
+ // populated. We need to break out cleanly without waiting
215
+ // for finish-step (the SDK may still emit it, may not).
216
+ reasoning.logStepToFile({
217
+ step: stepCounter,
218
+ reasoning: currentReasoning,
219
+ toolCalls: currentToolCalls,
220
+ finishReason: 'fatal-error',
221
+ });
222
+ // Deliberately no break out of the for-await here: the remaining
223
+ // events are drained so the SDK can finish its internal cleanup.
224
+ // endReason='fatal' doubles as the flag that tells the finish-step
225
+ // handler not to flush this step's reasoning a second time.
226
+ // The loop simply continues; finish-step / finish
227
+ // events will pass through harmlessly.
228
+ }
229
+ break;
230
+ }
231
+ case 'finish-step': {
232
+ // After a fatal tool-error, finish-step still arrives for the
233
+ // same step. The reasoning was already flushed in the tool-error
234
+ // handler — don't double-flush. For normal steps, this is the
235
+ // path that flushes.
236
+ if (endReason !== 'fatal') {
237
+ reasoning.logStepToFile({
238
+ step: stepCounter,
239
+ reasoning: currentReasoning,
240
+ toolCalls: currentToolCalls,
241
+ finishReason: part.finishReason,
242
+ });
243
+ }
244
+ logger.debug(`Step ${stepCounter} finished (finishReason=${part.finishReason})`);
245
+ break;
246
+ }
247
+ // Other event types (reasoning-delta for thinking-models,
248
+ // tool-input-delta, source, file, raw, etc.) are ignored —
249
+ // fullStream is forward-compatible.
180
250
  }
181
- case 'finish-step': {
251
+ }
252
+ }
253
+ catch (err) {
254
+ // The for-await loop throws when we re-throw UserCancelledError above.
255
+ // It can also throw on genuine SDK / provider failures. We distinguish:
256
+ if (err instanceof UserCancelledError) {
257
+ // No endReason='cancelled' assignment here: we throw immediately
258
+ // and the post-stream code in this function never runs. cli.ts is
259
+ // the one that recognizes UserCancelledError and exits 130 — it
260
+ // doesn't need RunResult.endReason for that.
261
+ if (currentReasoning.trim().length > 0 || currentToolCalls.length > 0) {
182
262
  reasoning.logStepToFile({
183
263
  step: stepCounter,
184
264
  reasoning: currentReasoning,
185
265
  toolCalls: currentToolCalls,
186
- finishReason: part.finishReason,
266
+ finishReason: 'cancelled',
187
267
  });
188
- logger.debug(`Step ${stepCounter} finished (finishReason=${part.finishReason})`);
189
- break;
190
268
  }
191
- // Other event types (reasoning-delta for thinking-models,
192
- // tool-input-delta, source, file, raw, etc.) are ignored —
193
- // fullStream is forward-compatible.
269
+ logger.info('Run cancelled by user.');
270
+ throw err;
194
271
  }
272
+ // Genuine bug or provider failure. Let it bubble.
273
+ throw err;
274
+ }
275
+ // After the stream completes (normally OR via FatalAwsCliError), pull
276
+ // the post-stream promises. Most runs reach here with all three already
277
+ // resolved (the stream completion is the signal). But when we caught a
278
+ // FatalAwsCliError mid-stream, the SDK may have left these in a rejected
279
+ // state — the stream didn't naturally complete. Defensive try/await
280
+ // around each so we degrade gracefully: a partial RunResult with
281
+ // whatever usage we got from steps that did complete is better than
282
+ // crashing on a downstream `await` and losing the failure context.
283
+ let finalText = '';
284
+ let finalSteps = [];
285
+ let totalUsage;
286
+ try {
287
+ finalText = await result.text;
288
+ }
289
+ catch (err) {
290
+ logger.debug('result.text rejected (expected after fatal/cancel)', err);
291
+ }
292
+ try {
293
+ finalSteps = await result.steps;
294
+ }
295
+ catch (err) {
296
+ logger.debug('result.steps rejected (expected after fatal/cancel)', err);
297
+ }
298
+ try {
299
+ totalUsage = await result.totalUsage;
300
+ }
301
+ catch (err) {
302
+ logger.debug('result.totalUsage rejected (expected after fatal/cancel)', err);
195
303
  }
196
- // Wait for all the post-stream promises to resolve. They're already
197
- // ready by the time fullStream finishes (the stream completion is the
198
- // signal), so these awaits are effectively synchronous.
199
- const finalText = await result.text;
200
- const finalSteps = await result.steps;
201
- const totalUsage = await result.totalUsage;
202
304
  logger.info(`Agent finished after ${finalSteps.length} step(s)`);
203
305
  logger.debug('Final text', finalText);
204
306
  // Token usage for this invocation.
@@ -228,7 +330,7 @@ export async function runAgent(opts) {
228
330
  usage.log({
229
331
  input,
230
332
  provider: config.provider,
231
- model: config.model,
333
+ model: activeModel,
232
334
  steps: finalSteps.length,
233
335
  promptTokens: totalUsage?.inputTokens ?? 0,
234
336
  completionTokens: totalUsage?.outputTokens ?? 0,
@@ -278,6 +380,7 @@ export async function runAgent(opts) {
278
380
  finalOutput,
279
381
  finalError,
280
382
  ranCommand,
383
+ endReason,
281
384
  };
282
385
  }
283
386
  /**
package/dist/cli.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { Command } from 'commander';
2
2
  import chalk from 'chalk';
3
- import { loadConfig, writeDefaultConfig } from './config.js';
3
+ import { loadConfig, validateActiveProvider, writeDefaultConfig } from './config.js';
4
4
  import { Logger } from './logger.js';
5
5
  import { AuditLogger } from './audit.js';
6
6
  import { ReasoningLogger } from './reasoning.js';
@@ -8,7 +8,8 @@ import { UsageLogger } from './usage.js';
8
8
  import { History } from './history.js';
9
9
  import { runAgent } from './agent.js';
10
10
  import { FILES, PATHS, DEFAULT_SCRIPT_FOLDER } from './paths.js';
11
- const VERSION = '0.4.0';
11
+ import { UserCancelledError } from './errors.js';
12
+ const VERSION = '0.6.0';
12
13
  /**
13
14
  * Apply CLI flags on top of the loaded config. Flags only override; they
14
15
  * never widen or compose with each other implicitly.
@@ -102,6 +103,19 @@ export async function main(argv) {
102
103
  return;
103
104
  }
104
105
  const cfg = applyCliOverrides(loadConfig(), globalOpts);
106
+ // Strict validation: the active provider must have a config block with
107
+ // a `model` set. Deferred from loadConfig so subcommands like `paths`
108
+ // and `history` work even without a config file. The run command is
109
+ // the one that actually needs a complete provider, so we check here.
110
+ try {
111
+ validateActiveProvider(cfg);
112
+ }
113
+ catch (err) {
114
+ const msg = err instanceof Error ? err.message : String(err);
115
+ process.stderr.write(chalk.red('Config error: ') + msg + '\n');
116
+ process.exitCode = 1;
117
+ return;
118
+ }
105
119
  const logger = new Logger(cfg.logging.level);
106
120
  const audit = new AuditLogger(cfg.logging.auditLog);
107
121
  const reasoning = new ReasoningLogger({
@@ -154,7 +168,13 @@ export async function main(argv) {
154
168
  // Footer counts only commands that actually executed. Declined or
155
169
  // cancelled commands appear in `result.commands` for the history
156
170
  // log but don't count as "ran" since no subprocess was started.
157
- if (result.executedCommandCount > 0) {
171
+ //
172
+ // Gated on `cfg.verbose`: the footer is supplementary information
173
+ // ("here's what happened during the run") that's useful while you're
174
+ // watching the agent work, but noisy for scripted/pipeline use. With
175
+ // verbose off, nothing aca generates reaches the terminal — only the
176
+ // AWS CLI's verbatim output does.
177
+ if (cfg.verbose && result.executedCommandCount > 0) {
158
178
  const tag = result.profile ? `[${result.profile}]` : '';
159
179
  const cmds = result.executedCommandCount === 1
160
180
  ? '1 command'
@@ -163,10 +183,19 @@ export async function main(argv) {
163
183
  }
164
184
  }
165
185
  catch (err) {
166
- const msg = err instanceof Error ? err.message : String(err);
167
- logger.error('Agent failed', msg);
168
- process.stderr.write(chalk.red('Error: ') + msg + '\n');
169
- process.exitCode = 1;
186
+ // User cancelled (Ctrl-C at a prompt). Print a calm message,
187
+ // exit 130 (SIGINT convention), no red error, no "ran N" footer,
188
+ // no stack trace.
189
+ if (err instanceof UserCancelledError) {
190
+ process.stderr.write(chalk.dim('cancelled by user\n'));
191
+ process.exitCode = 130;
192
+ }
193
+ else {
194
+ const msg = err instanceof Error ? err.message : String(err);
195
+ logger.error('Agent failed', msg);
196
+ process.stderr.write(chalk.red('Error: ') + msg + '\n');
197
+ process.exitCode = 1;
198
+ }
170
199
  }
171
200
  finally {
172
201
  logger.close();
package/dist/config.d.ts CHANGED
@@ -1,14 +1,28 @@
1
1
  import { z } from 'zod';
2
2
  export declare const ConfigSchema: z.ZodObject<{
3
3
  provider: z.ZodDefault<z.ZodEnum<{
4
- bedrock: "bedrock";
5
4
  anthropic: "anthropic";
6
5
  openai: "openai";
7
6
  google: "google";
7
+ bedrock: "bedrock";
8
8
  }>>;
9
- model: z.ZodDefault<z.ZodString>;
10
- apiKeyEnv: z.ZodOptional<z.ZodString>;
9
+ anthropic: z.ZodOptional<z.ZodObject<{
10
+ model: z.ZodOptional<z.ZodString>;
11
+ apiKey: z.ZodOptional<z.ZodString>;
12
+ apiKeyEnv: z.ZodOptional<z.ZodString>;
13
+ }, z.core.$strip>>;
14
+ openai: z.ZodOptional<z.ZodObject<{
15
+ model: z.ZodOptional<z.ZodString>;
16
+ apiKey: z.ZodOptional<z.ZodString>;
17
+ apiKeyEnv: z.ZodOptional<z.ZodString>;
18
+ }, z.core.$strip>>;
19
+ google: z.ZodOptional<z.ZodObject<{
20
+ model: z.ZodOptional<z.ZodString>;
21
+ apiKey: z.ZodOptional<z.ZodString>;
22
+ apiKeyEnv: z.ZodOptional<z.ZodString>;
23
+ }, z.core.$strip>>;
11
24
  bedrock: z.ZodOptional<z.ZodObject<{
25
+ model: z.ZodOptional<z.ZodString>;
12
26
  region: z.ZodOptional<z.ZodString>;
13
27
  profile: z.ZodOptional<z.ZodString>;
14
28
  }, z.core.$strip>>;
@@ -38,6 +52,33 @@ export declare const ConfigSchema: z.ZodObject<{
38
52
  scriptFolder: z.ZodOptional<z.ZodString>;
39
53
  }, z.core.$strip>;
40
54
  export type Config = z.infer<typeof ConfigSchema>;
55
+ /**
56
+ * Resolve the active provider's model. The schema marks `model` optional
57
+ * per-block so that we can produce a single coherent error message in
58
+ * `validateActiveProvider` rather than zod's multi-issue tree. Call this
59
+ * only after validateActiveProvider has passed.
60
+ */
61
+ export declare function getActiveModel(config: Config): string;
62
+ /**
63
+ * Strict post-parse validation for the active provider's block. The active
64
+ * provider's block must exist and must contain a `model`. Pre-1.0 we treat
65
+ * this as a hard error rather than scaffolding defaults, so the user always
66
+ * knows exactly what's being called and at what cost.
67
+ *
68
+ * Call this from code paths that actually run the agent — the `run` command.
69
+ * Subcommands that don't need a provider (`paths`, `config`, `history`)
70
+ * skip this check, so a user with no config file can still use them.
71
+ */
72
+ export declare function validateActiveProvider(config: Config): void;
41
73
  export declare function loadConfig(): Config;
42
- /** Write a default config file if none exists. Returns the path either way. */
74
+ /**
75
+ * Write a default config file if none exists. Scaffolds only the active
76
+ * provider's block (just `model`), deliberately not creating slots for
77
+ * other providers (less to read) and not scaffolding `apiKey` (less
78
+ * temptation to put secrets on disk).
79
+ *
80
+ * Sets mode 0600 on the file. This doesn't protect against a user editing
81
+ * with `cp` or moving the file later, but ensures that the file as we
82
+ * create it isn't world-readable.
83
+ */
43
84
  export declare function writeDefaultConfig(): string;