npm - aws-cli-agent - Versions diffs - 0.4.0 → 0.6.0 - Mend

aws-cli-agent 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/agent.js CHANGED Viewed

@@ -1,6 +1,8 @@
 import { streamText, stepCountIs } from 'ai';
+import { getActiveModel } from './config.js';
 import { createModel } from './providers.js';
 import { createTools } from './tools/index.js';
+import { FatalAwsCliError, UserCancelledError } from './errors.js';
 const SYSTEM_PROMPT = `You are aws-cli-agent (aca), an agentic assistant that translates natural-language requests into AWS CLI commands and executes them locally on the user's machine.
 Capabilities (via tools):
@@ -37,7 +39,8 @@ Operating rules:
 8. Interactive commands: some AWS CLI commands require a real terminal — SSM Session Manager shells (\`ssm start-session\`), port-forwarding sessions (the same command with --document-name AWS-StartPortForwardingSession*), ECS Exec (\`ecs execute-command\`), log tails with --follow. For these, set \`interactive: true\` on the execute_aws_command call. The host will connect the user's terminal directly to the command and you will receive no stdout — DO NOT try to summarize or describe the output afterwards, since you can't see it. Common patterns auto-detect, but setting the flag explicitly is safer.
 9. The final action of a successful run MUST be either execute_aws_command (the user-requested action) or execute_bash_script. If the user cancels via prompt_user, stop gracefully and explain in one sentence.
 10. NEVER include credentials, API keys, secrets, or session tokens in commands or scripts. AWS credentials come from the user's existing profile.
-11. Keep your reasoning concise — one or two sentences per step. DO NOT summarize, restate, reformat, or describe the output of the AWS CLI. The CLI's stdout is shown to the user directly by the host program. Your only post-execution job is to stop. If anything went wrong, say so briefly; if it succeeded, you may stop without further commentary.`;
+11. Handling AWS CLI errors: if execute_aws_command returns a result with \`ok: false\` (and a non-zero exitCode), you may retry ONCE with a different approach if it's clearly worth trying — wrong region, wrong profile, missing flag, fixable typo. Don't loop trying minor variations. The host caps total run length via maxSteps; respect it. Note: unrecoverable errors (auth failure, missing credentials, permission denied, malformed request, AWS service errors) terminate the run before you'd see them, so you don't need to handle those cases — they're handled for you.
+12. Keep your reasoning concise — one or two sentences per step. DO NOT summarize, restate, reformat, or describe the output of the AWS CLI. The CLI's stdout is shown to the user directly by the host program. Your only post-execution job is to stop. If anything went wrong, say so briefly; if it succeeded, you may stop without further commentary.`;
 export async function runAgent(opts) {
     const { input, config, logger, history, audit, reasoning, usage } = opts;
     const executions = [];
@@ -58,8 +61,9 @@ export async function runAgent(opts) {
     // the tools array is sent at full cost on every request.
     const useCaching = config.caching && (config.provider === 'anthropic' || config.provider === 'bedrock');
     const tools = createTools({ logger, config, history, audit, record });
-    const model = createModel(config);
-    logger.info(`Starting agent (provider=${config.provider}, model=${config.model})`);
+    const model = createModel(config, logger);
+    const activeModel = getActiveModel(config);
+    logger.info(`Starting agent (provider=${config.provider}, model=${activeModel})`);
     logger.debug('User input', input);
     reasoning.beginRun(input);
     // Inline a small recent-history hint so the model has soft context even
@@ -144,61 +148,159 @@ export async function runAgent(opts) {
     // Two execution sites collaborate to print one step:
     //   1. text-end (here) → reasoning text line
     //   2. onToolCallStart (callback above) → tool: line, then execute()
-    for await (const part of result.fullStream) {
-        switch (part.type) {
-            case 'start-step': {
-                stepCounter += 1;
-                toolCallStepNumber = stepCounter; // visible to onToolCallStart
-                currentReasoning = '';
-                currentToolCalls = [];
-                reasoningEchoed = false;
-                break;
-            }
-            case 'text-delta': {
-                currentReasoning += part.text;
-                break;
-            }
-            case 'text-end': {
-                if (!reasoningEchoed) {
-                    reasoning.echoReasoning(stepCounter, currentReasoning);
-                    reasoningEchoed = true;
+    // Terminal state for the run. It starts as 'completed' (the default) and
+    // the for-await loop switches it to 'fatal' on an unrecoverable AWS CLI
+    // failure. Ctrl-C never sets it: UserCancelledError is re-thrown out of
+    // this function instead. cli.ts uses endReason to pick the exit code and
+    // the user-facing message.
+    let endReason = 'completed';
+    try {
+        for await (const part of result.fullStream) {
+            switch (part.type) {
+                case 'start-step': {
+                    stepCounter += 1;
+                    toolCallStepNumber = stepCounter; // visible to onToolCallStart
+                    currentReasoning = '';
+                    currentToolCalls = [];
+                    reasoningEchoed = false;
+                    break;
                 }
-                break;
-            }
-            case 'tool-call': {
-                // Backup echo path: if text-end didn't fire (provider variant or
-                // text-less step), echo whatever reasoning we have when we see
-                // tool-call. The tool-call LINE itself is NOT printed here — it's
-                // printed by experimental_onToolCallStart, which fires
-                // synchronously before execute() and guarantees ordering above
-                // any approval prompt.
-                if (!reasoningEchoed) {
-                    reasoning.echoReasoning(stepCounter, currentReasoning);
-                    reasoningEchoed = true;
+                case 'text-delta': {
+                    currentReasoning += part.text;
+                    break;
+                }
+                case 'text-end': {
+                    if (!reasoningEchoed) {
+                        reasoning.echoReasoning(stepCounter, currentReasoning);
+                        reasoningEchoed = true;
+                    }
+                    break;
                 }
-                break;
+                case 'tool-call': {
+                    // Backup echo path: if text-end didn't fire (provider variant or
+                    // text-less step), echo whatever reasoning we have when we see
+                    // tool-call. The tool-call LINE itself is NOT printed here — it's
+                    // printed by experimental_onToolCallStart, which fires
+                    // synchronously before execute() and guarantees ordering above
+                    // any approval prompt.
+                    if (!reasoningEchoed) {
+                        reasoning.echoReasoning(stepCounter, currentReasoning);
+                        reasoningEchoed = true;
+                    }
+                    break;
+                }
+                case 'tool-error': {
+                    // The SDK catches errors thrown from tool.execute() and emits
+                    // them as tool-error events instead of rejecting the stream. So
+                    // we inspect every tool-error for our sentinels:
+                    //
+                    //   - UserCancelledError → throw out of the loop so the outer
+                    //     catch propagates it to cli.ts for "cancelled by user" + exit 130.
+                    //   - FatalAwsCliError → set endReason='fatal' and stop iterating.
+                    //     The failed call has already been recorded in executions[]
+                    //     by the tool (audit + record fire before the throw), so
+                    //     finalError further down will pick up the stderr naturally.
+                    //   - Anything else: ignore. Soft failures shouldn't be thrown
+                    //     (tools return them as results), and any other thrown error
+                    //     is treated as a tool-level failure the model can decide
+                    //     how to handle.
+                    if (part.error instanceof UserCancelledError) {
+                        throw part.error;
+                    }
+                    if (part.error instanceof FatalAwsCliError) {
+                        endReason = 'fatal';
+                        logger.warn(`Run ended on fatal AWS CLI error (exit ${part.error.exitCode}).`);
+                        // Flush this step's reasoning to the file log; the tool-call
+                        // event for this step already fired, so currentToolCalls is
+                        // populated. We need to break out cleanly without waiting
+                        // for finish-step (the SDK may still emit it, may not).
+                        reasoning.logStepToFile({
+                            step: stepCounter,
+                            reasoning: currentReasoning,
+                            toolCalls: currentToolCalls,
+                            finishReason: 'fatal-error',
+                        });
+                        // Do not break out of the for-await here: the remaining
+                        // events are drained so the SDK can run its internal
+                        // cleanup. No separate flag is needed, since
+                        // endReason === 'fatal' already stops finish-step from
+                        // flushing this step a second time; finish-step / finish
+                        // events otherwise pass through harmlessly.
+                    }
+                    break;
+                }
+                case 'finish-step': {
+                    // After a fatal tool-error, finish-step still arrives for the
+                    // same step. The reasoning was already flushed in the tool-error
+                    // handler — don't double-flush. For normal steps, this is the
+                    // path that flushes.
+                    if (endReason !== 'fatal') {
+                        reasoning.logStepToFile({
+                            step: stepCounter,
+                            reasoning: currentReasoning,
+                            toolCalls: currentToolCalls,
+                            finishReason: part.finishReason,
+                        });
+                    }
+                    logger.debug(`Step ${stepCounter} finished (finishReason=${part.finishReason})`);
+                    break;
+                }
+                // Other event types (reasoning-delta for thinking-models,
+                // tool-input-delta, source, file, raw, etc.) are ignored —
+                // fullStream is forward-compatible.
             }
-            case 'finish-step': {
+        }
+    }
+    catch (err) {
+        // The for-await loop throws when we re-throw UserCancelledError above.
+        // It can also throw on genuine SDK / provider failures. We distinguish:
+        if (err instanceof UserCancelledError) {
+            // No endReason='cancelled' assignment here: we throw immediately
+            // and the post-stream code in this function never runs. cli.ts is
+            // the one that recognizes UserCancelledError and exits 130 — it
+            // doesn't need RunResult.endReason for that.
+            if (currentReasoning.trim().length > 0 || currentToolCalls.length > 0) {
                 reasoning.logStepToFile({
                     step: stepCounter,
                     reasoning: currentReasoning,
                     toolCalls: currentToolCalls,
-                    finishReason: part.finishReason,
+                    finishReason: 'cancelled',
                 });
-                logger.debug(`Step ${stepCounter} finished (finishReason=${part.finishReason})`);
-                break;
             }
-            // Other event types (reasoning-delta for thinking-models,
-            // tool-input-delta, source, file, raw, etc.) are ignored —
-            // fullStream is forward-compatible.
+            logger.info('Run cancelled by user.');
+            throw err;
         }
+        // Genuine bug or provider failure. Let it bubble.
+        throw err;
+    }
+    // After the stream completes (normally OR via FatalAwsCliError), pull
+    // the post-stream promises. Most runs reach here with all three already
+    // resolved (the stream completion is the signal). But when we caught a
+    // FatalAwsCliError mid-stream, the SDK may have left these in a rejected
+    // state — the stream didn't naturally complete. Defensive try/await
+    // around each so we degrade gracefully: a partial RunResult with
+    // whatever usage we got from steps that did complete is better than
+    // crashing on a downstream `await` and losing the failure context.
+    let finalText = '';
+    let finalSteps = [];
+    let totalUsage;
+    try {
+        finalText = await result.text;
+    }
+    catch (err) {
+        logger.debug('result.text rejected (expected after fatal/cancel)', err);
+    }
+    try {
+        finalSteps = await result.steps;
+    }
+    catch (err) {
+        logger.debug('result.steps rejected (expected after fatal/cancel)', err);
+    }
+    try {
+        totalUsage = await result.totalUsage;
+    }
+    catch (err) {
+        logger.debug('result.totalUsage rejected (expected after fatal/cancel)', err);
     }
-    // Wait for all the post-stream promises to resolve. They're already
-    // ready by the time fullStream finishes (the stream completion is the
-    // signal), so these awaits are effectively synchronous.
-    const finalText = await result.text;
-    const finalSteps = await result.steps;
-    const totalUsage = await result.totalUsage;
     logger.info(`Agent finished after ${finalSteps.length} step(s)`);
     logger.debug('Final text', finalText);
     // Token usage for this invocation.
@@ -228,7 +330,7 @@ export async function runAgent(opts) {
     usage.log({
         input,
         provider: config.provider,
-        model: config.model,
+        model: activeModel,
         steps: finalSteps.length,
         promptTokens: totalUsage?.inputTokens ?? 0,
         completionTokens: totalUsage?.outputTokens ?? 0,
@@ -278,6 +380,7 @@ export async function runAgent(opts) {
         finalOutput,
         finalError,
         ranCommand,
+        endReason,
     };
 }
 /**

package/dist/cli.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { Command } from 'commander';
 import chalk from 'chalk';
-import { loadConfig, writeDefaultConfig } from './config.js';
+import { loadConfig, validateActiveProvider, writeDefaultConfig } from './config.js';
 import { Logger } from './logger.js';
 import { AuditLogger } from './audit.js';
 import { ReasoningLogger } from './reasoning.js';
@@ -8,7 +8,8 @@ import { UsageLogger } from './usage.js';
 import { History } from './history.js';
 import { runAgent } from './agent.js';
 import { FILES, PATHS, DEFAULT_SCRIPT_FOLDER } from './paths.js';
-const VERSION = '0.4.0';
+import { UserCancelledError } from './errors.js';
+const VERSION = '0.6.0';
 /**
  * Apply CLI flags on top of the loaded config. Flags only override; they
  * never widen or compose with each other implicitly.
@@ -102,6 +103,19 @@ export async function main(argv) {
             return;
         }
         const cfg = applyCliOverrides(loadConfig(), globalOpts);
+        // Strict validation: the active provider must have a config block with
+        // a `model` set. Deferred from loadConfig so subcommands like `paths`
+        // and `history` work even without a config file. The run command is
+        // the one that actually needs a complete provider, so we check here.
+        try {
+            validateActiveProvider(cfg);
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            process.stderr.write(chalk.red('Config error: ') + msg + '\n');
+            process.exitCode = 1;
+            return;
+        }
         const logger = new Logger(cfg.logging.level);
         const audit = new AuditLogger(cfg.logging.auditLog);
         const reasoning = new ReasoningLogger({
@@ -154,7 +168,13 @@ export async function main(argv) {
             // Footer counts only commands that actually executed. Declined or
             // cancelled commands appear in `result.commands` for the history
             // log but don't count as "ran" since no subprocess was started.
-            if (result.executedCommandCount > 0) {
+            //
+            // Gated on `cfg.verbose`: the footer is supplementary information
+            // ("here's what happened during the run") that's useful while you're
+            // watching the agent work, but noisy for scripted/pipeline use. With
+            // verbose off, nothing aca generates reaches the terminal — only the
+            // AWS CLI's verbatim output does.
+            if (cfg.verbose && result.executedCommandCount > 0) {
                 const tag = result.profile ? `[${result.profile}]` : '';
                 const cmds = result.executedCommandCount === 1
                     ? '1 command'
@@ -163,10 +183,19 @@ export async function main(argv) {
             }
         }
         catch (err) {
-            const msg = err instanceof Error ? err.message : String(err);
-            logger.error('Agent failed', msg);
-            process.stderr.write(chalk.red('Error: ') + msg + '\n');
-            process.exitCode = 1;
+            // User cancelled (Ctrl-C at a prompt). Print a calm message,
+            // exit 130 (SIGINT convention), no red error, no "ran N" footer,
+            // no stack trace.
+            if (err instanceof UserCancelledError) {
+                process.stderr.write(chalk.dim('cancelled by user\n'));
+                process.exitCode = 130;
+            }
+            else {
+                const msg = err instanceof Error ? err.message : String(err);
+                logger.error('Agent failed', msg);
+                process.stderr.write(chalk.red('Error: ') + msg + '\n');
+                process.exitCode = 1;
+            }
         }
         finally {
             logger.close();

package/dist/config.d.ts CHANGED Viewed

@@ -1,14 +1,28 @@
 import { z } from 'zod';
 export declare const ConfigSchema: z.ZodObject<{
     provider: z.ZodDefault<z.ZodEnum<{
-        bedrock: "bedrock";
         anthropic: "anthropic";
         openai: "openai";
         google: "google";
+        bedrock: "bedrock";
     }>>;
-    model: z.ZodDefault<z.ZodString>;
-    apiKeyEnv: z.ZodOptional<z.ZodString>;
+    anthropic: z.ZodOptional<z.ZodObject<{
+        model: z.ZodOptional<z.ZodString>;
+        apiKey: z.ZodOptional<z.ZodString>;
+        apiKeyEnv: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    openai: z.ZodOptional<z.ZodObject<{
+        model: z.ZodOptional<z.ZodString>;
+        apiKey: z.ZodOptional<z.ZodString>;
+        apiKeyEnv: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    google: z.ZodOptional<z.ZodObject<{
+        model: z.ZodOptional<z.ZodString>;
+        apiKey: z.ZodOptional<z.ZodString>;
+        apiKeyEnv: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
     bedrock: z.ZodOptional<z.ZodObject<{
+        model: z.ZodOptional<z.ZodString>;
         region: z.ZodOptional<z.ZodString>;
         profile: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
@@ -38,6 +52,33 @@ export declare const ConfigSchema: z.ZodObject<{
     scriptFolder: z.ZodOptional<z.ZodString>;
 }, z.core.$strip>;
 export type Config = z.infer<typeof ConfigSchema>;
+/**
+ * Resolve the active provider's model. The schema marks `model` optional
+ * per-block so that we can produce a single coherent error message in
+ * `validateActiveProvider` rather than zod's multi-issue tree. Call this
+ * only after validateActiveProvider has passed.
+ */
+export declare function getActiveModel(config: Config): string;
+/**
+ * Strict post-parse validation for the active provider's block. The active
+ * provider's block must exist and must contain a `model`. Pre-1.0 we treat
+ * this as a hard error rather than scaffolding defaults, so the user always
+ * knows exactly what's being called and at what cost.
+ *
+ * Call this from code paths that actually run the agent — the `run` command.
+ * Subcommands that don't need a provider (`paths`, `config`, `history`)
+ * skip this check, so a user with no config file can still use them.
+ */
+export declare function validateActiveProvider(config: Config): void;
 export declare function loadConfig(): Config;
-/** Write a default config file if none exists. Returns the path either way. */
+/**
+ * Write a default config file if none exists. Scaffolds only the active
+ * provider's block (just `model`), deliberately not creating slots for
+ * other providers (less to read) and not scaffolding `apiKey` (less
+ * temptation to put secrets on disk).
+ *
+ * Sets mode 0600 on the file. This doesn't protect against a user later
+ * copying the file with `cp` or moving it, but ensures that the file as we
+ * create it isn't world-readable.
+ */
 export declare function writeDefaultConfig(): string;