aws-cli-agent 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/LICENSE +21 -0
- package/README.md +373 -0
- package/dist/agent.d.ts +40 -0
- package/dist/agent.js +293 -0
- package/dist/audit.d.ts +39 -0
- package/dist/audit.js +33 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +179 -0
- package/dist/config.d.ts +43 -0
- package/dist/config.js +131 -0
- package/dist/history.d.ts +34 -0
- package/dist/history.js +71 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +6 -0
- package/dist/logger.d.ts +29 -0
- package/dist/logger.js +68 -0
- package/dist/paths.d.ts +23 -0
- package/dist/paths.js +36 -0
- package/dist/providers.d.ts +16 -0
- package/dist/providers.js +59 -0
- package/dist/reasoning.d.ts +59 -0
- package/dist/reasoning.js +125 -0
- package/dist/tools/aws-cli.d.ts +48 -0
- package/dist/tools/aws-cli.js +279 -0
- package/dist/tools/bash.d.ts +47 -0
- package/dist/tools/bash.js +197 -0
- package/dist/tools/history.d.ts +18 -0
- package/dist/tools/history.js +27 -0
- package/dist/tools/index.d.ts +157 -0
- package/dist/tools/index.js +43 -0
- package/dist/tools/profiles.d.ts +7 -0
- package/dist/tools/profiles.js +37 -0
- package/dist/tools/prompt.d.ts +37 -0
- package/dist/tools/prompt.js +145 -0
- package/dist/usage.d.ts +39 -0
- package/dist/usage.js +28 -0
- package/package.json +73 -0
package/dist/agent.js
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
import { streamText, stepCountIs } from 'ai';
|
|
2
|
+
import { createModel } from './providers.js';
|
|
3
|
+
import { createTools } from './tools/index.js';
|
|
4
|
+
const SYSTEM_PROMPT = `You are aws-cli-agent (aca), an agentic assistant that translates natural-language requests into AWS CLI commands and executes them locally on the user's machine.
|
|
5
|
+
|
|
6
|
+
Capabilities (via tools):
|
|
7
|
+
- query_history: search local past commands to recover context (profiles, bucket/instance/cluster names).
|
|
8
|
+
- list_aws_profiles: enumerate ~/.aws profiles to map account names to profiles.
|
|
9
|
+
- execute_aws_command: run an AWS CLI call. Read-only calls (describe-/list-/get-/s3 ls) may auto-approve; mutating calls always prompt the user.
|
|
10
|
+
- prompt_user: ask the user ONE question (kind: text | choice | confirm | secret) to fill in missing information mid-reasoning.
|
|
11
|
+
- prompt_user_multi: ask several related questions in one round (e.g. "source profile + destination profile + region").
|
|
12
|
+
- execute_bash_script: run a bash script. Use for multi-account / loop / jq workflows.
|
|
13
|
+
|
|
14
|
+
CARDINAL RULE — DO NOT GUESS. If you don't know a value that's required for the user's task, ASK the user via prompt_user (or prompt_user_multi). This is non-negotiable. Concrete examples:
|
|
15
|
+
|
|
16
|
+
- The user said "list buckets" but didn't say which account, and history has no obvious match → call list_aws_profiles, then prompt_user with kind="choice" listing the profiles.
|
|
17
|
+
- A "describe-instances" call returned 3 instances with the requested tag → prompt_user with kind="choice" listing the 3 candidates. Do NOT pick one yourself.
|
|
18
|
+
- The user said "delete the old logs bucket" but several buckets contain "logs" → prompt_user with kind="choice" showing the matches.
|
|
19
|
+
- You're about to run a destructive command (delete-, terminate-, remove-, drop-, etc.) and have any doubt about the right target → prompt_user with kind="confirm" stating exactly what will be deleted.
|
|
20
|
+
- The user asked for an MFA-protected action and you need the code → prompt_user with kind="secret".
|
|
21
|
+
|
|
22
|
+
When you DON'T need to ask:
|
|
23
|
+
- The value is unambiguous in the user's request ("in account abc-xyz" → profile is abc-xyz).
|
|
24
|
+
- query_history returned a single clean match for the relevant token.
|
|
25
|
+
- The value is determinable by a read-only AWS CLI call (e.g. instance id by tag, when there's exactly one match).
|
|
26
|
+
|
|
27
|
+
Asking earns trust. Guessing wrong and acting on it is much worse than one extra question.
|
|
28
|
+
|
|
29
|
+
Operating rules:
|
|
30
|
+
1. ALWAYS start by calling query_history with the most informative tokens from the user request. Use the results to infer profile and common parameters.
|
|
31
|
+
2. If the user names an account that history did not resolve, call list_aws_profiles. If still ambiguous, prompt_user with kind="choice" listing the available profiles.
|
|
32
|
+
3. For multi-step requests (e.g. "ssm session to instance NAME in ACCOUNT"), first run a read-only describe/list call to resolve the resource (instance id, cluster endpoint, etc.). If exactly one match, proceed. If multiple, prompt_user with choices. If zero, prompt_user kind="text" asking for a more specific name (and offer to retry).
|
|
33
|
+
4. When you need multiple unrelated parameters up front (e.g. source profile, target profile, region), call prompt_user_multi once instead of three separate prompt_user calls. When the answer to A would determine what to ask for B, use separate prompt_user calls in sequence.
|
|
34
|
+
5. For tasks that span multiple AWS accounts or require composition (jq, loops), build a bash script with "set -euo pipefail" at the top and invoke execute_bash_script.
|
|
35
|
+
6. Default to the user's preferred output format. For listings the user will read directly (e.g. "list buckets", "list instances"), use the AWS CLI's default text/table output, NOT JSON. Only use "--output json" when you specifically need to parse fields for a subsequent step.
|
|
36
|
+
7. Region handling: if the user names a region in the request, pass it explicitly with --region. If they don't, omit --region entirely — the host CLI will inject the user's configured defaultRegion automatically when one is set. Never invent a region.
|
|
37
|
+
8. Interactive commands: some AWS CLI commands require a real terminal — SSM Session Manager shells (\`ssm start-session\`), port-forwarding sessions (the same command with --document-name AWS-StartPortForwardingSession*), ECS Exec (\`ecs execute-command\`), log tails with --follow. For these, set \`interactive: true\` on the execute_aws_command call. The host will connect the user's terminal directly to the command and you will receive no stdout — DO NOT try to summarize or describe the output afterwards, since you can't see it. Common patterns auto-detect, but setting the flag explicitly is safer.
|
|
38
|
+
9. The final action of a successful run MUST be either execute_aws_command (the user-requested action) or execute_bash_script. If the user cancels via prompt_user, stop gracefully and explain in one sentence.
|
|
39
|
+
10. NEVER include credentials, API keys, secrets, or session tokens in commands or scripts. AWS credentials come from the user's existing profile.
|
|
40
|
+
11. Keep your reasoning concise — one or two sentences per step. DO NOT summarize, restate, reformat, or describe the output of the AWS CLI. The CLI's stdout is shown to the user directly by the host program. Your only post-execution job is to stop. If anything went wrong, say so briefly; if it succeeded, you may stop without further commentary.`;
|
|
41
|
+
/**
 * Run a single agent invocation end to end.
 *
 * Builds the tool set and model from `config`, streams the model's steps,
 * echoes reasoning and tool calls through `reasoning`, logs token usage
 * through `usage`, appends one entry to `history`, and returns a summary of
 * what was executed.
 *
 * @param opts - `{ input, config, logger, history, audit, reasoning, usage }`.
 * @returns `{ text, steps, commands, executedCommandCount, profile,
 *   finalOutput, finalError, ranCommand }`. `finalOutput` is the LAST
 *   execution's stdout when that execution succeeded (otherwise null);
 *   `finalError` is the LAST execution's stderr when it did not succeed
 *   (otherwise null); `ranCommand` is true only when the last execution
 *   succeeded.
 * @throws The underlying error when the model stream reports an `error` part.
 */
export async function runAgent(opts) {
    const { input, config, logger, history, audit, reasoning, usage } = opts;
    // Every command/script the tools run (or that the user declines) is pushed
    // here by the tools through the `record` callback.
    const executions = [];
    const record = (entry) => {
        executions.push(entry);
    };
    // Whether to enable prompt caching for this run. Anthropic and Bedrock
    // support an explicit `cacheControl` / `cachePoint` marker on the system
    // message — see `systemMessageProviderOptions` below. OpenAI auto-caches
    // prompts over 1,024 tokens with no opt-in needed; Google Gemini's caching
    // API is structurally different and not wired up. If caching=false in
    // config, we don't send markers anywhere.
    //
    // Note: only the system message is marked. Marking individual tool
    // definitions does not work — the Bedrock provider drops tool-level
    // providerOptions before serializing the request (see the comment in
    // tools/index.ts).
    const useCaching = config.caching && (config.provider === 'anthropic' || config.provider === 'bedrock');
    const tools = createTools({ logger, config, history, audit, record });
    const model = createModel(config);
    logger.info(`Starting agent (provider=${config.provider}, model=${config.model})`);
    logger.debug('User input', input);
    reasoning.beginRun(input);
    // Inline a small recent-history hint so the model has soft context even
    // before it explicitly calls query_history. Statelessness on the server
    // side is preserved: we send the full prompt each call.
    const recent = history.recent(5);
    const historyHint = recent.length
        ? '\n\nRecent past requests (most recent first):\n' +
            recent
                .map((e, i) => {
                    // History entries are loaded by the caller before the run; tolerate an
                    // entry without a `resources` field instead of throwing in Object.keys.
                    const resources = e.resources ?? {};
                    const resourceSuffix = Object.keys(resources).length
                        ? ` resources=${JSON.stringify(resources)}`
                        : '';
                    return `${i + 1}. "${e.input}" -> profile=${e.profile ?? 'n/a'}${resourceSuffix}`;
                })
                .join('\n')
        : '';
    // The marked prefix is the SYSTEM PROMPT only — kept byte-stable across
    // invocations. The per-invocation history hint goes in the user message
    // where it can't invalidate the cache.
    // NOTE(review): whether the tool definitions end up inside the cached
    // prefix is provider-dependent and not something this file can establish;
    // treat the cacheReadTokens / cacheWriteTokens written to the usage log as
    // the ground truth.
    const systemMessageProviderOptions = useCaching
        ? {
            anthropic: { cacheControl: { type: 'ephemeral' } },
            bedrock: { cachePoint: { type: 'default' } },
        }
        : undefined;
    // Build the user-side content. Prepend the history hint so it stays
    // OUTSIDE the cached system message (it varies per invocation and would
    // bust the cache if included in the system prompt).
    const userContent = historyHint
        ? `${historyHint}\n\n---\n\nUser request: ${input}`
        : input;
    // Closure variables shared between the streamText callback and the
    // for-await loop below. Hoisted above streamText so the callback can read
    // them. start-step sets toolCallStepNumber to the current step number so
    // onToolCallStart knows which step to label the tool-call line with.
    let stepCounter = 0;
    let toolCallStepNumber = 0;
    let currentReasoning = '';
    let currentToolCalls = [];
    let reasoningEchoed = false;
    const result = streamText({
        model,
        messages: [
            {
                role: 'system',
                content: SYSTEM_PROMPT,
                providerOptions: systemMessageProviderOptions,
            },
            { role: 'user', content: userContent },
        ],
        // The SDK warns when role:'system' messages appear in the messages array
        // because that field is a potential prompt-injection vector for callers
        // who template the system message from user input. In our case the
        // system message is a hardcoded string literal (SYSTEM_PROMPT) and we
        // need it in the messages array — not the top-level `system:` param —
        // so we can attach providerOptions for prompt caching. Setting this
        // flag is the SDK's documented way of saying "I'm aware, my system
        // message is trusted."
        allowSystemInMessages: true,
        tools,
        // AI SDK v5+ replaced the `maxSteps: number` setting with `stopWhen`,
        // which accepts one or more stop conditions. stepCountIs(n) is the
        // straight equivalent.
        stopWhen: stepCountIs(config.maxSteps),
        // Print the tool-call line synchronously before execute() runs. We use
        // this callback rather than the `tool-call` event in fullStream because
        // the SDK launches execute() as a concurrent task — by the time our
        // for-await loop sees `tool-call` in the stream, execute may already
        // be running (or done). This callback fires inline, immediately before
        // execute(), guaranteeing the tool-call line appears above any
        // approval prompt the tool's execute() shows.
        experimental_onToolCallStart: (event) => {
            const input = 'input' in event.toolCall ? event.toolCall.input : undefined;
            reasoning.echoToolCall(toolCallStepNumber, event.toolCall.toolName, input);
            currentToolCalls.push({ toolName: event.toolCall.toolName, args: input });
        },
    });
    // Drive the agent by consuming the full stream. The reasoning text
    // streams as text-delta events; we accumulate it and echo on text-end
    // so the user sees it BEFORE the tool-call line (which prints from the
    // onToolCallStart callback above, synchronously before execute()).
    //
    // Two execution sites collaborate to print one step:
    //   1. text-end (here)                   → reasoning text line
    //   2. onToolCallStart (callback above)  → tool: line, then execute()
    for await (const part of result.fullStream) {
        switch (part.type) {
            case 'start-step': {
                stepCounter += 1;
                toolCallStepNumber = stepCounter; // visible to onToolCallStart
                currentReasoning = '';
                currentToolCalls = [];
                reasoningEchoed = false;
                break;
            }
            case 'text-delta': {
                currentReasoning += part.text;
                break;
            }
            case 'text-end': {
                if (!reasoningEchoed) {
                    reasoning.echoReasoning(stepCounter, currentReasoning);
                    reasoningEchoed = true;
                }
                break;
            }
            case 'tool-call': {
                // Backup echo path: if text-end didn't fire (provider variant or
                // text-less step), echo whatever reasoning we have when we see
                // tool-call. The tool-call LINE itself is NOT printed here — it's
                // printed by experimental_onToolCallStart, which fires
                // synchronously before execute() and guarantees ordering above
                // any approval prompt.
                if (!reasoningEchoed) {
                    reasoning.echoReasoning(stepCounter, currentReasoning);
                    reasoningEchoed = true;
                }
                break;
            }
            case 'finish-step': {
                reasoning.logStepToFile({
                    step: stepCounter,
                    reasoning: currentReasoning,
                    toolCalls: currentToolCalls,
                    finishReason: part.finishReason,
                });
                logger.debug(`Step ${stepCounter} finished (finishReason=${part.finishReason})`);
                break;
            }
            case 'error': {
                // streamText reports provider/transport failures as an `error` part
                // on fullStream rather than by throwing. If we ignored it, the real
                // cause would be lost and the awaits below would fail with a generic
                // "no output" error (or the run would look like an empty success).
                // Rethrow so the caller can report what actually went wrong.
                const cause = part.error;
                throw cause instanceof Error ? cause : new Error(String(cause));
            }
            // Other event types (reasoning-delta for thinking-models,
            // tool-input-delta, source, file, raw, etc.) are ignored —
            // fullStream is forward-compatible.
        }
    }
    // Wait for all the post-stream promises to resolve. They're already
    // ready by the time fullStream finishes (the stream completion is the
    // signal), so these awaits are effectively synchronous.
    const finalText = await result.text;
    const finalSteps = await result.steps;
    const totalUsage = await result.totalUsage;
    logger.info(`Agent finished after ${finalSteps.length} step(s)`);
    logger.debug('Final text', finalText);
    // Token usage for this invocation.
    //
    // In AI SDK v5/v6, `result.usage` is only the LAST step's tokens — confusingly
    // named. `result.totalUsage` is the sum across all steps. We want totalUsage.
    //
    // Cache hit/miss counts live in `totalUsage.inputTokenDetails`. The SDK
    // normalizes these across providers — no need to dig into provider-specific
    // metadata. The previous code path that read providerMetadata.{anthropic,
    // bedrock}.* was looking in the wrong place; cache counts in providerMetadata
    // are raw, per-provider, and located differently per provider (Bedrock nests
    // them under `usage`, Anthropic doesn't). inputTokenDetails is the
    // recommended cross-provider surface.
    //
    // We still dump per-step providerMetadata at trace level for debugging —
    // useful when caching numbers look wrong and you want to see exactly what
    // the provider returned.
    for (const step of finalSteps) {
        const pm = step.providerMetadata;
        if (pm)
            logger.trace(`step ${step.stepNumber} providerMetadata`, pm);
    }
    const td = totalUsage?.inputTokenDetails;
    const cacheReadTokens = toNumber(td?.cacheReadTokens);
    const cacheWriteTokens = toNumber(td?.cacheWriteTokens);
    usage.log({
        input,
        provider: config.provider,
        model: config.model,
        steps: finalSteps.length,
        promptTokens: totalUsage?.inputTokens ?? 0,
        completionTokens: totalUsage?.outputTokens ?? 0,
        totalTokens: totalUsage?.totalTokens ?? 0,
        cacheReadTokens,
        cacheWriteTokens,
    });
    logger.debug('Usage', { ...totalUsage, cacheReadTokens, cacheWriteTokens });
    // Determine what to show the user as the final output. Rule: the LAST
    // execution wins, regardless of success — and if it failed or was
    // declined, no stdout is printed at all (we only have intermediate
    // scaffolding output left, which the user didn't ask for).
    //
    // Previously we used `find((e) => e.ok)`, which selected the most recent
    // *successful* call. That was wrong when the final intended action was
    // declined or failed: the heuristic fell back to an earlier discovery
    // call (describe-instances, list-buckets, etc.) and printed its JSON as
    // if it were the user's answer — confusing because it wasn't.
    //
    // For an empty run (no executions, e.g. the agent just talked) we have
    // nothing to print and `finalOutput` stays null.
    const last = executions.length > 0 ? executions[executions.length - 1] : null;
    // Profile of the most recent execution that had one (not necessarily `last`).
    const lastProfile = [...executions].reverse().find((e) => e.profile)?.profile ?? null;
    const finalOutput = last?.ok ? last.stdout : null;
    const finalError = last && !last.ok ? last.stderr : null;
    const ranCommand = last?.ok === true;
    const entry = {
        timestamp: new Date().toISOString(),
        input,
        commands: executions.map((e) => e.cmd),
        profile: lastProfile,
        resources: {},
        success: ranCommand,
    };
    history.append(entry);
    // "Executed" = the subprocess actually ran. Declines/cancellations use
    // exitCode -1 by convention (no process was ever spawned); successes use
    // 0, real failures use a non-zero exit. We count anything that has a real
    // exit code (≥ 0), so the user-facing footer reflects reality.
    const executedCommandCount = executions.filter((e) => e.exitCode >= 0).length;
    return {
        text: finalText,
        steps: finalSteps.length,
        commands: executions.map((e) => e.cmd),
        executedCommandCount,
        profile: lastProfile,
        finalOutput,
        finalError,
        ranCommand,
    };
}
|
|
283
|
+
/**
 * Normalize a usage-metadata value to a finite, non-negative number.
 *
 * Anything that is not of type `number` — or is NaN, ±Infinity, or
 * negative — collapses to 0. Acceptable values are returned unchanged; no
 * rounding or truncation is applied. This lets callers feed provider fields
 * that may be null, undefined, or NaN when no cache event occurred straight
 * into the usage log.
 */
function toNumber(v) {
    const isUsableCount = typeof v === 'number' && Number.isFinite(v) && v >= 0;
    return isUsableCount ? v : 0;
}
|
package/dist/audit.d.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audit log: append-only JSONL of every command and script the agent ran on
|
|
3
|
+
* the user's behalf. The audit log is intentionally exhaustive — it captures
|
|
4
|
+
* the verbatim stdout/stderr so that, after the fact, you can reconstruct
|
|
5
|
+
* exactly what the agent did and what AWS returned. For bash scripts the full
|
|
6
|
+
* script source is included.
|
|
7
|
+
*
|
|
8
|
+
* Disable via `logging.auditLog = false` in config; the writer becomes a no-op.
|
|
9
|
+
*/
|
|
10
|
+
/** Audit record for one AWS CLI invocation (`type: 'aws_command'`). */
export type AuditCommandEntry = {
    /** ISO-8601 timestamp; filled in by the logger, not by the caller. */
    timestamp: string;
    /** Discriminant distinguishing command records from script records. */
    type: 'aws_command';
    /** The command as recorded by the tool that ran it. */
    cmd: string;
    /** AWS profile associated with the command, or null when there is none. */
    profile: string | null;
    /**
     * Exit code of the command.
     * NOTE(review): agent.js treats -1 as "declined/cancelled, never spawned";
     * confirm the tools write the same convention here.
     */
    exitCode: number;
    /** Whether the tool considered the execution successful. */
    ok: boolean;
    /** Captured standard output, verbatim. */
    stdout: string;
    /** Captured standard error, verbatim. */
    stderr: string;
};
/**
 * Audit record for one bash script run (`type: 'bash_script'`). Same fields
 * as a command record plus the full script source.
 */
export type AuditScriptEntry = {
    /** ISO-8601 timestamp; filled in by the logger, not by the caller. */
    timestamp: string;
    /** Discriminant distinguishing script records from command records. */
    type: 'bash_script';
    /** The command as recorded by the tool that ran the script. */
    cmd: string;
    /** AWS profile associated with the script, or null when there is none. */
    profile: string | null;
    /** Exit code of the script (see the note on AuditCommandEntry.exitCode). */
    exitCode: number;
    /** Whether the tool considered the execution successful. */
    ok: boolean;
    /** Captured standard output, verbatim. */
    stdout: string;
    /** Captured standard error, verbatim. */
    stderr: string;
    /** Full source of the script that was run. */
    script: string;
};
/** Any record in the audit log; discriminate on `type`. */
export type AuditEntry = AuditCommandEntry | AuditScriptEntry;
|
|
32
|
+
/**
 * Writer for the audit log. Each `log*` call appends one JSON line; the
 * logger stamps `timestamp` and `type` itself, which is why the entry
 * parameters omit those two fields.
 */
export declare class AuditLogger {
    /** Underlying output stream; absent when auditing is disabled. */
    private readonly stream;
    /**
     * @param enabled - When false, no file is opened and every method is a no-op.
     */
    constructor(enabled: boolean);
    /** Append a record for an AWS CLI command. */
    logCommand(entry: Omit<AuditCommandEntry, 'timestamp' | 'type'>): void;
    /** Append a record for a bash script, including its source. */
    logScript(entry: Omit<AuditScriptEntry, 'timestamp' | 'type'>): void;
    /** Serialize one entry and write it to the stream. */
    private write;
    /** End the underlying stream, if one was opened. */
    close(): void;
}
|
package/dist/audit.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { FILES, PATHS } from './paths.js';
|
|
3
|
+
export class AuditLogger {
|
|
4
|
+
stream;
|
|
5
|
+
constructor(enabled) {
|
|
6
|
+
if (!enabled) {
|
|
7
|
+
this.stream = null;
|
|
8
|
+
return;
|
|
9
|
+
}
|
|
10
|
+
fs.mkdirSync(PATHS.state, { recursive: true });
|
|
11
|
+
this.stream = fs.createWriteStream(FILES.audit, { flags: 'a' });
|
|
12
|
+
}
|
|
13
|
+
logCommand(entry) {
|
|
14
|
+
this.write({ timestamp: new Date().toISOString(), type: 'aws_command', ...entry });
|
|
15
|
+
}
|
|
16
|
+
logScript(entry) {
|
|
17
|
+
this.write({ timestamp: new Date().toISOString(), type: 'bash_script', ...entry });
|
|
18
|
+
}
|
|
19
|
+
write(entry) {
|
|
20
|
+
if (!this.stream)
|
|
21
|
+
return;
|
|
22
|
+
try {
|
|
23
|
+
this.stream.write(JSON.stringify(entry) + '\n');
|
|
24
|
+
}
|
|
25
|
+
catch {
|
|
26
|
+
// Auditing must never crash the agent. Failures here are silent by
|
|
27
|
+
// design; the operational logger will still surface execution errors.
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
close() {
|
|
31
|
+
this.stream?.end();
|
|
32
|
+
}
|
|
33
|
+
}
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * CLI entry point: registers the `config`, `paths`, `history`, and default
 * `run` commands and parses `argv`.
 *
 * @param argv - Argument vector handed to the command parser.
 *   NOTE(review): presumably the full `process.argv` (including the node and
 *   script entries) — confirm against the caller in index.js.
 * @returns A promise that settles once the selected command has finished.
 */
export declare function main(argv: string[]): Promise<void>;
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import chalk from 'chalk';
|
|
3
|
+
import { loadConfig, writeDefaultConfig } from './config.js';
|
|
4
|
+
import { Logger } from './logger.js';
|
|
5
|
+
import { AuditLogger } from './audit.js';
|
|
6
|
+
import { ReasoningLogger } from './reasoning.js';
|
|
7
|
+
import { UsageLogger } from './usage.js';
|
|
8
|
+
import { History } from './history.js';
|
|
9
|
+
import { runAgent } from './agent.js';
|
|
10
|
+
import { FILES, PATHS, DEFAULT_SCRIPT_FOLDER } from './paths.js';
|
|
11
|
+
const VERSION = '0.4.0';
|
|
12
|
+
/**
 * Layer command-line flags over the loaded configuration.
 *
 * Each flag that is set replaces exactly one top-level config key (for
 * `--log-level`, only `logging.level` inside a copy of `logging`). Flags
 * never combine with one another. The input config is not mutated; when no
 * flag is set the very same `cfg` object is returned.
 *
 * @param cfg - Config as loaded from disk.
 * @param opts - Parsed global CLI options (`verbose`, `logLevel`,
 *   `autoApprove`, `region`, `interactive`).
 * @returns The effective config for this run.
 */
function applyCliOverrides(cfg, opts) {
    const patches = [];
    if (opts.verbose) {
        patches.push({ verbose: true });
    }
    if (opts.logLevel) {
        patches.push({ logging: { ...cfg.logging, level: opts.logLevel } });
    }
    if (opts.autoApprove) {
        patches.push({ autoApprove: { readOnly: true, all: true } });
    }
    if (opts.region) {
        patches.push({ defaultRegion: opts.region });
    }
    if (opts.interactive) {
        patches.push({ forceInteractive: true });
    }
    // No flags → hand back the original object untouched.
    return patches.length === 0 ? cfg : Object.assign({}, cfg, ...patches);
}
|
|
35
|
+
/**
 * CLI entry point. Registers the global options and the `config`, `paths`,
 * `history`, and default `run` commands, then parses `argv`.
 *
 * Output policy for `run`: stdout carries only the final command's verbatim
 * output; everything else goes to stderr. The process exit code is set to 1
 * when the final command genuinely failed or the agent threw, and is left
 * untouched when the user declined/cancelled.
 *
 * @param argv - Argument vector handed to commander's `parseAsync`.
 */
export async function main(argv) {
    const program = new Command();
    program
        .name('aca')
        .description('aws-cli-agent (aca): agentic AI assistant that turns natural language into AWS CLI commands.')
        .version(VERSION)
        .option('-v, --verbose', 'echo agent reasoning to the console as it runs')
        .option('--log-level <level>', 'override logging.level for this run: silent | error | warn | info | debug | trace')
        .option('--auto-approve', 'auto-approve all commands and scripts for this run (use with care)')
        .option('--profile <name>', 'hint the agent to use this AWS profile')
        .option('--region <name>', 'override defaultRegion for this run (only applies when the agent did not pick a region itself)')
        .option('-i, --interactive', 'force AWS CLI commands to inherit your terminal (for shells, port-forwards, log tails). ' +
        'Common patterns auto-detect; this is the manual override.');
    program
        .command('config')
        .description('Print the config file path; create defaults if missing.')
        .action(() => {
        const p = writeDefaultConfig();
        process.stdout.write(p + '\n');
    });
    program
        .command('paths')
        .description('Print paths used by aws-cli-agent.')
        .action(() => {
        const cfg = loadConfig();
        const scriptFolder = cfg.scriptFolder ?? DEFAULT_SCRIPT_FOLDER;
        const out = [
            `config dir : ${PATHS.config}`,
            `state dir : ${PATHS.state}`,
            '',
            `config file : ${FILES.config}`,
            `history : ${FILES.history}`,
            `general log : ${FILES.log}`,
            `audit log : ${FILES.audit}`,
            `reasoning log: ${FILES.reasoning}`,
            `usage log : ${FILES.usage}`,
            `script folder: ${scriptFolder}`,
        ].join('\n');
        process.stdout.write(out + '\n');
    });
    program
        .command('history')
        .description('Print recent history entries.')
        .option('-n, --count <number>', 'how many entries', '10')
        .action(async (cmdOpts) => {
        const cfg = loadConfig();
        const h = new History(cfg.historyLimit);
        await h.load();
        // Fall back to 10 when --count is not a parseable integer.
        const n = Number.parseInt(cmdOpts.count, 10);
        for (const e of h.recent(Number.isFinite(n) ? n : 10)) {
            process.stdout.write(`${chalk.dim(e.timestamp)} ${chalk.bold(e.input)}\n`);
            if (e.profile)
                process.stdout.write(` profile: ${e.profile}\n`);
            for (const c of e.commands) {
                process.stdout.write(` ${chalk.green(c)}\n`);
            }
        }
    });
    program
        .command('run', { isDefault: true })
        .description('Run a natural-language request (default).')
        .argument('<request...>', 'natural-language request')
        .action(async (requestArgs) => {
        const globalOpts = program.opts();
        const request = requestArgs.join(' ').trim();
        if (!request) {
            program.help();
            return;
        }
        const cfg = applyCliOverrides(loadConfig(), globalOpts);
        const logger = new Logger(cfg.logging.level);
        const audit = new AuditLogger(cfg.logging.auditLog);
        const reasoning = new ReasoningLogger({
            enabled: cfg.logging.reasoningLog,
            consoleEcho: cfg.verbose,
        });
        const usage = new UsageLogger(cfg.logging.usageLog);
        const history = new History(cfg.historyLimit);
        await history.load();
        // --profile is passed to the model as a hint appended to the request,
        // not applied to the config.
        const finalRequest = globalOpts.profile
            ? `${request}\n(Use AWS profile: ${globalOpts.profile})`
            : request;
        try {
            const result = await runAgent({
                input: finalRequest,
                config: cfg,
                logger,
                history,
                audit,
                reasoning,
                usage,
            });
            // Output policy: stdout is reserved for the AWS CLI's verbatim output.
            // Everything else (reasoning, prompts, status, commands executed) goes
            // to stderr via the logger. This keeps `aca ... | jq` and similar
            // pipelines working as if the user ran aws directly.
            // Decide what reaches the user's terminal:
            //   - Successful final command → its stdout goes to stdout (pipeable).
            //   - Genuine failure (non-zero exit, spawn error, etc.) → its stderr
            //     goes to stderr in red, and the process exits 1.
            //   - User declined/cancelled → quiet exit. The agent's text response
            //     (if any) tells the user the action was cancelled; no red noise.
            //   - Nothing useful → fall back to the agent's final text, if any.
            const wasDeclined = result.finalError === '[declined by user]' ||
                result.finalError === '[cancelled by user]';
            // finalError is null unless the last execution did not succeed. Test
            // against null rather than truthiness: a command can fail without
            // writing anything to stderr, and that must still exit 1.
            const genuinelyFailed = result.finalError !== null && !wasDeclined;
            const agentText = result.text.trim();
            if (result.ranCommand && result.finalOutput !== null) {
                process.stdout.write(result.finalOutput);
                if (!result.finalOutput.endsWith('\n'))
                    process.stdout.write('\n');
            }
            else if (genuinelyFailed) {
                if (result.finalError) {
                    process.stderr.write(chalk.red(result.finalError));
                    if (!result.finalError.endsWith('\n'))
                        process.stderr.write('\n');
                }
                else if (agentText.length > 0) {
                    // Nothing on stderr: the agent's own explanation is the only
                    // diagnostic available.
                    process.stderr.write(agentText + '\n');
                }
                process.exitCode = 1;
            }
            else if (agentText.length > 0) {
                process.stderr.write(agentText + '\n');
            }
            // Footer counts only commands that actually executed. Declined or
            // cancelled commands appear in `result.commands` for the history
            // log but don't count as "ran" since no subprocess was started.
            if (result.executedCommandCount > 0) {
                const tag = result.profile ? `[${result.profile}]` : '';
                const cmds = result.executedCommandCount === 1
                    ? '1 command'
                    : `${result.executedCommandCount} commands`;
                process.stderr.write(chalk.dim(`\nran ${cmds} ${tag}\n`));
            }
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            logger.error('Agent failed', msg);
            process.stderr.write(chalk.red('Error: ') + msg + '\n');
            process.exitCode = 1;
        }
        finally {
            // Always release the log streams, whether the run succeeded or threw.
            logger.close();
            audit.close();
            reasoning.close();
            usage.close();
        }
    });
    await program.parseAsync(argv);
}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Zod schema for the user's config file. Most keys carry a default
 * (`ZodDefault`), so a partial or empty config still parses to a complete
 * `Config`; only the `ZodOptional` keys may be absent after parsing.
 */
export declare const ConfigSchema: z.ZodObject<{
    /** Which model provider to use. */
    provider: z.ZodDefault<z.ZodEnum<{
        bedrock: "bedrock";
        anthropic: "anthropic";
        openai: "openai";
        google: "google";
    }>>;
    /** Model identifier handed to the provider. */
    model: z.ZodDefault<z.ZodString>;
    /** NOTE(review): presumably the NAME of the env var holding the API key — confirm in providers.js. */
    apiKeyEnv: z.ZodOptional<z.ZodString>;
    /** Settings specific to the Bedrock provider. */
    bedrock: z.ZodOptional<z.ZodObject<{
        region: z.ZodOptional<z.ZodString>;
        profile: z.ZodOptional<z.ZodString>;
    }, z.core.$strip>>;
    /** Default AWS region; overridden per run by the `--region` flag. */
    defaultRegion: z.ZodOptional<z.ZodString>;
    /** Upper bound on agent steps per run (used as the step-count stop condition). */
    maxSteps: z.ZodDefault<z.ZodNumber>;
    /** Log verbosity and on/off switches for the individual log files. */
    logging: z.ZodDefault<z.ZodObject<{
        /** Verbosity of the general log; overridden per run by `--log-level`. */
        level: z.ZodDefault<z.ZodEnum<{
            silent: "silent";
            error: "error";
            warn: "warn";
            info: "info";
            debug: "debug";
            trace: "trace";
        }>>;
        /** Enables the audit log writer. */
        auditLog: z.ZodDefault<z.ZodBoolean>;
        /** Enables the reasoning log writer. */
        reasoningLog: z.ZodDefault<z.ZodBoolean>;
        /** Enables the usage log writer. */
        usageLog: z.ZodDefault<z.ZodBoolean>;
    }, z.core.$strip>>;
    /** Opt in to prompt-caching markers; only acted on for the anthropic and bedrock providers. */
    caching: z.ZodDefault<z.ZodBoolean>;
    /** Echo agent reasoning to the console; set per run by `-v/--verbose`. */
    verbose: z.ZodDefault<z.ZodBoolean>;
    /** Approval policy; `--auto-approve` sets both flags to true for the run. */
    autoApprove: z.ZodDefault<z.ZodObject<{
        readOnly: z.ZodDefault<z.ZodBoolean>;
        all: z.ZodDefault<z.ZodBoolean>;
    }, z.core.$strip>>;
    /** Force commands to inherit the terminal; set per run by `-i/--interactive`. */
    forceInteractive: z.ZodDefault<z.ZodBoolean>;
    /** Limit passed to the `History` store. NOTE(review): presumably the maximum number of retained entries — confirm in history.js. */
    historyLimit: z.ZodDefault<z.ZodNumber>;
    /** Folder for generated scripts; callers fall back to DEFAULT_SCRIPT_FOLDER when unset. */
    scriptFolder: z.ZodOptional<z.ZodString>;
}, z.core.$strip>;
|
|
40
|
+
/** Fully parsed configuration: the output type of `ConfigSchema` (defaults applied). */
export type Config = z.infer<typeof ConfigSchema>;
/**
 * Load the effective configuration.
 * NOTE(review): presumably reads the config file and validates it against
 * `ConfigSchema`; behaviour for a missing or invalid file is not visible
 * from this declaration — confirm in config.js.
 */
export declare function loadConfig(): Config;
/** Write a default config file if none exists. Returns the path either way. */
export declare function writeDefaultConfig(): string;
|